259 lines
7.9 KiB
Python
259 lines
7.9 KiB
Python
# Copyright Stanislav Mironov
|
||
|
||
# Project-wide convention: coordinates are given as ROW first, then COL, both numbered from zero.
|
||
|
||
|
||
import json
|
||
import os
|
||
import random
|
||
import time
|
||
import traceback
|
||
import uuid
|
||
import parser
|
||
import translations
|
||
import utils
|
||
import json
|
||
import links_parser
|
||
import shutil
|
||
|
||
def currt():
    """Return the current Unix time rounded to the nearest whole second."""
    now = time.time()
    return round(now)
|
||
|
||
# Faculty codes handed to links_parser.parse_links, kept in sorted order.
FACULTETS = sorted((
    "asp",
    "mag",
    "fastiv",
    "fat",
    "ftkm",
    "ftpp",
    "feu",
    "fevt",
    "htf",
    "vkf",
    "mmf",
    "fpik",
))

# Temporary directory the Excel files are downloaded into during a session.
DIRNAME = "excels"

# Directory holding one parsed JSON state file per Excel file.
PARSED_DIR = "parsed"

# When not None, only this single faculty code is processed (e.g. 'fevt').
DEBUG_ONE_FAC = None

# Logging switch, mirrored onto the parser module.
LOGGING = False
parser.LOGGING = LOGGING
|
||
|
||
def parse_sheets(download_place):
    """Parse every sheet of the downloaded workbook into a dictionary.

    Args:
        download_place: Path of the downloaded Excel file.

    Returns:
        A dict with one ``"SHEET_<index>"`` entry per processed sheet. If any
        exception is raised while reading or parsing, an ``"error"`` entry
        (message, log anchor UUID and timestamp) is added and whatever was
        collected so far is returned.
    """
    result = {}
    try:
        reader = translations.create_reader(download_place)
        print("Reader info")
        print(reader.info())

        while True:
            timer = utils.StepTimeCounter()
            print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")

            sheet_dict = {
                "index": reader.get_sheet_index(),
                "name": reader.get_sheet_name(),
                "reader_info": reader.info(),
                "groups": {},
            }
            # Register the sheet before parsing so that a failure midway
            # still leaves its partial entry in the result.
            result["SHEET_" + str(reader.get_sheet_index())] = sheet_dict

            prs = parser.Parser(reader)
            print("Parser created; parser.parse();")
            prs.parse()
            print("parsed done!")
            sheet_dict['parse_time'] = round(timer.step())

            # Optional sections are emitted only when they carry data.
            if len(prs.raw_no_schedule) > 0:
                sheet_dict["other_raws"] = prs.raw_no_schedule
            if len(prs.features) > 0:
                sheet_dict["features"] = sorted(prs.features)
            if prs.parser_error is not None:
                sheet_dict["parser_error"] = prs.parser_error
            warnings_found = prs.parser_warnings
            if warnings_found is not None and len(warnings_found) > 0:
                sheet_dict["parser_warnings"] = warnings_found

            # The week metadata is attached only when at least one group
            # exists, because it is assigned inside the loop.
            for group_name, group in prs.groups.items():
                sheet_dict['week_keys_metadata'] = prs.week_keys_metadata
                sheet_dict['groups'][group_name] = group

            print(f"Populates {len(prs.groups)} groups: " + " ".join(prs.groups.keys()))

            if reader.has_next_sheet():
                reader.next_sheet()
                print("Next sheet!")
            else:
                print("File ended")
                break

    except Exception as error:
        print(error)
        traceback.print_exc()
        # The UUID is printed next to the traceback so the stored error can
        # be matched with the log output.
        anchor = uuid.uuid4()
        result['error'] = {
            "smile": ":(",
            "error_message": str(error),
            "log_anchor": str(anchor),
            "time": currt(),
        }
        print(f"Log Anchor: {anchor}")

    return result
|
||
|
||
def parsed_file_path(excel_filename: str):
    """Map a schedule file name to the path of its parsed JSON state file.

    Only ``json``, ``xls`` and ``xlsx`` extensions (case-insensitive) are
    accepted. A spreadsheet extension is swapped for ``.json``, the whole
    name is lower-cased and the result is placed under ``PARSED_DIR``.

    Args:
        excel_filename: Bare file name, e.g. ``"Schedule.XLSX"``.

    Returns:
        ``PARSED_DIR + os.path.sep + <lower-cased name>``, or ``None`` (after
        printing a message) when the extension is not supported.
    """
    stem, dot, extension = excel_filename.rpartition(".")
    ext_lower = extension.lower()

    if ext_lower not in ("json", "xls", "xlsx"):
        print(f"Unknown filename format: {excel_filename}")
        return None

    # Swap only the trailing extension. A plain str.replace would also
    # rewrite the same text appearing earlier in the name
    # ("a.xls.old.xls" -> "a.json.old.json").
    # A name without any dot is kept as is.
    if ext_lower != "json" and dot:
        excel_filename = stem + ".json"

    return PARSED_DIR + os.path.sep + excel_filename.lower()
|
||
|
||
def load_parsed_state(excel_filename):
    """Load the previously saved parsed state for *excel_filename*.

    Returns:
        The decoded JSON content of the state file, or ``None`` when no state
        file exists yet.
    """
    state_path = parsed_file_path(excel_filename)
    if os.path.exists(state_path):
        with open(state_path, "r", encoding="utf-8") as stream:
            return json.load(fp=stream)
    return None
|
||
|
||
def save_parsed_state(excel_filename, obj):
    """Serialize *obj* as JSON into the state file of *excel_filename*.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the real state file with ``os.replace``. An interrupted or failed write
    therefore never leaves a truncated state file, which would otherwise
    make every later ``load_parsed_state`` call for this file raise.

    Args:
        excel_filename: File name understood by ``parsed_file_path``.
        obj: JSON-serializable state object.

    Raises:
        TypeError: If the file name has an unsupported extension (no path) or
            *obj* is not JSON-serializable.
        OSError: If the file cannot be written or replaced.
    """
    filepath = parsed_file_path(excel_filename)
    temp_path = filepath + ".tmp"

    try:
        with open(temp_path, "w", encoding="utf-8") as fp:
            json.dump(obj, fp=fp, ensure_ascii=False, sort_keys=True)
        os.replace(temp_path, filepath)
    except Exception:
        # Best-effort cleanup of the partial temp file; the original error
        # is the one worth reporting, so a failed removal is ignored.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise

    print(f"Saved parsed state to '{filepath}'")
|
||
|
||
|
||
def _recreate_download_dir():
    """Remove a leftover download directory, then create a fresh empty one."""
    try:
        shutil.rmtree(DIRNAME)
        print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
    except Exception as e:
        # Best effort: the directory is absent when the previous session
        # cleaned up after itself.
        print(f"Error deleting directory '{DIRNAME}': {e}")

    try:
        os.mkdir(DIRNAME)
    except Exception:
        print(f"Failed create '{DIRNAME}': ")
        raise
    print(f"Directory '{DIRNAME}' created successfully.")


def _process_excel_link(excel_dict):
    """Refresh the saved state of one Excel link, re-parsing only on change."""
    excel_url = excel_dict['url']
    facultet = excel_dict['facultet']
    excel_filename = excel_url.split("/")[-1]

    state_path = parsed_file_path(excel_filename)
    if state_path is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"Unsupported file format: {excel_filename}")
    excel_dict['json_represent'] = state_path.split(os.path.sep)[-1]

    state = load_parsed_state(excel_filename)
    if state is None:
        state = {}
    else:
        same_date = False
        try:
            same_date = state['excel']['last_changed'] == excel_dict['last_changed']
            print(f"Excel[{excel_filename}]: inServer={excel_dict['last_changed']}, inState={state['excel']['last_changed']} same={same_date}")
        except (KeyError, TypeError):
            # Malformed or incomplete saved state: treat the file as changed.
            print(f"Excel[{excel_filename}]: failed testify last_changed")

        if same_date:
            # Unchanged on the server: only refresh the timestamp and clear
            # the "changed in this session" marker.
            state['actual_at'] = currt()
            state['excel'].pop('different_in_this_session', None)
            save_parsed_state(excel_filename, state)
            return

    excel_dict['different_in_this_session'] = True
    state['actual_at'] = currt()
    state['excel'] = excel_dict

    is_xlsx = excel_url.endswith(".xlsx")
    download_place = f"{DIRNAME}/" + excel_filename + "_" + facultet + ".xls" + ("x" if is_xlsx else "")
    utils.download_file_from_url(excel_url, download_place)
    state['excel']['sha1hash'] = utils.calculate_sha1(download_place)

    state['sheets'] = parse_sheets(download_place)

    save_parsed_state(excel_filename, state)


def _last_changed_summary(last_changeds):
    """Return the earliest and newest ``last_changed`` values seen."""
    try:
        ordered = sorted(last_changeds)
        return {"early": ordered[0], "newly": ordered[-1]}
    except (IndexError, TypeError):
        # No links at all, or values that cannot be compared to each other.
        return {"*_x": ":("}


def run_session():
    """Run one full update pass over all schedule Excel files.

    Steps: recreate the temporary download directory, fetch the list of
    Excel links, refresh the saved state of every link (downloading and
    re-parsing only files whose ``last_changed`` differs from the saved
    one), write the ``parser.json`` summary and remove the download
    directory again.

    A failure on a single link is recorded in the ``faileds`` list of
    ``parser.json`` and does not stop the session.

    Raises:
        Exception: Whatever ``os.mkdir`` raises if the download directory
            cannot be created, or whatever ``links_parser.parse_links``
            raises.
    """
    faileds = []

    _recreate_download_dir()

    print("main(); parse links starting...")
    EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])

    last_changeds = set()
    for excel_dict in EXCEL_LINKS:
        try:
            last_changeds.add(excel_dict['last_changed'])
            _process_excel_link(excel_dict)
        except Exception as e:
            # The handler itself must not raise, e.g. when the link entry is
            # the thing that is malformed and has no 'url'.
            if isinstance(excel_dict, dict):
                failed_url = excel_dict.get('url', '<unknown url>')
            else:
                failed_url = '<unknown url>'
            faileds.append({
                "uuid": str(uuid.uuid4()),
                "exception": str(e),
                "traceback": traceback.format_exception(e),
                "context": f"Failed process excel file {failed_url}"
            })
            traceback.print_exception(e)

    summary = {
        "last_changeds": _last_changed_summary(last_changeds),
        "actual_at": currt(),
        "all_files": EXCEL_LINKS,
        "faileds": faileds
    }
    with open("parser.json", 'w', encoding="utf-8") as fp:
        json.dump(summary, fp=fp, ensure_ascii=False)

    # Delete the download directory together with its contents.
    try:
        shutil.rmtree(DIRNAME)
        print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
    except Exception as e:
        print(f"Error deleting directory '{DIRNAME}': {e}")
|
||
|
||
def check_dirs():
    """Create the parsed-state directory if nothing exists at its path."""
    if os.path.exists(PARSED_DIR):
        return
    os.mkdir(PARSED_DIR)
|
||
|
||
def main():
    """Run update sessions forever, pausing 30 minutes between them.

    Any exception raised by a session is printed with its traceback and the
    loop carries on with the next session after the pause.
    """
    pause_seconds = 60 * 30

    while True:
        try:
            check_dirs()

            print("BEGIN run_session();")
            run_session()
            print("END run_session();")
        except Exception as error:
            print("Exception in run_session();")
            traceback.print_exception(error)

        print("Sleep for 30 minutes")
        time.sleep(pause_seconds)
        print("Wake up!")
|
||
|
||
|
||
|
||
# Script entry point: start the endless update loop when executed directly.
if __name__ == "__main__":
    print("Start")
    main()
    # Reached only if main() ever returns.
    print("Bye!")
|