# Copyright Stanislav Mironov
# Project-wide rule: coordinates always list ROW first and then COL,
# and numbering starts from zero.
import json
import os
import random
import shutil
import time
import traceback
import uuid

import parser
import translations
import utils
import links_parser


def currt():
    """Return the current Unix time rounded to whole seconds."""
    return round(time.time())


# Faculty codes handed to links_parser.parse_links().
FACULTETS = sorted([
    "asp",
    "mag",
    "fastiv",
    "fat",
    "ftkm",
    "ftpp",
    "feu",
    "fevt",
    "htf",
    "vkf",
    "mmf",
    "fpik",
])
# Temporary directory for downloaded workbooks; recreated on every session.
DIRNAME = "excels"
# Directory where the per-workbook JSON state files are kept.
PARSED_DIR = "parsed"
# Set to a single faculty code to restrict a session to it (debugging aid).
DEBUG_ONE_FAC = None  # 'fevt'
parser.LOGGING = LOGGING = False


def parse_sheets(download_place):
    """Parse every sheet of a downloaded workbook into a JSON-ready dict.

    Args:
        download_place: Path of the downloaded file, passed to
            ``translations.create_reader``.

    Returns:
        A dict with one ``"SHEET_<index>"`` entry per processed sheet. If any
        exception is raised while reading or parsing, an ``"error"`` entry
        describing it is added and the sheets collected so far are kept; the
        exception is never propagated to the caller.
    """
    to_return = {}
    try:
        reader = translations.create_reader(download_place)
        print("Reader info")
        print(reader.info())
        while True:
            t = utils.StepTimeCounter()
            print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
            sheet_dict = {
                "index": reader.get_sheet_index(),
                "name": reader.get_sheet_name(),
                "reader_info": reader.info(),
                "groups": {},
            }
            # Registered before parsing so a failing sheet still shows up
            # in the result next to the "error" entry.
            to_return["SHEET_" + str(reader.get_sheet_index())] = sheet_dict

            prs = parser.Parser(reader)
            print("Parser created; parser.parse();")
            prs.parse()
            print("parsed done!")
            sheet_dict['parse_time'] = round(t.step())

            if len(prs.raw_no_schedule) > 0:
                sheet_dict["other_raws"] = prs.raw_no_schedule
            if len(prs.features) > 0:
                sheet_dict["features"] = sorted(prs.features)
            if prs.parser_error is not None:
                sheet_dict["parser_error"] = prs.parser_error
            if prs.parser_warnings is not None and len(prs.parser_warnings) > 0:
                sheet_dict["parser_warnings"] = prs.parser_warnings

            # The metadata is the same for every group, so store it once;
            # it is still only present when the sheet has at least one group.
            if len(prs.groups) > 0:
                sheet_dict['week_keys_metadata'] = prs.week_keys_metadata
            for group_name_key in prs.groups.keys():
                sheet_dict['groups'][group_name_key] = prs.groups[group_name_key]
            print(f"Populates {len(prs.groups)} groups: " + " ".join(prs.groups.keys()))

            if not reader.has_next_sheet():
                print("File ended")
                break
            reader.next_sheet()
            print("Next sheet!")
    except Exception as e:
        # Boundary handler: report the failure inside the result instead of
        # losing the sheets that were already parsed.
        print(e)
        traceback.print_exc()
        u = uuid.uuid4()
        to_return['error'] = {
            "smile": ":(",
            "error_message": str(e),
            "log_anchor": str(u),
            "time": currt(),
        }
        print(f"Log Anchor: {u}")
    return to_return


def parsed_file_path(excel_filename: str):
    """Map a workbook (or state) file name to its JSON state file path.

    Only the trailing extension is replaced by ``.json``; the resulting file
    name is lower-cased and placed inside ``PARSED_DIR``.

    Args:
        excel_filename: File name ending in ``json``, ``xls`` or ``xlsx``
            (case-insensitive).

    Returns:
        The state file path, or ``None`` (after printing a message) when the
        extension is not one of the supported ones.
    """
    stem, dot, extension = excel_filename.rpartition(".")
    kind = extension.lower()
    if kind not in ("json", "xls", "xlsx"):
        print(f"Unknown filename format: {excel_filename}")
        return None
    if dot and kind != "json":
        # Swap only the final extension; dots earlier in the name stay as is.
        excel_filename = stem + ".json"
    return PARSED_DIR + os.path.sep + excel_filename.lower()


def load_parsed_state(excel_filename):
    """Load the previously saved state for a workbook.

    Args:
        excel_filename: Workbook file name, resolved via ``parsed_file_path``.

    Returns:
        The decoded JSON content, or ``None`` when the file name has an
        unsupported extension, no state file exists, or the state file cannot
        be decoded (so the workbook is treated as new and parsed again).
    """
    filepath = parsed_file_path(excel_filename)
    if filepath is None or not os.path.exists(filepath):
        return None
    try:
        with open(filepath, "r", encoding="utf-8") as fp:
            return json.load(fp=fp)
    except ValueError as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # A broken state file must not block the workbook forever.
        print(f"Ignoring unreadable parsed state '{filepath}': {e}")
        return None


def save_parsed_state(excel_filename, obj):
    """Write the state of a workbook to its JSON state file.

    Args:
        excel_filename: Workbook file name, resolved via ``parsed_file_path``.
        obj: JSON-serializable state object.

    Raises:
        ValueError: If the file name has an unsupported extension.
        TypeError: If ``obj`` is not JSON-serializable (raised before the
            existing state file is touched).
    """
    filepath = parsed_file_path(excel_filename)
    if filepath is None:
        raise ValueError(f"Unsupported file format: {excel_filename}")
    # Serialize before opening the file so a serialization failure cannot
    # leave a truncated state file behind.
    payload = json.dumps(obj, ensure_ascii=False, sort_keys=True)
    with open(filepath, "w", encoding="utf-8") as fp:
        fp.write(payload)
    print(f"Saved parsed state to '{filepath}'")


def _remove_download_dir():
    """Delete the temporary download directory, reporting but ignoring errors."""
    try:
        shutil.rmtree(DIRNAME)
        print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
    except OSError as e:
        print(f"Error deleting directory '{DIRNAME}': {e}")


def _reset_download_dir():
    """Recreate the temporary download directory from scratch."""
    _remove_download_dir()
    try:
        os.mkdir(DIRNAME)
        print(f"Directory '{DIRNAME}' created successfully.")
    except Exception:
        print(f"Failed create '{DIRNAME}': ")
        raise


def _excel_label(excel_dict):
    """Return a printable identifier for a link entry without ever raising."""
    if isinstance(excel_dict, dict):
        return excel_dict.get("url", "<unknown url>")
    return repr(excel_dict)


def _process_excel(excel_dict, last_changeds):
    """Refresh the saved state for one workbook link entry.

    The workbook is downloaded and parsed only when there is no saved state
    or its ``last_changed`` value differs from the one in ``excel_dict``;
    otherwise only ``actual_at`` is refreshed.

    Args:
        excel_dict: Link entry; the keys ``last_changed``, ``url`` and
            ``facultet`` are read, and ``json_represent``,
            ``different_in_this_session`` and ``sha1hash`` may be added.
        last_changeds: Set collecting every ``last_changed`` value seen.
    """
    last_changeds.add(excel_dict['last_changed'])
    excel_url = excel_dict['url']
    facultet = excel_dict['facultet']
    excel_filename = excel_url.split("/")[-1]

    parsed_path = parsed_file_path(excel_filename)
    if parsed_path is None:
        raise ValueError(f"Unsupported excel file format: {excel_filename}")
    excel_dict['json_represent'] = parsed_path.split(os.path.sep)[-1]

    state = load_parsed_state(excel_filename)
    if state is None:
        state = {}
    else:
        same_date = False
        try:
            same_date = state['excel']['last_changed'] == excel_dict['last_changed']
            print(f"Excel[{excel_filename}]: inServer={excel_dict['last_changed']}, inState={state['excel']['last_changed']} same={same_date}")
        except (KeyError, TypeError):
            print(f"Excel[{excel_filename}]: failed testify last_changed")
        if same_date:
            # Unchanged on the server: refresh the timestamp and clear the
            # "changed" marker left by an earlier session.
            state['actual_at'] = currt()
            state['excel'].pop('different_in_this_session', None)
            save_parsed_state(excel_filename, state)
            return

    excel_dict['different_in_this_session'] = True
    state['actual_at'] = currt()
    # The same dict object is stored, so the hash added below is also
    # visible through the link list written to parser.json.
    state['excel'] = excel_dict

    # Case-insensitive, consistent with parsed_file_path().
    is_xlsx = excel_url.lower().endswith(".xlsx")
    suffix = ".xlsx" if is_xlsx else ".xls"
    download_place = f"{DIRNAME}/{excel_filename}_{facultet}{suffix}"
    utils.download_file_from_url(excel_url, download_place)
    state['excel']['sha1hash'] = utils.calculate_sha1(download_place)
    state['sheets'] = parse_sheets(download_place)
    save_parsed_state(excel_filename, state)


def _write_session_summary(all_files, last_changeds, faileds):
    """Write the session summary to ``parser.json``."""
    lc = {"*_x": ":("}
    try:
        ordered = sorted(last_changeds)
        lc = {
            "early": ordered[0],
            "newly": ordered[-1],
        }
    except (IndexError, TypeError):
        # No entries at all, or values that cannot be ordered: keep the
        # placeholder.
        pass
    # Serialize first so a failure cannot truncate the previous summary.
    payload = json.dumps({
        "last_changeds": lc,
        "actual_at": currt(),
        "all_files": all_files,
        "faileds": faileds,
    }, ensure_ascii=False)
    with open("parser.json", 'w', encoding="utf-8") as fp:
        fp.write(payload)


def run_session():
    """Run one full update session.

    Recreates the temporary download directory, fetches the workbook links,
    processes every link (failures are collected, not raised), writes the
    summary to ``parser.json`` and finally removes the download directory,
    even when the session itself fails.

    Raises:
        Exception: Whatever prevented creating the download directory,
            fetching the links or writing the summary.
    """
    faileds = []
    _reset_download_dir()
    try:
        print("main(); parse links starting...")
        EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])
        last_changeds = set()
        for excel_dict in EXCEL_LINKS:
            try:
                _process_excel(excel_dict, last_changeds)
            except Exception as e:
                # One broken workbook must not stop the rest of the session.
                faileds.append({
                    "uuid": str(uuid.uuid4()),
                    "exception": str(e),
                    "traceback": traceback.format_exception(e),
                    "context": f"Failed process excel file {_excel_label(excel_dict)}",
                })
                traceback.print_exception(e)
        _write_session_summary(EXCEL_LINKS, last_changeds, faileds)
    finally:
        # Delete the non-empty download directory and its contents.
        _remove_download_dir()


def check_dirs():
    """Create the parsed-state directory if it does not exist yet."""
    if not os.path.exists(PARSED_DIR):
        os.mkdir(PARSED_DIR)


def main():
    """Run sessions forever, sleeping 30 minutes between them.

    Exceptions raised by a session are printed and do not stop the loop.
    """
    while True:
        try:
            check_dirs()
            print("BEGIN run_session();")
            run_session()
            print("END run_session();")
        except Exception as e:
            print("Exception in run_session();")
            traceback.print_exception(e)
        print("Sleep for 30 minutes")
        time.sleep(60*30)
        print("Wake up!")


if __name__ == "__main__":
    print("Start")
    main()
    print("Bye!")