diff --git a/main.py b/main.py index 75a3b8d..4e7d5f3 100644 --- a/main.py +++ b/main.py @@ -26,8 +26,9 @@ DIRNAME = "excels" PARSED_DIR = "parsed" DEBUG_ONE_FAC = None #'fevt' +DEBUG_NO_SAVE_STATES = False -parser.LOGGING = LOGGING = False +parser.LOGGING = LOGGING = True def parse_sheets(download_place): to_return = {} @@ -120,6 +121,8 @@ def load_parsed_state(excel_filename): def save_parsed_state(excel_filename, obj): filepath = parsed_file_path(excel_filename) + if DEBUG_NO_SAVE_STATES: + print("Saved! (fake because DEBUG_NO_SAVE_STATES)") with open(filepath, "w", encoding="utf-8") as fp: json.dump(obj, fp=fp, ensure_ascii=False, sort_keys=True) @@ -149,7 +152,12 @@ def run_session(): print("main(); parse links starting...") EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC]) + if len(EXCEL_LINKS) < 5 and not DEBUG_ONE_FAC: + raise Exception("Safety exception: excel links count < 5; maybe in vstu.ru tech works") + + last_changeds = set() + states = [] for excel_dict in EXCEL_LINKS: try: last_changeds.add(excel_dict['last_changed']) @@ -179,6 +187,7 @@ def run_session(): try: del state['excel']['different_in_this_session'] except: pass + states.append(state) save_parsed_state(excel_filename, state) continue @@ -195,6 +204,7 @@ def run_session(): state['sheets'] = parse_sheets(download_place) save_parsed_state(excel_filename, state) + states.append(state) except Exception as e: faileds.append({ @@ -222,6 +232,20 @@ def run_session(): "all_files": EXCEL_LINKS, "faileds": faileds }, fp=fp, ensure_ascii=False) + + with open("result_v2.json", 'w', encoding="utf-8") as fp: + all_files = states + json.dump({ + "version": 2, + "notice": "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: ПРЕДОСТАВЛЯЕТСЯ КАК-ЕСТЬ (AS-IS) БЕЗ КАКИХ ЛИБО ГАРАНТИЙ", + "contact": "https://fazziclay.com/ или fazziclay@gmail.com", + "api_notices": { + "just_save_and_check_diffs": "просто сохраните и проверяйте разницу" + }, + "actual_at": currt(), + "all_files": sorted(all_files, key=lambda d: d['excel']['url']), + "faileds": faileds + }, fp=fp, ensure_ascii=False) # Delete a non-empty directory and its contents try: @@ -242,6 +266,11 @@ def main(): print("BEGIN run_session();") run_session() print("END run_session();") + + if DEBUG_ONE_FAC: + print("DEBUG_ONE_FAC; break infinity-loop") + break + except Exception as e: print("Exception in run_session();") traceback.print_exception(e) diff --git a/parser.py b/parser.py index ae07fb1..220c450 100644 --- a/parser.py +++ b/parser.py @@ -98,7 +98,7 @@ class Parser: head_joined = " ||| ".join([v for v in head if isinstance(v, str) and v.strip()]) print(head_joined) - if "1 неделя" in head_joined or "1 НЕДЕЛЯ" in head_joined or "2 неделя" in head_joined or "2 НЕДЕЛЯ" in head_joined or "ИЗМЕНЕНИЯ" in head_joined or "изменения" in head_joined or "vtf-vstu.ru" in head_joined: + if (len(head_joined) == 0) or "1 неделя" in head_joined or "1 НЕДЕЛЯ" in head_joined or "2 неделя" in head_joined or "2 НЕДЕЛЯ" in head_joined or "ИЗМЕНЕНИЯ" in head_joined or "изменения" in head_joined or "vtf-vstu.ru" in head_joined: head_rx -= 1 self.raw_no_schedule.append(head_joined) head = self.reader.get_row_values(head_rx) # get all ROW (months, groups)