From e2b96037ce79b9a718dd27b46b2318e87a15e985 Mon Sep 17 00:00:00 2001 From: FazziCLAY Date: Sun, 5 Apr 2026 13:03:54 +0300 Subject: [PATCH] created groups.json functional --- .gitea/workflows/deploy.yml | 1 + .gitignore | 3 +- main.py | 67 ++++++++++++++++++++++++++++++++++++- utils.py | 41 +++++++++++++++++++++++ 4 files changed, 110 insertions(+), 2 deletions(-) diff --git a/.gitea/workflows/deploy.yml b/.gitea/workflows/deploy.yml index 7d3a729..8ed0b91 100644 --- a/.gitea/workflows/deploy.yml +++ b/.gitea/workflows/deploy.yml @@ -44,6 +44,7 @@ jobs: -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \ -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \ -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \ + -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/facultets.json:/app/facultets.json \ --restart=always \ --name=vstu_schedule_parser_v2 \ vstu_schedule_parser_v2:latest diff --git a/.gitignore b/.gitignore index 26105a5..4ca9256 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ groups.json diffable_dates.txt parsed/ parser.json -.env \ No newline at end of file +.env +facultets.json \ No newline at end of file diff --git a/main.py b/main.py index ba1caf7..dff1db1 100644 --- a/main.py +++ b/main.py @@ -47,10 +47,56 @@ FACULTETS = sorted([ DIRNAME = "excels" PARSED_DIR = "parsed" -DEBUG_ONE_FAC = None #'fevt' +DEBUG_ONE_FAC = None# 'fevt' DEBUG_NO_SAVE_STATES = False DEBUG_NO_LINKS_DELAY = True +facultets_data = None + + +def gen_groups_from_states(states): + groups = {} + if facultets_data is None: + print("FAILED BECAUSE facultets_data is NONE!!!") + return + + for state in states: + for sheet in state['sheets'].values(): + gr = sheet['groups'] + if len(gr.keys()) == 0: + continue + + for key, group_dict in gr.items(): + group_name = group_dict['name'] + + full_path_key = key.upper() + recognized_fac = utils.get_preferer_facultet(facultets_data, state['excel']['url'], skip_for=['mag', 'asp']) + tech_fac = state['excel']['facultet'] + full_path_key = utils.get_abbrev_for_facultet(facultets_data, tech_fac) + "/" + full_path_key + if tech_fac != recognized_fac and recognized_fac is not None: + full_path_key = utils.get_abbrev_for_facultet(facultets_data, recognized_fac) + "/" + full_path_key + + full_path_key = full_path_key.replace(" ", "").replace("\n", "").upper().strip() + if full_path_key in groups.keys(): + groups[full_path_key]['doubled'] = True + groups[full_path_key]['excels'].append(state['excel']) + else: + groups[full_path_key] = { + "full_path_key": full_path_key, + "real_name": group_name, + "facultet_tech": tech_fac, + "facultet_regognized": recognized_fac, + "excels": [state['excel']], + "excel_position": group_dict['position_human'], + "excel_sheet": { + "name": sheet['name'], + "index": sheet['index'] + }, + "slots_weekdays_used": sorted(group_dict['slots'].keys()) + } + + return {"version": 1, "groups": json.loads(json.dumps(groups, sort_keys=True, ensure_ascii=False))} + def parse_sheets(download_place): to_return = {} try: @@ -187,6 +233,14 @@ def run_session(): last_changeds.add(excel_dict['last_changed']) excel_url = excel_dict['url'] + + for state in states: + ch = state['excel']['url'] + if excel_url == ch: + print(f"Doubled excel files(By URLs)! Current 1th={excel_dict}; 2th={state['excel']}") + print("Skipped!") + continue + facultet = excel_dict['facultet'] excel_filename = excel_url.split("/")[-1] excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1] @@ -251,6 +305,7 @@ def run_session(): changed_files += 1 changed = True excel_dict['different_in_this_session'] = True + excel_dict['recognized_facultet'] = utils.get_preferer_facultet(facultets_data, excel_url=excel_dict['url']) state['actual_at'] = currt() state['excel'] = excel_dict @@ -309,6 +364,9 @@ def run_session(): "faileds": faileds }, fp=fp, ensure_ascii=False) + with open("groups.json", 'w', encoding="utf-8") as fp: + json.dump(gen_groups_from_states(states), fp=fp, ensure_ascii=False) + if changed: all_files = states d = { @@ -322,6 +380,9 @@ def run_session(): "all_files": sorted(all_files, key=lambda d: d['excel']['url']), "faileds": faileds } + + + with open("result_v2.json", 'w', encoding="utf-8") as fp: json.dump(d, fp=fp, ensure_ascii=False) @@ -352,6 +413,10 @@ def check_dirs(): os.mkdir(PARSED_DIR) def main(): + global facultets_data + with open("facultets.json") as fp: + facultets_data = json.load(fp=fp) + flag = True while flag: if not INFINITY_LOOP: diff --git a/utils.py b/utils.py index 29b515b..2845f18 100644 --- a/utils.py +++ b/utils.py @@ -1,6 +1,7 @@ # Copyright Stanislav Mironov import time +import traceback import xlrd from coord import Coord from translations import ExcelSheetReader @@ -10,6 +11,46 @@ import hashlib import requests from urllib.parse import urlsplit, urlunsplit, quote +def get_preferer_facultet(facultets_data: dict, excel_url: str, skip_for=None, ): + if skip_for is None: + skip_for = [] + + for _key, _value in facultets_data.items(): + if _key.startswith("_"): + continue + if _key in skip_for: + continue + + short_names = _value.get("short_names", None) + if short_names is None: + continue + + for name in short_names: + if name.lower() in excel_url.lower(): + return _key + +def get_abbrev_for_facultet(facultets_data: dict, facultet_id: str, fallback_not_found="?", fallback_error="?", fallback_no_short_name="?"): + if (facultet_id == 'mag'): + return "МАГ" + if (facultet_id == 'asp'): + return "АСП" + + for _key, _value in facultets_data.items(): + if _key != facultet_id: + continue + + short_names = _value.get("short_names", None) + if short_names is None: + return fallback_no_short_name + + try: + return short_names[0] + except Exception as e: + traceback.print_exception(e) + return fallback_error + return fallback_not_found + + def download_file_from_url(url, output_filename): """ Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем.