Added groups.json generation functionality

2026-04-05 13:03:54 +03:00
parent 4e357e52f8
commit e2b96037ce
4 changed files with 110 additions and 2 deletions
--- a/.gitea/workflows/deploy.yml
+++ b/.gitea/workflows/deploy.yml
@@ -44,6 +44,7 @@ jobs:
            -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \
            -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \
            -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \
            -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/facultets.json:/app/facultets.json \
            --restart=always \
            --name=vstu_schedule_parser_v2 \
            vstu_schedule_parser_v2:latest
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,5 @@ groups.json
 diffable_dates.txt
 parsed/
 parser.json
-.env
+.env
 facultets.json
--- a/main.py
+++ b/main.py
@@ -47,10 +47,56 @@ FACULTETS = sorted([
 DIRNAME = "excels"
 PARSED_DIR = "parsed"
-DEBUG_ONE_FAC = None #'fevt'
+DEBUG_ONE_FAC = None# 'fevt'
 DEBUG_NO_SAVE_STATES = False
 DEBUG_NO_LINKS_DELAY = True
 facultets_data = None
 def gen_groups_from_states(states):
    """Build the ``groups.json`` payload from the parsed excel states.

    Every group found on every sheet of every state is registered under a
    normalized "full path key" of the form ``<FACULTY ABBREV>/<GROUP KEY>``
    (upper-cased, spaces and newlines removed). When the faculty recognized
    from the excel URL is known and differs from the technical faculty
    stored in the state, the recognized faculty's abbreviation is prepended
    as one more leading segment.

    A key seen more than once keeps the data of its first occurrence, is
    flagged with ``"doubled": True`` and collects the excel descriptor of
    every later occurrence in its ``"excels"`` list.

    :param states: iterable of state dicts; each one is read through
        ``state['excel']`` (``'url'`` and ``'facultet'``) and
        ``state['sheets']``.
    :return: ``{"version": 1, "groups": {...}}`` whose dict keys are sorted
        at every nesting level, or ``None`` (after printing a diagnostic)
        when the module-level ``facultets_data`` has not been loaded.
    """
    if facultets_data is None:
        print("FAILED BECAUSE facultets_data is NONE!!!")
        return None

    groups = {}
    for state in states:
        excel = state['excel']
        tech_fac = excel['facultet']
        # Both faculty lookups depend only on the state, so resolve them once
        # per state instead of once per group.
        recognized_fac = utils.get_preferer_facultet(facultets_data, excel['url'], skip_for=['mag', 'asp'])
        prefix = utils.get_abbrev_for_facultet(facultets_data, tech_fac) + "/"
        if recognized_fac is not None and tech_fac != recognized_fac:
            prefix = utils.get_abbrev_for_facultet(facultets_data, recognized_fac) + "/" + prefix

        for sheet in state['sheets'].values():
            for key, group_dict in sheet['groups'].items():
                # Normalize the whole path (prefix included), exactly as the
                # key is expected to appear in groups.json.
                full_path_key = (prefix + key.upper()).replace(" ", "").replace("\n", "").upper().strip()

                known = groups.get(full_path_key)
                if known is not None:
                    # Same group path met again: keep the first record and
                    # only remember the additional source file.
                    known['doubled'] = True
                    known['excels'].append(excel)
                    continue

                groups[full_path_key] = {
                    "full_path_key": full_path_key,
                    "real_name": group_dict['name'],
                    "facultet_tech": tech_fac,
                    "facultet_regognized": recognized_fac,
                    "excels": [excel],
                    "excel_position": group_dict['position_human'],
                    "excel_sheet": {
                        "name": sheet['name'],
                        "index": sheet['index']
                    },
                    "slots_weekdays_used": sorted(group_dict['slots'].keys())
                }

    # The JSON round trip yields a detached copy with keys sorted at every
    # nesting level, so the written file is stable between runs.
    return {"version": 1, "groups": json.loads(json.dumps(groups, sort_keys=True, ensure_ascii=False))}
 def parse_sheets(download_place):
    to_return = {}
    try:
@@ -187,6 +233,14 @@ def run_session():
            last_changeds.add(excel_dict['last_changed'])
            excel_url = excel_dict['url']
            for state in states:
                ch = state['excel']['url']
                if excel_url == ch:
                    print(f"Doubled excel files(By URLs)! Current 1th={excel_dict}; 2th={state['excel']}")
                    print("Skipped!")
                    continue
            facultet = excel_dict['facultet']
            excel_filename = excel_url.split("/")[-1]
            excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1]
@@ -251,6 +305,7 @@ def run_session():
            changed_files += 1
            changed = True
            excel_dict['different_in_this_session'] = True
            excel_dict['recognized_facultet'] = utils.get_preferer_facultet(facultets_data, excel_url=excel_dict['url'])
            state['actual_at'] = currt()
            state['excel'] = excel_dict
@@ -309,6 +364,9 @@ def run_session():
                "faileds": faileds
            }, fp=fp, ensure_ascii=False)
    with open("groups.json", 'w', encoding="utf-8") as fp:
        json.dump(gen_groups_from_states(states), fp=fp, ensure_ascii=False)
    if changed:
        all_files = states
        d = {
@@ -322,6 +380,9 @@ def run_session():
            "all_files": sorted(all_files, key=lambda d: d['excel']['url']),
            "faileds": faileds
        }
        with open("result_v2.json", 'w', encoding="utf-8") as fp:
            json.dump(d, fp=fp, ensure_ascii=False)
@@ -352,6 +413,10 @@ def check_dirs():
        os.mkdir(PARSED_DIR)
 def main():
    global facultets_data
    with open("facultets.json") as fp:
        facultets_data = json.load(fp=fp)
    flag = True
    while flag:
        if not INFINITY_LOOP:
--- a/utils.py
+++ b/utils.py
@@ -1,6 +1,7 @@
 # Copyright Stanislav Mironov
 import time
 import traceback
 import xlrd
 from coord import Coord
 from translations import ExcelSheetReader
@@ -10,6 +11,46 @@ import hashlib
 import requests
 from urllib.parse import urlsplit, urlunsplit, quote
 def get_preferer_facultet(facultets_data: dict, excel_url: str, skip_for=None):
    """Return the id of the first faculty whose short name occurs in *excel_url*.

    Faculties are checked in the iteration order of *facultets_data*; the
    match is a case-insensitive substring test of each entry of the
    faculty's ``"short_names"`` list against the URL.

    :param facultets_data: mapping of faculty id to its description dict.
        Ids starting with ``"_"`` are ignored, as are faculties without a
        ``"short_names"`` entry.
    :param excel_url: URL of the excel file to classify.
    :param skip_for: optional collection of faculty ids that must never be
        returned.
    :return: the matching faculty id, or ``None`` when nothing matches.
    """
    skipped = () if skip_for is None else skip_for
    # Lower-case the URL once instead of once per candidate name.
    url_lowered = excel_url.lower()

    for facultet_id, facultet in facultets_data.items():
        if facultet_id.startswith("_") or facultet_id in skipped:
            continue
        short_names = facultet.get("short_names")
        if short_names is None:
            continue
        for name in short_names:
            # An empty name is a substring of every URL and would make this
            # faculty win unconditionally, so it is never treated as a match.
            if name and name.lower() in url_lowered:
                return facultet_id
    return None
 def get_abbrev_for_facultet(facultets_data: dict, facultet_id: str, fallback_not_found="?", fallback_error="?", fallback_no_short_name="?"):
    """Return the display abbreviation of a faculty.

    The ids ``'mag'`` and ``'asp'`` map to fixed abbreviations without
    consulting *facultets_data*. For any other id the abbreviation is the
    first entry of that faculty's ``"short_names"``.

    :param facultets_data: mapping of faculty id to its description dict.
    :param facultet_id: id of the faculty to abbreviate.
    :param fallback_not_found: returned when the id is absent from
        *facultets_data*.
    :param fallback_error: returned when the first short name cannot be
        read (for example the list is empty); the traceback is printed.
    :param fallback_no_short_name: returned when the faculty has no
        ``"short_names"`` entry.
    :return: the abbreviation, or one of the fallbacks.
    """
    if facultet_id == 'mag':
        return "МАГ"
    if facultet_id == 'asp':
        return "АСП"

    # Direct lookup instead of scanning every item for an equal key.
    if facultet_id not in facultets_data:
        return fallback_not_found

    short_names = facultets_data[facultet_id].get("short_names")
    if short_names is None:
        return fallback_no_short_name
    try:
        return short_names[0]
    except (IndexError, KeyError, TypeError):
        # print_exc() reports the active exception on every Python 3
        # version; the single-argument print_exception(e) needs 3.10+.
        traceback.print_exc()
        return fallback_error
 def download_file_from_url(url, output_filename):
    """
    Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем.