Added groups.json generation functionality
All checks were successful
Build and Run VSTU Schedule Parser / build_and_run (push) Successful in 18s

This commit is contained in:
2026-04-05 13:03:54 +03:00
parent 4e357e52f8
commit e2b96037ce
4 changed files with 110 additions and 2 deletions

View File

@@ -44,6 +44,7 @@ jobs:
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \ -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \ -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \ -v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/facultets.json:/app/facultets.json \
--restart=always \ --restart=always \
--name=vstu_schedule_parser_v2 \ --name=vstu_schedule_parser_v2 \
vstu_schedule_parser_v2:latest vstu_schedule_parser_v2:latest

3
.gitignore vendored
View File

@@ -7,4 +7,5 @@ groups.json
diffable_dates.txt diffable_dates.txt
parsed/ parsed/
parser.json parser.json
.env .env
facultets.json

67
main.py
View File

@@ -47,10 +47,56 @@ FACULTETS = sorted([
DIRNAME = "excels" DIRNAME = "excels"
PARSED_DIR = "parsed" PARSED_DIR = "parsed"
DEBUG_ONE_FAC = None #'fevt' DEBUG_ONE_FAC = None# 'fevt'
DEBUG_NO_SAVE_STATES = False DEBUG_NO_SAVE_STATES = False
DEBUG_NO_LINKS_DELAY = True DEBUG_NO_LINKS_DELAY = True
facultets_data = None
def gen_groups_from_states(states):
    """Build the payload written to ``groups.json`` from parsed excel states.

    Every group found on every sheet of every state is registered under a
    "full path key" of the form ``<FACULTY ABBREV>/<GROUP KEY>``. When the
    faculty recognized from the excel URL differs from the technical faculty
    stored in the state, the recognized abbreviation is prepended as well
    (``<RECOGNIZED>/<TECH>/<GROUP KEY>``). Spaces and newlines are removed
    from the key and it is upper-cased.

    If the same full path key is produced more than once, the first entry is
    kept, marked with ``"doubled": True`` and the extra excel descriptor is
    appended to its ``"excels"`` list.

    Reads the module-level ``facultets_data``.

    :param states: iterable of state dicts; each must provide
        ``state['excel']`` (with ``'url'`` and ``'facultet'``) and
        ``state['sheets']`` (mapping to sheet dicts with ``'groups'``,
        ``'name'`` and ``'index'``).
    :return: ``{"version": 1, "groups": {...}}`` with dict keys sorted at
        every nesting level, or ``None`` when ``facultets_data`` has not been
        loaded yet.
    """
    if facultets_data is None:
        print("FAILED BECAUSE facultets_data is NONE!!!")
        return None

    groups = {}
    for state in states:
        excel = state['excel']
        tech_fac = excel['facultet']
        # Faculty lookups depend only on the state, so do them once per state
        # instead of once per group.
        recognized_fac = utils.get_preferer_facultet(facultets_data, excel['url'], skip_for=['mag', 'asp'])
        prefix = utils.get_abbrev_for_facultet(facultets_data, tech_fac) + "/"
        if recognized_fac is not None and recognized_fac != tech_fac:
            prefix = utils.get_abbrev_for_facultet(facultets_data, recognized_fac) + "/" + prefix

        for sheet in state['sheets'].values():
            for key, group_dict in sheet['groups'].items():
                full_path_key = (prefix + key.upper()).replace(" ", "").replace("\n", "").upper().strip()

                existing = groups.get(full_path_key)
                if existing is not None:
                    # Same key seen again: keep the first entry, flag it and
                    # remember which other excel file also contains it.
                    existing['doubled'] = True
                    existing['excels'].append(excel)
                    continue

                groups[full_path_key] = {
                    "full_path_key": full_path_key,
                    "real_name": group_dict['name'],
                    "facultet_tech": tech_fac,
                    # NOTE(review): key name is misspelled ("regognized") but
                    # it is part of the emitted JSON, so it is kept as is.
                    "facultet_regognized": recognized_fac,
                    "excels": [excel],
                    "excel_position": group_dict['position_human'],
                    "excel_sheet": {
                        "name": sheet['name'],
                        "index": sheet['index']
                    },
                    "slots_weekdays_used": sorted(group_dict['slots'].keys())
                }

    # The dumps/loads round trip yields a copy whose dict keys are sorted at
    # every nesting level, giving a stable output file.
    return {"version": 1, "groups": json.loads(json.dumps(groups, sort_keys=True, ensure_ascii=False))}
def parse_sheets(download_place): def parse_sheets(download_place):
to_return = {} to_return = {}
try: try:
@@ -187,6 +233,14 @@ def run_session():
last_changeds.add(excel_dict['last_changed']) last_changeds.add(excel_dict['last_changed'])
excel_url = excel_dict['url'] excel_url = excel_dict['url']
for state in states:
ch = state['excel']['url']
if excel_url == ch:
print(f"Doubled excel files(By URLs)! Current 1th={excel_dict}; 2th={state['excel']}")
print("Skipped!")
continue
facultet = excel_dict['facultet'] facultet = excel_dict['facultet']
excel_filename = excel_url.split("/")[-1] excel_filename = excel_url.split("/")[-1]
excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1] excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1]
@@ -251,6 +305,7 @@ def run_session():
changed_files += 1 changed_files += 1
changed = True changed = True
excel_dict['different_in_this_session'] = True excel_dict['different_in_this_session'] = True
excel_dict['recognized_facultet'] = utils.get_preferer_facultet(facultets_data, excel_url=excel_dict['url'])
state['actual_at'] = currt() state['actual_at'] = currt()
state['excel'] = excel_dict state['excel'] = excel_dict
@@ -309,6 +364,9 @@ def run_session():
"faileds": faileds "faileds": faileds
}, fp=fp, ensure_ascii=False) }, fp=fp, ensure_ascii=False)
with open("groups.json", 'w', encoding="utf-8") as fp:
json.dump(gen_groups_from_states(states), fp=fp, ensure_ascii=False)
if changed: if changed:
all_files = states all_files = states
d = { d = {
@@ -322,6 +380,9 @@ def run_session():
"all_files": sorted(all_files, key=lambda d: d['excel']['url']), "all_files": sorted(all_files, key=lambda d: d['excel']['url']),
"faileds": faileds "faileds": faileds
} }
with open("result_v2.json", 'w', encoding="utf-8") as fp: with open("result_v2.json", 'w', encoding="utf-8") as fp:
json.dump(d, fp=fp, ensure_ascii=False) json.dump(d, fp=fp, ensure_ascii=False)
@@ -352,6 +413,10 @@ def check_dirs():
os.mkdir(PARSED_DIR) os.mkdir(PARSED_DIR)
def main(): def main():
global facultets_data
with open("facultets.json") as fp:
facultets_data = json.load(fp=fp)
flag = True flag = True
while flag: while flag:
if not INFINITY_LOOP: if not INFINITY_LOOP:

View File

@@ -1,6 +1,7 @@
# Copyright Stanislav Mironov # Copyright Stanislav Mironov
import time import time
import traceback
import xlrd import xlrd
from coord import Coord from coord import Coord
from translations import ExcelSheetReader from translations import ExcelSheetReader
@@ -10,6 +11,46 @@ import hashlib
import requests import requests
from urllib.parse import urlsplit, urlunsplit, quote from urllib.parse import urlsplit, urlunsplit, quote
def get_preferer_facultet(facultets_data: dict, excel_url: str, skip_for=None, ):
    """Return the id of the first faculty whose short name occurs in the URL.

    Faculties are checked in the iteration order of *facultets_data*; the
    comparison is a case-insensitive substring test of each entry of the
    faculty's ``"short_names"`` list against *excel_url*.

    :param facultets_data: mapping ``faculty id -> faculty description``.
        Keys starting with ``"_"`` are ignored, as are faculties without a
        ``"short_names"`` entry.
    :param excel_url: URL of the excel file to classify.
    :param skip_for: optional collection of faculty ids that must never be
        returned.
    :return: the matching faculty id, or ``None`` when nothing matches or the
        URL is empty.
    """
    if not excel_url:
        return None
    if skip_for is None:
        skip_for = ()

    # Lower-case the URL once instead of once per candidate short name.
    url_lower = excel_url.lower()
    for fac_id, fac_info in facultets_data.items():
        if fac_id.startswith("_") or fac_id in skip_for:
            continue
        short_names = fac_info.get("short_names", None)
        if short_names is None:
            continue
        for name in short_names:
            # An empty name is a substring of every URL and would make this
            # faculty match everything, so it is ignored.
            if name and name.lower() in url_lower:
                return fac_id
    return None
def get_abbrev_for_facultet(facultets_data: dict, facultet_id: str, fallback_not_found="?", fallback_error="?", fallback_no_short_name="?"):
    """Return the display abbreviation for a faculty id.

    ``'mag'`` and ``'asp'`` have fixed abbreviations and never consult
    *facultets_data*. For any other id the first entry of the faculty's
    ``"short_names"`` list is returned.

    :param facultets_data: mapping ``faculty id -> faculty description``.
    :param facultet_id: id of the faculty to look up.
    :param fallback_not_found: returned when *facultet_id* is not a key of
        *facultets_data*.
    :param fallback_error: returned when the first short name cannot be read
        (for example the list is empty); the traceback is printed.
    :param fallback_no_short_name: returned when the faculty has no
        ``"short_names"`` entry.
    :return: the abbreviation or one of the fallbacks.
    """
    if facultet_id == 'mag':
        return "МАГ"
    if facultet_id == 'asp':
        return "АСП"

    # Direct key lookup instead of scanning every item of the mapping.
    if facultet_id not in facultets_data:
        return fallback_not_found

    short_names = facultets_data[facultet_id].get("short_names", None)
    if short_names is None:
        return fallback_no_short_name
    try:
        return short_names[0]
    except (IndexError, KeyError, TypeError):
        # print_exc() needs no arguments and works on every Python 3 version,
        # unlike the single-argument form of print_exception().
        traceback.print_exc()
        return fallback_error
def download_file_from_url(url, output_filename): def download_file_from_url(url, output_filename):
""" """
Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем. Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем.