created groups.json functional
All checks were successful
Build and Run VSTU Schedule Parser / build_and_run (push) Successful in 18s
All checks were successful
Build and Run VSTU Schedule Parser / build_and_run (push) Successful in 18s
This commit is contained in:
@@ -44,6 +44,7 @@ jobs:
|
||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \
|
||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \
|
||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \
|
||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/facultets.json:/app/facultets.json \
|
||||
--restart=always \
|
||||
--name=vstu_schedule_parser_v2 \
|
||||
vstu_schedule_parser_v2:latest
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -7,4 +7,5 @@ groups.json
|
||||
diffable_dates.txt
|
||||
parsed/
|
||||
parser.json
|
||||
.env
|
||||
.env
|
||||
facultets.json
|
||||
67
main.py
67
main.py
@@ -47,10 +47,56 @@ FACULTETS = sorted([
|
||||
DIRNAME = "excels"
|
||||
PARSED_DIR = "parsed"
|
||||
|
||||
DEBUG_ONE_FAC = None #'fevt'
|
||||
DEBUG_ONE_FAC = None# 'fevt'
|
||||
DEBUG_NO_SAVE_STATES = False
|
||||
DEBUG_NO_LINKS_DELAY = True
|
||||
|
||||
facultets_data = None
|
||||
|
||||
|
||||
def gen_groups_from_states(states):
    """Build the payload written to ``groups.json`` from parsed excel states.

    Every group found on every sheet of every state is registered under a
    normalised path key ``<TECH ABBREV>/<GROUP KEY>``. When the faculty
    recognised from the excel URL differs from the state's own ("technical")
    faculty, the recognised faculty's abbreviation is prepended as well,
    giving ``<RECOGNISED ABBREV>/<TECH ABBREV>/<GROUP KEY>``. The key is
    upper-cased and stripped of spaces and newlines.

    A group seen again under an already registered key does not create a new
    entry: the existing entry is flagged with ``'doubled': True`` and the
    current excel dict is appended to its ``'excels'`` list.

    Reads the module-level ``facultets_data``.

    Args:
        states: iterable of state dicts. Each one is read through
            ``state['excel']`` (keys ``'url'`` and ``'facultet'``) and
            ``state['sheets']`` (mapping whose values carry ``'name'``,
            ``'index'`` and ``'groups'``).

    Returns:
        ``{"version": 1, "groups": {...}}``, or ``None`` (after printing a
        message) when ``facultets_data`` has not been loaded.
    """
    if facultets_data is None:
        print("FAILED BECAUSE facultets_data is NONE!!!")
        return None

    groups = {}
    for state in states:
        excel = state['excel']
        tech_fac = excel['facultet']

        # Both lookups depend only on the state, so they are resolved once
        # here instead of once per group.
        recognized_fac = utils.get_preferer_facultet(
            facultets_data, excel['url'], skip_for=['mag', 'asp']
        )
        prefix = utils.get_abbrev_for_facultet(facultets_data, tech_fac) + "/"
        if recognized_fac is not None and tech_fac != recognized_fac:
            prefix = (
                utils.get_abbrev_for_facultet(facultets_data, recognized_fac)
                + "/"
                + prefix
            )

        for sheet in state['sheets'].values():
            # Sheets without groups simply contribute nothing.
            for key, group_dict in sheet['groups'].items():
                full_path_key = (
                    (prefix + key.upper())
                    .replace(" ", "")
                    .replace("\n", "")
                    .upper()
                    .strip()
                )

                known = groups.get(full_path_key)
                if known is not None:
                    known['doubled'] = True
                    known['excels'].append(excel)
                    continue

                groups[full_path_key] = {
                    "full_path_key": full_path_key,
                    "real_name": group_dict['name'],
                    "facultet_tech": tech_fac,
                    # NOTE: the misspelled key is part of the emitted format
                    # and is kept as-is for existing consumers.
                    "facultet_regognized": recognized_fac,
                    "excels": [excel],
                    "excel_position": group_dict['position_human'],
                    "excel_sheet": {
                        "name": sheet['name'],
                        "index": sheet['index'],
                    },
                    "slots_weekdays_used": sorted(group_dict['slots'].keys()),
                }

    # The dumps/loads round-trip yields plain JSON types with keys sorted at
    # every nesting level, so the file written by the caller is stable.
    return {
        "version": 1,
        "groups": json.loads(json.dumps(groups, sort_keys=True, ensure_ascii=False)),
    }
|
||||
|
||||
def parse_sheets(download_place):
|
||||
to_return = {}
|
||||
try:
|
||||
@@ -187,6 +233,14 @@ def run_session():
|
||||
last_changeds.add(excel_dict['last_changed'])
|
||||
|
||||
excel_url = excel_dict['url']
|
||||
|
||||
for state in states:
|
||||
ch = state['excel']['url']
|
||||
if excel_url == ch:
|
||||
print(f"Doubled excel files(By URLs)! Current 1th={excel_dict}; 2th={state['excel']}")
|
||||
print("Skipped!")
|
||||
continue
|
||||
|
||||
facultet = excel_dict['facultet']
|
||||
excel_filename = excel_url.split("/")[-1]
|
||||
excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1]
|
||||
@@ -251,6 +305,7 @@ def run_session():
|
||||
changed_files += 1
|
||||
changed = True
|
||||
excel_dict['different_in_this_session'] = True
|
||||
excel_dict['recognized_facultet'] = utils.get_preferer_facultet(facultets_data, excel_url=excel_dict['url'])
|
||||
state['actual_at'] = currt()
|
||||
state['excel'] = excel_dict
|
||||
|
||||
@@ -309,6 +364,9 @@ def run_session():
|
||||
"faileds": faileds
|
||||
}, fp=fp, ensure_ascii=False)
|
||||
|
||||
with open("groups.json", 'w', encoding="utf-8") as fp:
|
||||
json.dump(gen_groups_from_states(states), fp=fp, ensure_ascii=False)
|
||||
|
||||
if changed:
|
||||
all_files = states
|
||||
d = {
|
||||
@@ -322,6 +380,9 @@ def run_session():
|
||||
"all_files": sorted(all_files, key=lambda d: d['excel']['url']),
|
||||
"faileds": faileds
|
||||
}
|
||||
|
||||
|
||||
|
||||
with open("result_v2.json", 'w', encoding="utf-8") as fp:
|
||||
json.dump(d, fp=fp, ensure_ascii=False)
|
||||
|
||||
@@ -352,6 +413,10 @@ def check_dirs():
|
||||
os.mkdir(PARSED_DIR)
|
||||
|
||||
def main():
|
||||
global facultets_data
|
||||
with open("facultets.json") as fp:
|
||||
facultets_data = json.load(fp=fp)
|
||||
|
||||
flag = True
|
||||
while flag:
|
||||
if not INFINITY_LOOP:
|
||||
|
||||
41
utils.py
41
utils.py
@@ -1,6 +1,7 @@
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
import time
|
||||
import traceback
|
||||
import xlrd
|
||||
from coord import Coord
|
||||
from translations import ExcelSheetReader
|
||||
@@ -10,6 +11,46 @@ import hashlib
|
||||
import requests
|
||||
from urllib.parse import urlsplit, urlunsplit, quote
|
||||
|
||||
def get_preferer_facultet(facultets_data: dict, excel_url: str, skip_for=None, ):
    """Return the key of the first faculty whose short name occurs in the URL.

    Faculties are checked in the iteration order of *facultets_data*; the
    comparison is a case-insensitive substring test of each short name
    against *excel_url*.

    Args:
        facultets_data: mapping of faculty key -> faculty description dict.
            Keys starting with ``"_"`` are ignored, as are entries without a
            ``"short_names"`` value.
        excel_url: URL of the excel file to classify.
        skip_for: optional collection of faculty keys that must never be
            returned; ``None`` means nothing is skipped.

    Returns:
        The matching faculty key, or ``None`` when no short name matches.
    """
    excluded = () if skip_for is None else skip_for
    # Lower-case the URL once rather than for every candidate name.
    url_lowered = excel_url.lower()

    for fac_key, fac_info in facultets_data.items():
        if fac_key.startswith("_") or fac_key in excluded:
            continue

        short_names = fac_info.get("short_names", None)
        if short_names is None:
            continue

        for name in short_names:
            # An empty short name is a substring of every URL and would make
            # this faculty match unconditionally, so it is ignored.
            if name and name.lower() in url_lowered:
                return fac_key

    return None
|
||||
|
||||
def get_abbrev_for_facultet(facultets_data: dict, facultet_id: str, fallback_not_found="?", fallback_error="?", fallback_no_short_name="?"):
    """Return the abbreviation used for a faculty in group path keys.

    The ids ``'mag'`` and ``'asp'`` map to fixed abbreviations and never
    consult *facultets_data*. For any other id the first entry of the
    faculty's ``"short_names"`` is returned.

    Args:
        facultets_data: mapping of faculty key -> faculty description dict.
        facultet_id: key of the faculty to abbreviate.
        fallback_not_found: returned when *facultet_id* is not a key of
            *facultets_data*.
        fallback_error: returned when the first short name cannot be read
            (for example an empty list); the traceback is printed first.
        fallback_no_short_name: returned when the faculty has no
            ``"short_names"`` value.

    Returns:
        The abbreviation, or the applicable fallback value.
    """
    if facultet_id == 'mag':
        return "МАГ"
    if facultet_id == 'asp':
        return "АСП"

    # Direct key lookup instead of scanning every item of the dict.
    if facultet_id not in facultets_data:
        return fallback_not_found

    short_names = facultets_data[facultet_id].get("short_names", None)
    if short_names is None:
        return fallback_no_short_name

    try:
        return short_names[0]
    except (IndexError, KeyError, TypeError):
        # print_exc() works on every Python 3 version; the single-argument
        # print_exception(exc) form is only accepted from 3.10 onwards.
        traceback.print_exc()
        return fallback_error
|
||||
|
||||
|
||||
def download_file_from_url(url, output_filename):
|
||||
"""
|
||||
Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем.
|
||||
|
||||
Reference in New Issue
Block a user