Created groups.json functionality
All checks were successful
Build and Run VSTU Schedule Parser / build_and_run (push) Successful in 18s
All checks were successful
Build and Run VSTU Schedule Parser / build_and_run (push) Successful in 18s
This commit is contained in:
@@ -44,6 +44,7 @@ jobs:
|
|||||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \
|
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parsed:/app/parsed \
|
||||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \
|
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/parser.json:/app/parser.json \
|
||||||
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \
|
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/result_v2.json:/app/result_v2.json \
|
||||||
|
-v /home/holder/fclay/fclaydata/vstu_schedule_parser_v2/facultets.json:/app/facultets.json \
|
||||||
--restart=always \
|
--restart=always \
|
||||||
--name=vstu_schedule_parser_v2 \
|
--name=vstu_schedule_parser_v2 \
|
||||||
vstu_schedule_parser_v2:latest
|
vstu_schedule_parser_v2:latest
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -8,3 +8,4 @@ diffable_dates.txt
|
|||||||
parsed/
|
parsed/
|
||||||
parser.json
|
parser.json
|
||||||
.env
|
.env
|
||||||
|
facultets.json
|
||||||
67
main.py
67
main.py
@@ -47,10 +47,56 @@ FACULTETS = sorted([
|
|||||||
DIRNAME = "excels"
|
DIRNAME = "excels"
|
||||||
PARSED_DIR = "parsed"
|
PARSED_DIR = "parsed"
|
||||||
|
|
||||||
DEBUG_ONE_FAC = None #'fevt'
|
DEBUG_ONE_FAC = None# 'fevt'
|
||||||
DEBUG_NO_SAVE_STATES = False
|
DEBUG_NO_SAVE_STATES = False
|
||||||
DEBUG_NO_LINKS_DELAY = True
|
DEBUG_NO_LINKS_DELAY = True
|
||||||
|
|
||||||
|
facultets_data = None
|
||||||
|
|
||||||
|
|
||||||
|
def gen_groups_from_states(states):
    """Build the ``groups.json`` payload: an index of every group in *states*.

    Each group found on any sheet of any state is stored under a normalized
    key of the form ``[<recognized abbrev>/]<technical abbrev>/<GROUP KEY>``
    (upper-cased, spaces and newlines removed). The recognized-faculty prefix
    is added only when URL-based recognition yields a faculty that differs
    from the one recorded in ``state['excel']['facultet']``.

    When the same key is produced more than once, the first entry is kept,
    flagged with ``'doubled': True``, and the later Excel descriptor is
    appended to its ``'excels'`` list.

    Args:
        states: iterable of state dicts. Each is read as
            ``state['excel']`` (with ``'url'`` and ``'facultet'``) and
            ``state['sheets']`` (mapping to sheets that carry ``'name'``,
            ``'index'`` and a ``'groups'`` mapping).

    Returns:
        ``{"version": 1, "groups": {...}}`` with the groups sorted by key,
        or ``None`` when the module-level ``facultets_data`` has not been
        loaded yet (a message is printed in that case).
    """
    if facultets_data is None:
        print("FAILED BECAUSE facultets_data is NONE!!!")
        return None

    groups = {}
    for state in states:
        excel = state['excel']

        # The faculty prefix depends only on the Excel file, so it is resolved
        # once per state instead of once per group.
        tech_fac = excel['facultet']
        recognized_fac = utils.get_preferer_facultet(facultets_data, excel['url'], skip_for=['mag', 'asp'])
        prefix = utils.get_abbrev_for_facultet(facultets_data, tech_fac) + "/"
        if recognized_fac is not None and recognized_fac != tech_fac:
            prefix = utils.get_abbrev_for_facultet(facultets_data, recognized_fac) + "/" + prefix

        for sheet in state['sheets'].values():
            # A sheet without groups simply contributes no iterations here.
            for key, group_dict in sheet['groups'].items():
                full_path_key = (prefix + key.upper()).replace(" ", "").replace("\n", "").upper().strip()

                known = groups.get(full_path_key)
                if known is not None:
                    # Same key seen again: keep the first entry and record the
                    # additional source file.
                    known['doubled'] = True
                    known['excels'].append(excel)
                    continue

                groups[full_path_key] = {
                    "full_path_key": full_path_key,
                    "real_name": group_dict['name'],
                    "facultet_tech": tech_fac,
                    "facultet_regognized": recognized_fac,
                    "excels": [excel],
                    "excel_position": group_dict['position_human'],
                    "excel_sheet": {
                        "name": sheet['name'],
                        "index": sheet['index'],
                    },
                    "slots_weekdays_used": sorted(group_dict['slots'].keys()),
                }

    # The JSON round-trip yields a detached copy with keys sorted at every level.
    return {"version": 1, "groups": json.loads(json.dumps(groups, sort_keys=True, ensure_ascii=False))}
|
||||||
|
|
||||||
def parse_sheets(download_place):
|
def parse_sheets(download_place):
|
||||||
to_return = {}
|
to_return = {}
|
||||||
try:
|
try:
|
||||||
@@ -187,6 +233,14 @@ def run_session():
|
|||||||
last_changeds.add(excel_dict['last_changed'])
|
last_changeds.add(excel_dict['last_changed'])
|
||||||
|
|
||||||
excel_url = excel_dict['url']
|
excel_url = excel_dict['url']
|
||||||
|
|
||||||
|
for state in states:
|
||||||
|
ch = state['excel']['url']
|
||||||
|
if excel_url == ch:
|
||||||
|
print(f"Doubled excel files(By URLs)! Current 1th={excel_dict}; 2th={state['excel']}")
|
||||||
|
print("Skipped!")
|
||||||
|
continue
|
||||||
|
|
||||||
facultet = excel_dict['facultet']
|
facultet = excel_dict['facultet']
|
||||||
excel_filename = excel_url.split("/")[-1]
|
excel_filename = excel_url.split("/")[-1]
|
||||||
excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1]
|
excel_dict['json_represent'] = parsed_file_path(excel_filename).split(os.path.sep)[-1]
|
||||||
@@ -251,6 +305,7 @@ def run_session():
|
|||||||
changed_files += 1
|
changed_files += 1
|
||||||
changed = True
|
changed = True
|
||||||
excel_dict['different_in_this_session'] = True
|
excel_dict['different_in_this_session'] = True
|
||||||
|
excel_dict['recognized_facultet'] = utils.get_preferer_facultet(facultets_data, excel_url=excel_dict['url'])
|
||||||
state['actual_at'] = currt()
|
state['actual_at'] = currt()
|
||||||
state['excel'] = excel_dict
|
state['excel'] = excel_dict
|
||||||
|
|
||||||
@@ -309,6 +364,9 @@ def run_session():
|
|||||||
"faileds": faileds
|
"faileds": faileds
|
||||||
}, fp=fp, ensure_ascii=False)
|
}, fp=fp, ensure_ascii=False)
|
||||||
|
|
||||||
|
with open("groups.json", 'w', encoding="utf-8") as fp:
|
||||||
|
json.dump(gen_groups_from_states(states), fp=fp, ensure_ascii=False)
|
||||||
|
|
||||||
if changed:
|
if changed:
|
||||||
all_files = states
|
all_files = states
|
||||||
d = {
|
d = {
|
||||||
@@ -322,6 +380,9 @@ def run_session():
|
|||||||
"all_files": sorted(all_files, key=lambda d: d['excel']['url']),
|
"all_files": sorted(all_files, key=lambda d: d['excel']['url']),
|
||||||
"faileds": faileds
|
"faileds": faileds
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
with open("result_v2.json", 'w', encoding="utf-8") as fp:
|
with open("result_v2.json", 'w', encoding="utf-8") as fp:
|
||||||
json.dump(d, fp=fp, ensure_ascii=False)
|
json.dump(d, fp=fp, ensure_ascii=False)
|
||||||
|
|
||||||
@@ -352,6 +413,10 @@ def check_dirs():
|
|||||||
os.mkdir(PARSED_DIR)
|
os.mkdir(PARSED_DIR)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
global facultets_data
|
||||||
|
with open("facultets.json") as fp:
|
||||||
|
facultets_data = json.load(fp=fp)
|
||||||
|
|
||||||
flag = True
|
flag = True
|
||||||
while flag:
|
while flag:
|
||||||
if not INFINITY_LOOP:
|
if not INFINITY_LOOP:
|
||||||
|
|||||||
41
utils.py
41
utils.py
@@ -1,6 +1,7 @@
|
|||||||
# Copyright Stanislav Mironov
|
# Copyright Stanislav Mironov
|
||||||
|
|
||||||
import time
|
import time
|
||||||
|
import traceback
|
||||||
import xlrd
|
import xlrd
|
||||||
from coord import Coord
|
from coord import Coord
|
||||||
from translations import ExcelSheetReader
|
from translations import ExcelSheetReader
|
||||||
@@ -10,6 +11,46 @@ import hashlib
|
|||||||
import requests
|
import requests
|
||||||
from urllib.parse import urlsplit, urlunsplit, quote
|
from urllib.parse import urlsplit, urlunsplit, quote
|
||||||
|
|
||||||
|
def get_preferer_facultet(facultets_data: dict, excel_url: str, skip_for=None, ):
|
||||||
|
if skip_for is None:
|
||||||
|
skip_for = []
|
||||||
|
|
||||||
|
for _key, _value in facultets_data.items():
|
||||||
|
if _key.startswith("_"):
|
||||||
|
continue
|
||||||
|
if _key in skip_for:
|
||||||
|
continue
|
||||||
|
|
||||||
|
short_names = _value.get("short_names", None)
|
||||||
|
if short_names is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for name in short_names:
|
||||||
|
if name.lower() in excel_url.lower():
|
||||||
|
return _key
|
||||||
|
|
||||||
|
def get_abbrev_for_facultet(facultets_data: dict, facultet_id: str, fallback_not_found="?", fallback_error="?", fallback_no_short_name="?"):
|
||||||
|
if (facultet_id == 'mag'):
|
||||||
|
return "МАГ"
|
||||||
|
if (facultet_id == 'asp'):
|
||||||
|
return "АСП"
|
||||||
|
|
||||||
|
for _key, _value in facultets_data.items():
|
||||||
|
if _key != facultet_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
short_names = _value.get("short_names", None)
|
||||||
|
if short_names is None:
|
||||||
|
return fallback_no_short_name
|
||||||
|
|
||||||
|
try:
|
||||||
|
return short_names[0]
|
||||||
|
except Exception as e:
|
||||||
|
traceback.print_exception(e)
|
||||||
|
return fallback_error
|
||||||
|
return fallback_not_found
|
||||||
|
|
||||||
|
|
||||||
def download_file_from_url(url, output_filename):
|
def download_file_from_url(url, output_filename):
|
||||||
"""
|
"""
|
||||||
Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем.
|
Скачивает файл по URL со спецсимволами и пробелами, сохраняя его под указанным именем.
|
||||||
|
|||||||
Reference in New Issue
Block a user