# Copyright Stanislav Mironov
# Project-wide rule: coordinates are always given as ROW first, then COL;
# numbering is zero-based.
import json
import os
import random
import shutil
import time
import traceback
import uuid

import aigenerated
import parser
import translations
import utils
import links_parser
import hashes


def currt() -> int:
    """Return the current Unix time rounded to whole seconds."""
    return round(time.time())


# Faculty identifiers handed to links_parser.parse_links(), kept sorted.
FACULTETS = sorted([
    "asp",
    "mag",
    "fastiv",
    "fat",
    "ftkm",
    "ftpp",
    "feu",
    "fevt",
    "htf",
    "vkf",
    "mmf",
    "fpik",
])

# Scratch directory for downloaded workbooks; wiped at start and end of main().
DIRNAME = "excels"
# File holding the "diffable dates" string produced by the previous run.
DIFFABLE_DATES = "diffable_dates.txt"
# When True, parsing continues even if the diffable dates did not change.
SKIP_DIFFABLE_DATES = True
# Set to a single faculty id (e.g. 'fevt') to restrict link parsing to it.
DEBUG_ONE_FAC = None
# Copied into parser.LOGGING before each workbook is parsed.
LOGGING = False

# Every value found under a "raw" key while walking parsed slots.
unique_raws = set()

# One entry per workbook whose processing raised: {"ex", "fac", "url"}.
faileds = []

# Document that is serialized to result.json / result-no-indent.json.
result = {
    "version": 1,
    "notice": "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: Данные, доступ к API и т.д. предоставляется КАК-ЕСТЬ (AS-IS) без каких либо, явно или не явно подразумеваемых гарантий.\n\nПарсер написал: Миронов Станислав\n\nИсточник данных: https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php",
    "actual_at": currt(),
    "documentation": "https://fazziclay.com/api/v1/vstu_schedule_parser/scheme.json (temporary outdated)",
    "daypicture": "0w0",
    "daycite": "KIlLSWITCH",
    "contact": "https://fazziclay.com/",
    "university": "VSTU",
    "university_site": "https://www.vstu.ru/",
    "source": "https://fazziclay.com/api/v1/vstu_schedule_parser/result.json",
    "stat": {
        "total_parsing_time": -1,
        "excels": {
            "fine": 0,
            "bad": 0,
        },
        "groups": 0,
        "unique_raws": -1,
    },
    "api_notices": {
        "updated_at": 1773523692,
        "text_pre1": "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\nПо хорошему если updated_at другой по сравнению с вашем кэшем это сообщение должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\nwarning=True значит 'text' содержит важное а не как щас hint.\n\n ~fazziclay aka Stanislav;\n\n2025-10-05: добавлено data_source_hash в эксель и в группу. Это SHA1 of скачанный эксель файл.",
        "text": "2026-03-15 BREAKING CHANGES! By Stanislav Mironov.\n\nИзменено многое в угоду унифкации и расширению спаршенных групп. Пока alpha",
        "warning": True,
        "tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'",
    },
    "debug": {
        "bleu~~": 3,
    },
    "excels": [],
    "facultets": FACULTETS,
    "group_names_parsed": [],
    # Placeholder set; main() replaces it with a sorted list before dumping.
    "unique_raws": unique_raws,
    "see_header_at_top_of_this_file": "SEE TOP OF THIS FILE | ОБРАТИТЕ ВНИМАНИЕ НА ВЕРХ ЭТОГО ФАЙЛА",
}


def process_obj(data):
    """Recursively walk ``data`` and collect every "raw" value into ``unique_raws``.

    Dicts and lists are descended into; any other type is ignored. Failures
    are printed and swallowed so one malformed slot cannot abort the run.
    """
    try:
        if isinstance(data, dict):
            for key, value in data.items():
                if key == "raw":
                    # NOTE(review): set.update() iterates ``value``; if "raw"
                    # is ever a plain string its characters are added one by
                    # one — confirm "raw" is always a collection of strings.
                    unique_raws.update(value)
                process_obj(value)
        # If it is a list, walk its elements.
        elif isinstance(data, list):
            for item in data:
                process_obj(item)
    except Exception as e:
        print("Failed process_obj")
        print(e)


def _parse_current_sheet(reader, excel_url, excel_info):
    """Parse the reader's current sheet and record it in ``excel_info`` and ``result``."""
    print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
    sheet_dict = {
        "index": reader.get_sheet_index(),
        "name": reader.get_sheet_name(),
        "reader_info": reader.info(),
        "group_names_parsed": [],
        "groups": {},
    }
    # Registered before parsing so a sheet that fails to parse is still listed.
    excel_info['sheets'].append(sheet_dict)

    prs = parser.Parser(reader)
    print("Parser created; parser.parse();")
    prs.parse()
    print("parsed done!")

    # Optional keys are emitted only when they carry information.
    if len(prs.raw_no_schedule) > 0:
        sheet_dict["raw_no_schedule"] = prs.raw_no_schedule
    if len(prs.features) > 0:
        sheet_dict["features"] = sorted(prs.features)
    if prs.parser_error is not None:
        sheet_dict["parser_error"] = prs.parser_error
    if prs.parser_warnings is not None and len(prs.parser_warnings) > 0:
        sheet_dict["parser_warnings"] = prs.parser_warnings

    for group_name, gr in prs.groups.items():
        gr["excel_url"] = excel_url
        sheet_dict["group_names_parsed"].append(group_name)
        excel_info["group_names_parsed"].append(group_name)
        result["group_names_parsed"].append(group_name)
        result['stat']['groups'] += 1
        # Assigned inside the loop, so the key exists only for sheets that
        # produced at least one group.
        sheet_dict['week_keys_metadata'] = prs.week_keys_metadata
        sheet_dict['groups'][group_name] = gr
        process_obj(gr['slots'])

    print(f"Populates {len(prs.groups)} groups: " + " ".join(prs.groups.keys()))


def process_excel_file(facultet, excel_url, counter, latest_changed):
    """Download one workbook, parse all of its sheets and record the outcome.

    Args:
        facultet: Faculty identifier the workbook belongs to.
        excel_url: URL of the workbook; its last path segment is the file name.
        counter: Running number embedded into the local download file name.
        latest_changed: Change marker of the link, stored as-is in the output.

    Any exception raised while downloading or parsing is caught: it is printed,
    stored under ``excel_info['error']`` and appended to ``faileds``. The
    workbook is counted as "fine" when at least one group was parsed from it,
    otherwise as "bad".
    """
    extension = ".xlsx" if excel_url.endswith(".xlsx") else ".xls"
    download_place = f"{DIRNAME}/_[C{counter}]_{facultet}{extension}"
    excel_filename = excel_url.split("/")[-1]

    # NOTE(review): only workbooks whose file name contains "ФЭУ" are
    # processed; everything else is skipped and never recorded in ``result``.
    # This looks like a debugging filter — confirm it is intended.
    if "ФЭУ" not in excel_filename:
        print("SKIPPED")
        return

    excel_info = {
        "filename": excel_filename,
        "data_source_hash": None,
        "url": excel_url,
        "latest_changed": latest_changed,
        "download_place": download_place,
        "group_names_parsed": [],
        "facultet": facultet,
        "counter": counter,
        "sheets": [],
    }
    parser.LOGGING = LOGGING

    try:
        aigenerated.download_file_from_url(excel_url, download_place)
        excel_info['data_source_hash'] = hashes.calculate_sha1(download_place)

        reader = translations.create_reader(download_place)
        print("Reader info")
        print(reader.info())

        while True:
            _parse_current_sheet(reader, excel_url, excel_info)
            if not reader.has_next_sheet():
                print("File ended")
                break
            reader.next_sheet()
            print("Next sheet!")
    except Exception as e:
        print(f"Error while {excel_url}")
        print(e)
        traceback.print_exc()
        # The anchor lets a JSON error entry be matched with the console log.
        u = uuid.uuid4()
        excel_info['error'] = {
            "smile": ":(",
            "error_message": str(e),
            "log_anchor": str(u),
            "time": currt(),
        }
        print(f"Log Anchor: {u}")
        faileds.append({
            "ex": e,
            "fac": facultet,
            "url": excel_url,
        })

    result['excels'].append(excel_info)
    k = "fine" if excel_info['group_names_parsed'] else "bad"
    result['stat']['excels'][k] += 1


def _remove_download_dir():
    """Delete DIRNAME with its contents; failures are reported, never raised."""
    try:
        shutil.rmtree(DIRNAME)
        print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
    except Exception as e:
        print(f"Error deleting directory '{DIRNAME}': {e}")


def _write_result_json(path, **dump_kwargs):
    """Dump ``result`` to ``path`` as UTF-8 JSON and close the file afterwards."""
    # ensure_ascii=False emits Cyrillic verbatim, so the encoding must be
    # pinned instead of depending on the platform locale.
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(result, fp, ensure_ascii=False, **dump_kwargs)


def main():
    """Run the whole pipeline: collect links, parse workbooks, write JSON output.

    Raises:
        Exception: re-raised unchanged when the download directory cannot be
            created.
    """
    t = utils.StepTimeCounter()

    # Start from an empty download directory.
    try:
        _remove_download_dir()
        os.mkdir(DIRNAME)
        print(f"Directory '{DIRNAME}' created successfully.")
    except Exception:
        print(f"Failed create '{DIRNAME}': ")
        raise

    print("main(); parse links starting...")
    excel_links = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])

    # Compare the current diffable dates with those stored by the previous run.
    now_diffable_dates = links_parser.excels_to_diffabledates(excel_links)
    prev_diffable_dates = None
    if os.path.exists(DIFFABLE_DATES):
        with open(DIFFABLE_DATES, 'r') as fp:
            prev_diffable_dates = fp.read().strip()
    with open(DIFFABLE_DATES, 'w') as fp:
        fp.write(now_diffable_dates)

    if now_diffable_dates == prev_diffable_dates:
        print("No date changes in vstu.ru website. Stopping")
        if not SKIP_DIFFABLE_DATES:
            return
        print("SKIP_DIFFABLE_DATES is True, force resuming")

    # Counters start at 10001 and are embedded into download file names.
    for counter, excel_link in enumerate(excel_links, start=10001):
        process_excel_file(
            excel_link['facultet'],
            excel_link['url'],
            counter,
            excel_link['last_changed'],
        )

    print("Saving result.json")
    result['stat']['total_parsing_time'] = t.step()
    # Fill the statistic that was previously left at its -1 placeholder.
    result['stat']['unique_raws'] = len(unique_raws)
    # A set is not JSON-serializable; publish it as a sorted list.
    result['unique_raws'] = sorted(unique_raws)

    _write_result_json('result.json', indent=2)
    print("Saved to result.json indent=2")
    _write_result_json('result-no-indent.json')
    print("Saved to result-no-indent.json")

    print("Faileds:")
    print(faileds)

    # Delete the download directory and its contents.
    _remove_download_dir()


if __name__ == "__main__":
    print("Start")
    main()
    print("Bye!")