# Copyright Stanislav Mironov
# General project rule: in coordinates ROW comes first and then COL; numbering starts from zero.
import json
import os
import time
import traceback
import uuid
import aigenerated
import parser
import translations
import utils
import json
import links_parser
import shutil
import hashes


def currt():
    """Return the current Unix time rounded to whole seconds."""
    return round(time.time())


FACULTETS = sorted([
    "asp",
    "mag",
    "fastiv",
    "fat",
    "ftkm",
    "ftpp",
    "feu",
    "fevt",
    "htf",
    "vkf",
    "mmf",
    "fpik",
])

# Working directory for downloaded Excel files; wiped at start and end of main().
DIRNAME = "excels"
# File holding the snapshot produced by links_parser.excels_to_diffabledates on the previous run.
DIFFABLE_DATES = "diffable_dates.txt"
# Set to a single faculty key (e.g. 'fevt') to restrict link parsing to that faculty.
DEBUG_ONE_FAC = None

# Failures collected by process_excel_file(); printed at the end of main().
faileds = []

# Parsed groups keyed by group name; filled by process_excel_file().
result_groups = {}

# Top-level document that main() serializes to result.json.
result = {
    "version": 1,
    "notice": (
        "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: Данные, доступ к API и т.д. предоставляется КАК-ЕСТЬ (AS-IS) "
        "без каких либо, явно или не явно подразумеваемых гарантий.\n\n"
        "Парсер написал: Миронов Станислав\n\n"
        "Источник данных: https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php"
    ),
    "actual_at": currt(),
    "documentation": "https://fazziclay.com/api/v1/vstu_schedule_parser/scheme.json",
    "daypicture": "QwQ",
    "daycite": "running on a rope",
    "contact": "https://fazziclay.com/",
    "university": "VSTU",
    "university_site": "https://www.vstu.ru/",
    "source": "https://fazziclay.com/api/v1/vstu_schedule_parser/result.json",
    "stat": {
        "total_parsing_time": -1,
    },
    "api_notices": {
        "updated_at": 1759651871,
        "text": (
            "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. "
            "Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\n"
            "По хорошему если updated_at другой по сравнению с вашем кэшем это сообщение "
            "должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\n"
            "warning=True значит 'text' содержит важное а не как щас hint.\n\n"
            " ~fazziclay aka Stanislav;\n\n"
            "2025-10-05: добавлено data_source_hash в эксель и в группу. "
            "Это SHA1 of скачанный эксель файл."
        ),
        "warning": False,
        "tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'",
    },
    "doubled_groups": [],
    "debug": {
        "bleu~~": 2,
    },
    "excels": [],
    "facultets": FACULTETS,
    "emptykey1": "",
    "emptykey2": "",
    "groups": result_groups,
    "emptykey3": "",
    "emptykey4": "",
    "see_header_at_top_of_this_file": "SEE TOP OF THIS FILE | ОБРАТИТЕ ВНИМАНИЕ НА ВЕРХ ЭТОГО ФАЙЛА",
}


def process_excel_file(facultet, excel_url, counter, latest_changed):
    """Download one Excel file, parse every sheet and merge its groups into the result.

    Parsed groups are stored in ``result_groups``. A group whose name is already
    present is skipped and recorded both in ``result['doubled_groups']`` and in
    this file's ``warning_doubled_groups_skip`` list. Any exception raised while
    downloading or parsing is caught, recorded under ``excel_info['error']`` and
    appended to ``faileds``; it is not re-raised. The per-file ``excel_info``
    record is always appended to ``result['excels']``.

    Args:
        facultet: Faculty key the file belongs to.
        excel_url: URL the Excel file is downloaded from.
        counter: Number embedded in the local file name and stored in the record.
        latest_changed: Value stored as-is under ``latest_changed`` in the record.
    """
    extension = ".xlsx" if excel_url.endswith(".xlsx") else ".xls"
    download_place = f"{DIRNAME}/_[C{counter}]_{facultet}{extension}"
    excel_filename = excel_url.split("/")[-1]

    excel_info = {
        "filename": excel_filename,
        "data_source_hash": None,
        "url": excel_url,
        "latest_changed": latest_changed,
        "download_place": download_place,
        "group_names_parsed": [],
        "facultet": facultet,
        "counter": counter,
        "week_keys_metadata": {},
    }

    parser.LOGGING = False

    try:
        aigenerated.download_file_from_url(excel_url, download_place)
        sha1hash = hashes.calculate_sha1(download_place)
        excel_info['data_source_hash'] = sha1hash

        reader = translations.create_reader(download_place)
        print("Reader info")
        print(reader.info())

        while True:
            print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
            prs = parser.Parser(reader)
            print("Parser created; parser.parse();")
            prs.parse()
            print("parsed done!")

            # Per-sheet diagnostics are keyed by the 1-based sheet number.
            sheet_number = str(reader.get_sheet_index() + 1)
            if prs.parser_error is not None:
                excel_info["parser_error_cycle_" + sheet_number] = prs.parser_error
            if prs.parser_warnings:
                excel_info["parser_warnings_cycle_" + sheet_number] = prs.parser_warnings

            for group_name, group in prs.groups.items():
                if group_name in result_groups:
                    # First occurrence wins; later duplicates are only reported.
                    print(f" -- WTF -- Doubled groups -- name: {group_name}")
                    excel_info.setdefault('warning_doubled_groups_skip', []).append(group_name)
                    result['doubled_groups'].append(group_name)
                    continue

                result_groups[group_name] = group
                group['facultet'] = facultet
                group['data_source'] = excel_filename  # same as 'filename' in excel_info
                group['data_source_hash'] = sha1hash
                group['debug'] = {
                    "excel_url": excel_url,
                    "reader_info": reader.info(),
                    "reader_sheet_index": reader.get_sheet_index(),
                    "download_place": download_place,
                }
                excel_info["group_names_parsed"].append(group_name)

            # NOTE(review): overwritten on every sheet, so the last sheet's metadata wins.
            excel_info['week_keys_metadata'] = prs.week_keys_metadata
            print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups.keys()))

            if not reader.has_next_sheet():
                print("File ended")
                break
            reader.next_sheet()
            print("Next sheet!")

    except Exception as e:
        # Best-effort: one broken file must not abort the whole run.
        print(f"Error while {excel_url}")
        print(e)
        traceback.print_exc()
        u = uuid.uuid4()
        excel_info['error'] = {
            "smile": ":(",
            "error_message": str(e),
            "log_anchor": str(u),
            "time": currt(),
        }
        print(f"Log Anchor: {u}")
        faileds.append({
            "ex": e,
            "fac": facultet,
            "url": excel_url,
        })

    result['excels'].append(excel_info)


def _remove_work_dir():
    """Delete DIRNAME with its contents, reporting (not raising) any failure."""
    try:
        shutil.rmtree(DIRNAME)
        print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
    except Exception as e:
        print(f"Error deleting directory '{DIRNAME}': {e}")


def _write_json(path, **dump_kwargs):
    """Serialize ``result`` to ``path`` as UTF-8 JSON with non-ASCII text kept as-is."""
    # Explicit UTF-8: ensure_ascii=False emits raw Cyrillic, which the platform
    # default encoding may be unable to encode. The context manager closes the file.
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(result, fp, ensure_ascii=False, **dump_kwargs)


def main():
    """Run the full pipeline: collect links, parse every Excel file, write result JSON.

    Stops early, without parsing, when the snapshot returned by
    ``links_parser.excels_to_diffabledates`` equals the one stored in
    ``DIFFABLE_DATES`` by the previous run.

    Raises:
        Exception: whatever ``os.mkdir`` raises when DIRNAME cannot be created.
    """
    t = utils.StepTimeCounter()

    # Start from a clean working directory.
    _remove_work_dir()
    try:
        os.mkdir(DIRNAME)
        print(f"Directory '{DIRNAME}' created successfully.")
    except Exception:
        print(f"Failed create '{DIRNAME}': ")
        raise

    print("main(); parse links starting...")
    EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])
    now_diffable_dates = links_parser.excels_to_diffabledates(EXCEL_LINKS)

    # The snapshot file keeps the default encoding so files written by earlier runs stay readable.
    prev_diffable_dates = None
    if os.path.exists(DIFFABLE_DATES):
        with open(DIFFABLE_DATES, 'r') as fp:
            prev_diffable_dates = fp.read().strip()

    # NOTE(review): the snapshot is stored before parsing, so a run that fails
    # later is not retried until the snapshot changes again.
    with open(DIFFABLE_DATES, 'w') as fp:
        fp.write(now_diffable_dates)

    # The stored value is stripped on read, so strip the fresh one as well.
    if now_diffable_dates.strip() == prev_diffable_dates:
        print("No date changes in vstu.ru website. Stopping")
        return

    # Counter starts above 10000; it is embedded into each downloaded file's name.
    for counter, excel_link in enumerate(EXCEL_LINKS, start=10001):
        process_excel_file(
            excel_link['facultet'],
            excel_link['url'],
            counter,
            excel_link['last_changed'],
        )

    print("Saving result.json")
    # Emit groups in alphabetical order of their names.
    result['groups'] = {name: result_groups[name] for name in sorted(result_groups)}
    result['stat']['total_parsing_time'] = t.step()

    _write_json('result.json', indent=2)
    print("Saved to result.json indent=2")
    _write_json('result-no-indent.json')
    print("Saved to result-no-indent.json")

    print("Faileds:")
    print(faileds)

    # Delete a non-empty directory and its contents
    _remove_work_dir()


if __name__ == "__main__":
    print("Start")
    main()
    print("Bye!")