# Copyright Stanislav Mironov
# Project-wide rule: in coordinates ROW comes first, then COL; numbering starts at zero.
"""Download every schedule workbook, parse it and dump the result to result.json.

The script asks ``links_parser`` for the workbook URLs of each faculty,
downloads every workbook into a scratch directory, runs ``parser.Parser`` over
each sheet and finally serialises the module-level ``result`` dictionary.
"""

import json
import os
import time
import traceback
import uuid
import aigenerated
import parser
import translations
import utils
import links_parser
import shutil


def currt() -> int:
    """Return the current Unix time rounded to whole seconds."""
    return round(time.time())


# Faculty identifiers passed to links_parser.parse_links().
FACULTETS = [
    "asp",
    "mag",
    "fastiv",
    "fat",
    "ftkm",
    "ftpp",
    "feu",
    "fevt",
    "htf",
    "vkf",
    "mmf",
    "fpik",
]

# Scratch directory for downloaded workbooks; created and removed by main().
DIRNAME = "excels"

# Set to a single faculty id (e.g. 'htf') to process only that faculty.
DEBUG_ONE_FAC = None

# Parsed groups keyed by group name; shared by reference with result["groups"].
result_groups = {}

# Skeleton of the JSON document written to result.json.
result = {
    "version": 1,
    "notice": (
        "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: Данные, доступ к API и т.д. предоставляется "
        "КАК-ЕСТЬ (AS-IS) без каких либо, явно или не явно подразумеваемых гарантий.\n\n"
        "Парсер написал: Миронов Станислав\n\n"
        "Источник данных: https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php"
    ),
    "actual_at": currt(),
    "documentation": "https://fazziclay.com/api/v1/vstu_schedule_parser/scheme.json",
    "daypicture": "QwQ",
    "daycite": "running on a rope",
    "contact": "https://fazziclay.com/",
    "university": "VSTU",
    "university_site": "https://www.vstu.ru/",
    "source": "https://fazziclay.com/api/v1/vstu_schedule_parser/result.json",
    "stat": {
        "total_parsing_time": -1,
    },
    "api_notices": {
        "updated_at": 1757688552,
        "text": (
            "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. "
            "Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\n"
            "По хорошему если updated_at другой по сравнению с вашем кэшем это сообщение "
            "должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\n"
            "warning=True значит 'text' содержит важное а не как щас hint.\n\n"
            " ~fazziclay aka Stanislav;"
        ),
        "warning": False,
        "tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'",
    },
    "doubled_groups": [],
    "debug": {
        "bleu~~": 1,
    },
    "excels": [],
    "facultets": FACULTETS,
    "emptykey1": "",
    "emptykey2": "",
    "groups": result_groups,
    "emptykey3": "",
    "emptykey4": "",
    "see_header_at_top_of_this_file": "SEE TOP OF THIS FILE | ОБРАТИТЕ ВНИМАНИЕ НА ВЕРХ ЭТОГО ФАЙЛА",
}

# One entry per workbook whose processing raised; printed at the end of main().
faileds = []


def process_excel_file(facultet: str, excel_url: str, counter: int, timeid: str) -> None:
    """Download one workbook, parse all of its sheets and record the outcome.

    Parsed groups are stored in ``result_groups``; a per-file report is always
    appended to ``result['excels']``. Any exception is caught, printed, stored
    in the report under ``"error"`` and appended to ``faileds`` - it is never
    propagated to the caller.

    Args:
        facultet: Faculty identifier the workbook belongs to.
        excel_url: URL of the workbook; an ``.xlsx`` suffix selects the
            ``.xlsx`` extension for the local copy, anything else ``.xls``.
        counter: Caller-supplied sequence number, embedded in the local file
            name and in the debug info of every group.
        timeid: Run identifier, used as the prefix of the local file name.
    """
    extension = ".xlsx" if excel_url.endswith(".xlsx") else ".xls"
    filename = f"{DIRNAME}/{timeid}_[C{counter}]_{facultet}{extension}"
    source_name = excel_url.split("/")[-1]

    excel_info = {
        "filename": source_name,
        "url": excel_url,
        "download_place": filename,
        # -1 means "this stage was never reached".
        "stat": {
            "download": -1,
            "create_reader": -1,
            "parse": -1,
            "cycles": 0,
        },
        "group_names_parsed": [],
        "facultet": facultet,
        "counter": counter,
    }
    stat = excel_info["stat"]

    # NOTE(review): presumably turns off the parser module's own log output.
    parser.LOGGING = False

    try:
        # NOTE(review): step() is assumed to return the time elapsed since the
        # previous step() call - confirm against utils.StepTimeCounter.
        t = utils.StepTimeCounter()

        aigenerated.download_file_from_url(excel_url, filename)
        stat['download'] = t.step()

        reader = translations.create_reader(filename)
        print("Reader info")
        print(reader.info())
        stat['create_reader'] = t.step()

        # One iteration per sheet of the workbook.
        while True:
            stat['cycles'] += 1
            print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
            prs = parser.Parser(reader)
            print("Parser created; parser.parse();")
            prs.parse()
            print("parsed done!")

            if prs.parser_error is not None:
                excel_info["parser_error_cycle_" + str(stat['cycles'])] = prs.parser_error

            for group_name, group in prs.groups.items():
                if group_name in result_groups:
                    # The first occurrence of a group wins; later ones are only reported.
                    print(f" -- WTF -- Doubled groups -- name: {group_name}")
                    excel_info.setdefault('warning_doubled_groups_skip', []).append(group_name)
                    result['doubled_groups'].append(group_name)
                    continue

                result_groups[group_name] = group
                group['facultet'] = facultet
                group['data_source'] = source_name
                group['debug'] = {
                    "counter": counter,
                    "timeid": timeid,
                    "excel_url": excel_url,
                    "reader_info": reader.info(),
                    "reader_sheet_index": reader.get_sheet_index(),
                    "filename": filename,
                }
                excel_info["group_names_parsed"].append(group_name)

            print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups))

            if not reader.has_next_sheet():
                print("File ended")
                break

            reader.next_sheet()
            print("Next sheet!")

        stat['parse'] = t.step()

    except Exception as e:
        # Deliberate catch-all: one broken workbook must not abort the whole run.
        print(f"Error while {excel_url}")
        print(e)
        traceback.print_exc()

        # The anchor is printed next to the traceback so the JSON entry can be
        # matched with the corresponding place in the log.
        u = uuid.uuid4()
        excel_info['error'] = {
            "smile": ":(",
            "error_message": str(e),
            "log_anchor": str(u),
            "time": currt(),
        }
        print(f"Log Anchor: {u}")
        faileds.append({
            "ex": e,
            "fac": facultet,
            "url": excel_url,
        })

    result['excels'].append(excel_info)


def main() -> None:
    """Run the whole pipeline: collect links, parse workbooks, write result.json.

    Raises:
        OSError: If the scratch directory ``DIRNAME`` cannot be created (for
            example because it already exists).
    """
    t = utils.StepTimeCounter()

    # Created outside the try/finally below on purpose: if creation fails
    # because the directory already exists, it must not be deleted by cleanup.
    try:
        os.mkdir(DIRNAME)
    except OSError:
        print(f"Failed create '{DIRNAME}': ")
        raise
    else:
        print(f"Directory '{DIRNAME}' created successfully.")

    try:
        print("main(); parse links starting...")
        facultets = FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC]
        excel_links = links_parser.parse_links(facultets)

        counter = 0
        timeid = str(currt())
        for facultet, facultet_urls in excel_links.items():
            # Each faculty bumps the counter by 1000 before its files are numbered.
            counter += 1000
            print(f"\n\n-- Факультет '{facultet}' --")

            for excel_url in facultet_urls:
                counter += 1
                print("\n\n-- Ссылка --")
                print(f"{excel_url}")
                print("Start processing excel file")
                process_excel_file(facultet, excel_url, counter, timeid)
                print("Excel file processing done!")

        print("Saving result.json")
        result['stat']['total_parsing_time'] = t.step()
        # ensure_ascii=False emits raw Cyrillic, so the encoding must be pinned;
        # the context manager guarantees the file is flushed and closed.
        with open('result.json', 'w', encoding='utf-8') as out:
            json.dump(result, out, indent=2, ensure_ascii=False)
        print("Saved to result.json")

        print("Faileds:")
        print(faileds)
    finally:
        # Delete the scratch directory and its contents even when the run
        # failed, otherwise the next run would stop at os.mkdir().
        try:
            shutil.rmtree(DIRNAME)
            print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
        except Exception as e:
            print(f"Error deleting directory '{DIRNAME}': {e}")


if __name__ == "__main__":
    print("Start")
    main()
    print("Bye!")