Files
VSTU_Schedule_Parser/main.py

221 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright Stanislav Mironov
# Project-wide rule: coordinates are always given as ROW first, then COL; numbering starts at zero
import json
import os
import time
import traceback
import uuid
import aigenerated
import parser
import translations
import utils
import json
import links_parser
import shutil
def currt():
    """Return the current Unix timestamp rounded to the nearest whole second."""
    now = time.time()
    return round(now)
# Faculty codes handed to links_parser.parse_links(); kept in alphabetical order.
FACULTETS = [
    "asp", "mag", "fastiv", "fat", "ftkm", "ftpp",
    "feu", "fevt", "htf", "vkf", "mmf", "fpik",
]
FACULTETS.sort()

# Directory that receives the downloaded Excel files (recreated by main()).
DIRNAME = "excels"
# File holding the "diffable dates" string written by the previous run.
DIFFABLE_DATES = "diffable_dates.txt"
# Set to a single faculty code (e.g. 'htf') to process only that faculty.
DEBUG_ONE_FAC = None

# Parsed groups keyed by group name; filled by process_excel_file().
result_groups = {}

# Skeleton of the document that main() dumps to result.json.
# Key order is significant: it is the order the keys appear in the output file.
result = {
    "version": 1,
    "notice": (
        "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: Данные, доступ к API и т.д. предоставляется КАК-ЕСТЬ (AS-IS) без каких либо, явно или не явно подразумеваемых гарантий.\n\n"
        "Парсер написал: Миронов Станислав\n\n"
        "Источник данных: https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php"
    ),
    # Timestamp taken when this module is loaded.
    "actual_at": round(time.time()),
    "documentation": "https://fazziclay.com/api/v1/vstu_schedule_parser/scheme.json",
    "daypicture": "QwQ",
    "daycite": "running on a rope",
    "contact": "https://fazziclay.com/",
    "university": "VSTU",
    "university_site": "https://www.vstu.ru/",
    "source": "https://fazziclay.com/api/v1/vstu_schedule_parser/result.json",
    "stat": {
        # Placeholder; main() overwrites it before saving.
        "total_parsing_time": -1,
    },
    "api_notices": {
        "updated_at": 1757688552,
        "text": (
            "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\n"
            "По хорошему если updated_at другой по сравнению с вашем кэшем это сообщение должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\n"
            "warning=True значит 'text' содержит важное а не как щас hint.\n\n"
            " ~fazziclay aka Stanislav;"
        ),
        "warning": False,
        "tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'",
    },
    # Names of groups that were seen more than once and skipped.
    "doubled_groups": [],
    "debug": {
        "bleu~~": 1,
    },
    # One info record per processed Excel file.
    "excels": [],
    "facultets": FACULTETS,
    "emptykey1": "",
    "emptykey2": "",
    "groups": result_groups,
    "emptykey3": "",
    "emptykey4": "",
    "see_header_at_top_of_this_file": "SEE TOP OF THIS FILE | ОБРАТИТЕ ВНИМАНИЕ НА ВЕРХ ЭТОГО ФАЙЛА",
}
def process_excel_file(facultet, excel_url, counter, latest_changed):
    """Download one Excel file, parse every sheet and merge its groups into the result.

    Parsed groups are stored in the module-level ``result_groups``; a group
    name that is already present there is skipped and reported as doubled.
    An info record for the file is always appended to ``result['excels']``.
    Any exception raised while downloading or parsing is caught, printed,
    stored in the record under ``'error'`` and added to ``faileds``.

    Args:
        facultet: Faculty code; used in the local file name and stored on
            every parsed group.
        excel_url: URL the file is downloaded from.
        counter: Number embedded in the local file name.
        latest_changed: Value copied as-is into the info record.
    """
    source_name = excel_url.split("/")[-1]
    extension = ".xlsx" if excel_url.endswith(".xlsx") else ".xls"
    filename = f"{DIRNAME}/_[C{counter}]_{facultet}{extension}"

    excel_info = {
        "filename": source_name,
        "url": excel_url,
        "latest_changed": latest_changed,
        "download_place": filename,
        "group_names_parsed": [],
        "facultet": facultet,
        "counter": counter,
    }

    parser.LOGGING = False
    try:
        aigenerated.download_file_from_url(excel_url, filename)
        reader = translations.create_reader(filename)
        print("Reader info")
        print(reader.info())

        # One parser run per sheet, until the reader reports no further sheet.
        while True:
            print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
            sheet_parser = parser.Parser(reader)
            print("Parser created; parser.parse();")
            sheet_parser.parse()
            print("parsed done!")

            if sheet_parser.parser_error is not None:
                error_key = "parser_error_cycle_" + str(reader.get_sheet_index()+1)
                excel_info[error_key] = sheet_parser.parser_error

            for group_name, group in sheet_parser.groups.items():
                if group_name in result_groups:
                    # The first occurrence wins; later duplicates are only recorded.
                    print(f" -- WTF -- Doubled groups -- name: {group_name}")
                    excel_info.setdefault('warning_doubled_groups_skip', []).append(group_name)
                    result['doubled_groups'].append(group_name)
                    continue

                result_groups[group_name] = group
                group['facultet'] = facultet
                group['data_source'] = source_name
                group['debug'] = {
                    "excel_url": excel_url,
                    "reader_info": reader.info(),
                    "reader_sheet_index": reader.get_sheet_index(),
                    "filename": filename,
                }
                excel_info["group_names_parsed"].append(group_name)

            print(f"Populates {len(sheet_parser.groups)} groups to result: " + " ".join(sheet_parser.groups.keys()))

            if reader.has_next_sheet():
                reader.next_sheet()
                print("Next sheet!")
                continue
            print("File ended")
            break
    except Exception as exc:
        print(f"Error while {excel_url}")
        print(exc)
        traceback.print_exc()
        # The random anchor is printed and stored so the error record can be
        # matched with the console output.
        anchor = uuid.uuid4()
        excel_info['error'] = {
            "smile": ":(",
            "error_message": str(exc),
            "log_anchor": str(anchor),
            "time": currt(),
        }
        print(f"Log Anchor: {anchor}")
        faileds.append({
            "ex": exc,
            "fac": facultet,
            "url": excel_url,
        })

    result['excels'].append(excel_info)
# Failure records appended by process_excel_file(): dicts with the keys
# "ex" (the exception), "fac" (faculty) and "url"; printed at the end of main().
faileds: list = []
def main():
    """Run one full parsing pass and write the aggregated result to disk.

    Steps:
      1. Recreate the ``DIRNAME`` download directory.
      2. Collect the Excel links for ``FACULTETS`` (or only ``DEBUG_ONE_FAC``
         when it is set), compute their "diffable dates" string and compare
         it with the one stored in ``DIFFABLE_DATES``; return early when
         both are equal.
      3. Call ``process_excel_file`` for every link.
      4. Store the groups sorted by name and dump ``result`` to
         ``result.json`` (indented) and ``result-no-indent.json``.
      5. Store the new diffable dates, print the collected failures and
         delete the download directory.

    Raises:
        Exception: re-raised unchanged when the download directory cannot
            be created.
    """
    t = utils.StepTimeCounter()

    # Start from an empty download directory. A failed removal (for example
    # because the directory does not exist yet) is only reported; a failed
    # creation aborts the run.
    try:
        try:
            shutil.rmtree(DIRNAME)
            print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
        except Exception as e:
            print(f"Error deleting directory '{DIRNAME}': {e}")
        os.mkdir(DIRNAME)
        print(f"Directory '{DIRNAME}' created successfully.")
    except Exception:
        print(f"Failed create '{DIRNAME}': ")
        raise

    print("main(); parse links starting...")
    facultets = FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC]
    excel_links = links_parser.parse_links(facultets)
    now_diffable_dates = links_parser.excels_to_diffabledates(excel_links)

    prev_diffable_dates = None
    if os.path.exists(DIFFABLE_DATES):
        with open(DIFFABLE_DATES, 'r') as fp:
            prev_diffable_dates = fp.read().strip()

    if now_diffable_dates == prev_diffable_dates:
        print("No date changes in vstu.ru website. Stopping")
        return

    # File counters start at 10001.
    for counter, excel_link in enumerate(excel_links, start=10001):
        process_excel_file(
            excel_link['facultet'],
            excel_link['url'],
            counter,
            excel_link['last_changed'],
        )

    print("Saving result.json")
    # Dicts keep insertion order, so this emits the groups sorted by name.
    result['groups'] = {name: result_groups[name] for name in sorted(result_groups)}
    result['stat']['total_parsing_time'] = t.step()

    # ensure_ascii=False writes Cyrillic text verbatim, so the encoding must
    # be explicit; the context managers guarantee the files are flushed and
    # closed.
    with open('result.json', 'w', encoding='utf-8') as fp:
        json.dump(result, fp, indent=2, ensure_ascii=False)
    print("Saved to result.json indent=2")
    with open('result-no-indent.json', 'w', encoding='utf-8') as fp:
        json.dump(result, fp, ensure_ascii=False)
    print("Saved to result-no-indent.json")

    # Remember the processed dates only after the results are on disk, so a
    # run that crashed half-way is retried instead of being skipped as
    # "no changes" next time.
    with open(DIFFABLE_DATES, 'w') as fp:
        fp.write(now_diffable_dates)

    print("Faileds:")
    print(faileds)

    # Delete the download directory and its contents.
    try:
        shutil.rmtree(DIRNAME)
        print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
    except Exception as e:
        print(f"Error deleting directory '{DIRNAME}': {e}")
# Script entry point: run the parser only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    print("Start")
    main()
    print("Bye!")