Added left calendar dates parsing
This commit is contained in:
34
hashes.py
Normal file
34
hashes.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import hashlib
|
||||||
|
|
||||||
|
def calculate_sha1(filepath, chunk_size=65536):
    """
    Calculates the SHA1 hash of a given file.

    The file is opened in binary mode and fed to the hash in pieces of
    ``chunk_size`` bytes, so the whole file is never held in memory at once.

    Args:
        filepath (str): The path to the file.
        chunk_size (int): How many bytes to request per read. Must be
            positive. The digest does not depend on this value.

    Returns:
        str: The hexadecimal representation of the SHA1 hash, or None if the
        file is not found or any other error occurs while reading it. In both
        failure cases a message is printed before None is returned.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A zero-sized read returns b"" immediately, which would end the loop
    # below at once and silently yield the digest of an empty file.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    sha1_hash = hashlib.sha1()
    try:
        with open(filepath, "rb") as f:
            # Read the file in chunks to handle large files efficiently;
            # iter() stops at the first empty read, i.e. at end of file.
            for chunk in iter(lambda: f.read(chunk_size), b""):
                sha1_hash.update(chunk)
        return sha1_hash.hexdigest()
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}")
        return None
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no hash available"
        # rather than expecting an exception.
        print(f"An error occurred: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual demo: hash a single sample file and report its digest.
    file_path = "xls.xls"  # Replace with the actual path to your file
    sha1_result = calculate_sha1(file_path)

    # calculate_sha1 returns None on failure (after printing the reason),
    # so the success line is printed only when a digest was produced.
    if sha1_result:
        print(f"The SHA1 hash of '{file_path}' is: {sha1_result}")
|
||||||
36
main.py
36
main.py
@@ -15,6 +15,7 @@ import utils
|
|||||||
import json
|
import json
|
||||||
import links_parser
|
import links_parser
|
||||||
import shutil
|
import shutil
|
||||||
|
import hashes
|
||||||
|
|
||||||
def currt():
|
def currt():
|
||||||
return round(time.time())
|
return round(time.time())
|
||||||
@@ -25,7 +26,7 @@ FACULTETS = sorted([
|
|||||||
DIRNAME = "excels"
|
DIRNAME = "excels"
|
||||||
DIFFABLE_DATES = "diffable_dates.txt"
|
DIFFABLE_DATES = "diffable_dates.txt"
|
||||||
|
|
||||||
DEBUG_ONE_FAC = None #'htf'
|
DEBUG_ONE_FAC = None #'fevt'
|
||||||
result_groups = {}
|
result_groups = {}
|
||||||
result = {
|
result = {
|
||||||
"version": 1,
|
"version": 1,
|
||||||
@@ -42,14 +43,14 @@ result = {
|
|||||||
"total_parsing_time": -1,
|
"total_parsing_time": -1,
|
||||||
},
|
},
|
||||||
"api_notices": {
|
"api_notices": {
|
||||||
"updated_at": 1757688552,
|
"updated_at": 1759651871,
|
||||||
"text": "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\nПо хорошему если updated_at другой по сравнению с вашем кэшем это сообщение должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\nwarning=True значит 'text' содержит важное а не как щас hint.\n\n ~fazziclay aka Stanislav;",
|
"text": "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\nПо хорошему если updated_at другой по сравнению с вашем кэшем это сообщение должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\nwarning=True значит 'text' содержит важное а не как щас hint.\n\n ~fazziclay aka Stanislav;\n\n2025-10-05: добавлено data_source_hash в эксель и в группу. Это SHA1 of скачанный эксель файл.",
|
||||||
"warning": False,
|
"warning": False,
|
||||||
"tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'"
|
"tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'"
|
||||||
},
|
},
|
||||||
"doubled_groups": [],
|
"doubled_groups": [],
|
||||||
"debug": {
|
"debug": {
|
||||||
"bleu~~": 1
|
"bleu~~": 2
|
||||||
},
|
},
|
||||||
"excels": [],
|
"excels": [],
|
||||||
"facultets": FACULTETS,
|
"facultets": FACULTETS,
|
||||||
@@ -66,22 +67,28 @@ result = {
|
|||||||
|
|
||||||
def process_excel_file(facultet, excel_url, counter, latest_changed):
|
def process_excel_file(facultet, excel_url, counter, latest_changed):
|
||||||
is_xlsx = excel_url.endswith(".xlsx")
|
is_xlsx = excel_url.endswith(".xlsx")
|
||||||
filename = f"{DIRNAME}/" + f"_[C{counter}]_" + facultet + ".xls" + ("x" if is_xlsx else "")
|
download_place = f"{DIRNAME}/" + f"_[C{counter}]_" + facultet + ".xls" + ("x" if is_xlsx else "")
|
||||||
|
|
||||||
|
excel_filename = excel_url.split("/")[-1]
|
||||||
|
|
||||||
excel_info = {
|
excel_info = {
|
||||||
"filename": excel_url.split("/")[-1],
|
"filename": excel_filename,
|
||||||
|
"data_source_hash": None,
|
||||||
"url": excel_url,
|
"url": excel_url,
|
||||||
"latest_changed": latest_changed,
|
"latest_changed": latest_changed,
|
||||||
"download_place": filename,
|
"download_place": download_place,
|
||||||
"group_names_parsed": [],
|
"group_names_parsed": [],
|
||||||
"facultet": facultet,
|
"facultet": facultet,
|
||||||
"counter": counter
|
"counter": counter,
|
||||||
|
"week_keys_metadata": {}
|
||||||
}
|
}
|
||||||
parser.LOGGING = False
|
parser.LOGGING = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
aigenerated.download_file_from_url(excel_url, filename)
|
aigenerated.download_file_from_url(excel_url, download_place)
|
||||||
reader = translations.create_reader(filename)
|
sha1hash = hashes.calculate_sha1(download_place)
|
||||||
|
excel_info['data_source_hash'] = sha1hash
|
||||||
|
reader = translations.create_reader(download_place)
|
||||||
print("Reader info")
|
print("Reader info")
|
||||||
print(reader.info())
|
print(reader.info())
|
||||||
|
|
||||||
@@ -96,6 +103,9 @@ def process_excel_file(facultet, excel_url, counter, latest_changed):
|
|||||||
if prs.parser_error is not None:
|
if prs.parser_error is not None:
|
||||||
excel_info["parser_error_cycle_" + str(reader.get_sheet_index()+1)] = prs.parser_error
|
excel_info["parser_error_cycle_" + str(reader.get_sheet_index()+1)] = prs.parser_error
|
||||||
|
|
||||||
|
if prs.parser_warnings is not None and len(prs.parser_warnings) > 0:
|
||||||
|
excel_info["parser_warnings_cycle_" + str(reader.get_sheet_index()+1)] = prs.parser_warnings
|
||||||
|
|
||||||
for group_name in prs.groups.keys():
|
for group_name in prs.groups.keys():
|
||||||
if group_name in result_groups.keys():
|
if group_name in result_groups.keys():
|
||||||
print(f" -- WTF -- Doubled groups -- name: {group_name}")
|
print(f" -- WTF -- Doubled groups -- name: {group_name}")
|
||||||
@@ -110,14 +120,16 @@ def process_excel_file(facultet, excel_url, counter, latest_changed):
|
|||||||
|
|
||||||
gr = result_groups[group_name] = prs.groups[group_name]
|
gr = result_groups[group_name] = prs.groups[group_name]
|
||||||
gr['facultet'] = facultet
|
gr['facultet'] = facultet
|
||||||
gr['data_source'] = excel_url.split("/")[-1]
|
gr['data_source'] = excel_filename # same as 'filename' in excel_info's
|
||||||
|
gr['data_source_hash'] = sha1hash
|
||||||
gr['debug'] = {
|
gr['debug'] = {
|
||||||
"excel_url": excel_url,
|
"excel_url": excel_url,
|
||||||
"reader_info": reader.info(),
|
"reader_info": reader.info(),
|
||||||
"reader_sheet_index": reader.get_sheet_index(),
|
"reader_sheet_index": reader.get_sheet_index(),
|
||||||
"filename": filename
|
"download_place": download_place
|
||||||
}
|
}
|
||||||
excel_info["group_names_parsed"].append(group_name)
|
excel_info["group_names_parsed"].append(group_name)
|
||||||
|
excel_info['week_keys_metadata'] = prs.week_keys_metadata
|
||||||
|
|
||||||
print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups.keys()))
|
print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups.keys()))
|
||||||
|
|
||||||
|
|||||||
77
parser.py
77
parser.py
@@ -10,8 +10,9 @@ import aigenerated
|
|||||||
from coord import Coord, Merged
|
from coord import Coord, Merged
|
||||||
from translations import ExcelSheetReader
|
from translations import ExcelSheetReader
|
||||||
import utils
|
import utils
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
LOGGING = True
|
LOGGING = False
|
||||||
|
|
||||||
def pprint(*args, **kwargs):
|
def pprint(*args, **kwargs):
|
||||||
if LOGGING:
|
if LOGGING:
|
||||||
@@ -21,9 +22,11 @@ class Parser:
|
|||||||
def __init__(self, reader: ExcelSheetReader):
|
def __init__(self, reader: ExcelSheetReader):
|
||||||
self.reader = reader
|
self.reader = reader
|
||||||
self.groups = {}
|
self.groups = {}
|
||||||
self.teachers = set()
|
self.week_keys_metadata = {}
|
||||||
self.places = set()
|
|
||||||
|
self.weeknums: defaultdict = defaultdict(set) # no support json!
|
||||||
self.parser_error = None
|
self.parser_error = None
|
||||||
|
self.parser_warnings = []
|
||||||
pprint("Parser created for '{0}'".format(reader.info()))
|
pprint("Parser created for '{0}'".format(reader.info()))
|
||||||
|
|
||||||
def parse(self):
|
def parse(self):
|
||||||
@@ -34,6 +37,11 @@ class Parser:
|
|||||||
self.parser_error = "'ПОНЕДЕЛЬНИК' не найден в таблице."
|
self.parser_error = "'ПОНЕДЕЛЬНИК' не найден в таблице."
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if monday.col != 4:
|
||||||
|
print("--- warning parse! ---")
|
||||||
|
print(f"Monday col != 4 (actual: {monday})")
|
||||||
|
self.parser_warnings.append(f"Monday col != 4 (actual: {monday}); Это, наверное, может работать не стабильно!")
|
||||||
|
|
||||||
head_rx = monday.row - 1 # выше первого понидельника
|
head_rx = monday.row - 1 # выше первого понидельника
|
||||||
if head_rx < 0:
|
if head_rx < 0:
|
||||||
raise Exception("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")
|
raise Exception("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")
|
||||||
@@ -50,7 +58,61 @@ class Parser:
|
|||||||
self.process_group(group, monday)
|
self.process_group(group, monday)
|
||||||
pprint("\nEND OF PROCESS GROUP\n")
|
pprint("\nEND OF PROCESS GROUP\n")
|
||||||
|
|
||||||
pprint(self.teachers)
|
self.process_weekmetadatas(monday)
|
||||||
|
|
||||||
|
def process_weekmetadatas(self, first_monday: "Coord"):
    """
    Fill ``self.week_keys_metadata`` for every key stored in ``self.weeknums``.

    For each week key exactly one merged weekday block is expected in
    ``self.weeknums``. Starting in the column immediately to the left of that
    block and moving left one column at a time, the cell in the row above
    ``first_monday`` is read as a month caption; the values that
    ``utils.parse_all_dirt`` finds below that caption (one column wide, as
    tall as the weekday block) are stored as strings under
    ``self.week_keys_metadata[week_key][month_name]``. The walk stops at the
    first empty caption cell or after column 0.

    If a key does not have exactly one block, or the block is not one column
    wide, ``self.week_keys_metadata[week_key]`` is set to
    ``{"error": True, "error_text": ...}`` and a message is appended to
    ``self.parser_warnings``.

    Args:
        first_monday: Coordinate whose ``row`` is used to locate the caption
            row (``row - 1``).
    """
    # The caption row is the same for every key, so compute it once.
    month_row = first_monday.row - 1

    for week_key, merged_cells in self.weeknums.items():
        pprint(week_key)
        found = len(merged_cells)
        if found != 1:
            self.week_keys_metadata[week_key] = {
                "error": True,
                "error_text": f"Parse error: count of found '{week_key}' (need view like WEEKDAY_1; weekday - in r; 1 - weeknum[1, 2]) is {found}; required only one!"
            }
            self.parser_warnings.append(f"Processing weekmetadata for '{week_key}' failed because count of uniqie merged cells not one (actual: {found}). :<")
            continue

        # Peek at the only element instead of pop(): popping would empty the
        # set kept in self.weeknums and make a repeated call report an error.
        weekday_merged: Merged = next(iter(merged_cells))
        block_width = weekday_merged.width()
        if block_width != 1:
            self.week_keys_metadata[week_key] = {
                "error": True,
                "error_text": f"Weekday excel block width != 1 (actual {block_width})"
            }
            self.parser_warnings.append(f"Processing weekmetadata for '{week_key}' failed because weekday excel block width != 1 (actual {block_width})")
            continue

        curr_col = weekday_merged.low.col - 1
        while curr_col >= 0:
            month_pos = Coord(month_row, curr_col)
            month_cell = month_pos.cell(self.reader)
            if month_cell.is_empty():
                pprint("month cell is empty")
                break
            month_name = str(month_cell.value).strip()
            pprint(month_cell)
            # NOTE(review): presumably returns the non-empty values found in
            # the given region - confirm against utils.parse_all_dirt.
            all_nums_of_month = utils.parse_all_dirt(self.reader, month_pos.shift(down=1), right=1, down=weekday_merged.height())

            # The entry for this key is created only once a caption is found.
            month_numbers = self.week_keys_metadata.setdefault(week_key, {}).setdefault(month_name, [])

            for raw_num in all_nums_of_month:
                # Normalise first, then compare: the list holds strings, so
                # checking the raw value would never detect a duplicate float.
                text = str(raw_num)
                # Drop only a trailing ".0" (5.0 -> "5"); a ".0" in the
                # middle of the text, as in "10.05", is left intact.
                if text.endswith(".0"):
                    text = text[:-2]
                if text not in month_numbers:
                    month_numbers.append(text)

            curr_col -= 1
|
||||||
|
|
||||||
|
|
||||||
|
def push_weekday_meta(self, weekday: str, weeknum: int, week_key_name: str, merged: "Merged"):
    """
    Record ``merged`` as a block seen for the week key ``week_key_name``.

    The block is added to the set stored under ``week_key_name`` in
    ``self.weeknums``, so adding the same block twice keeps a single entry.
    ``weekday`` and ``weeknum`` are accepted but not used here.
    """
    seen_blocks = self.weeknums[week_key_name]
    seen_blocks.add(merged)
|
||||||
|
|
||||||
def parse_potokoviy(self, merged: Merged):
|
def parse_potokoviy(self, merged: Merged):
|
||||||
speaker = None
|
speaker = None
|
||||||
@@ -66,7 +128,7 @@ class Parser:
|
|||||||
|
|
||||||
return {"loc": str(location).strip(), "leader": str(speaker).strip(), "name": str(merged.cell(self.reader).value).strip()}
|
return {"loc": str(location).strip(), "leader": str(speaker).strip(), "name": str(merged.cell(self.reader).value).strip()}
|
||||||
|
|
||||||
def process_group(self, group, monday):
|
def process_group(self, group: dict, monday: Coord):
|
||||||
"""
|
"""
|
||||||
Обработать группы, выполняется для каждой группы, после того как они распарены (parse_groups)
|
Обработать группы, выполняется для каждой группы, после того как они распарены (parse_groups)
|
||||||
group = {'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6'}
|
group = {'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6'}
|
||||||
@@ -109,6 +171,9 @@ class Parser:
|
|||||||
if not skip:
|
if not skip:
|
||||||
next = 3 # на сколько пыгнуть для следующего шага?
|
next = 3 # на сколько пыгнуть для следующего шага?
|
||||||
|
|
||||||
|
weekday_key_name = weekday + ("_1" if weeknum == 1 else "_2")
|
||||||
|
self.push_weekday_meta(weekday, weeknum, weekday_key_name, weekday_mr)
|
||||||
|
|
||||||
is_empty_lesson = len(utils.parse_all_dirt(self.reader, pos, 4, 3)) == 0 # если в поле не найдено ничего..
|
is_empty_lesson = len(utils.parse_all_dirt(self.reader, pos, 4, 3)) == 0 # если в поле не найдено ничего..
|
||||||
parsed_discipline_name = None
|
parsed_discipline_name = None
|
||||||
parsed_location = None
|
parsed_location = None
|
||||||
@@ -182,7 +247,7 @@ class Parser:
|
|||||||
# если не пустой предмет то записываем его
|
# если не пустой предмет то записываем его
|
||||||
if not is_empty_lesson:
|
if not is_empty_lesson:
|
||||||
slots = group['slots']
|
slots = group['slots']
|
||||||
w = weekday + ("_1" if weeknum == 1 else "_2")
|
w = weekday_key_name
|
||||||
if w not in slots.keys():
|
if w not in slots.keys():
|
||||||
slots[w] = {}
|
slots[w] = {}
|
||||||
|
|
||||||
|
|||||||
2
utils.py
2
utils.py
@@ -53,7 +53,7 @@ def remove_from_list(l: list, todel: list):
|
|||||||
|
|
||||||
return l
|
return l
|
||||||
|
|
||||||
def parse_all_dirt(reader: "ExcelSheetReader", min_pos, right, down):
|
def parse_all_dirt(reader: "ExcelSheetReader", min_pos: Coord, right, down):
|
||||||
RET = set()
|
RET = set()
|
||||||
|
|
||||||
row = min_pos.row
|
row = min_pos.row
|
||||||
|
|||||||
Reference in New Issue
Block a user