Files
VSTU_Schedule_Parser/parser.py

243 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Copyright Stanislav Mironov
# Ordered pair-number labels: "1-2", "3-4", ... "15-16".
# The order matters: the parser looks up the label following a given one
# when a pair-number cell is empty in the sheet.
PAIR_NUMS = [f"{first}-{first + 1}" for first in range(1, 16, 2)]
import json
import uuid
import aigenerated
from coord import Coord, Merged
from translations import ExcelSheetReader
import utils
# Module-wide switch for the debug output produced through pprint().
LOGGING = True


def pprint(*args, **kwargs):
    """Forward the call to ``print`` unchanged when ``LOGGING`` is truthy.

    When ``LOGGING`` is falsy nothing is printed. Always returns ``None``.
    """
    if not LOGGING:
        return
    print(*args, **kwargs)
class Parser:
    """Extract per-group schedule slots from one Excel sheet.

    The sheet is accessed only through ``reader``. ``parse`` fills
    ``self.groups`` with one record per group; each record's ``'slots'`` dict
    is filled by ``process_group``.
    """

    def __init__(self, reader: "ExcelSheetReader"):
        """Store the sheet reader and create empty result containers."""
        self.reader = reader
        self.groups = {}
        # NOTE(review): teachers/places are created here and teachers is
        # printed at the end of parse(), but nothing in this class adds to them.
        self.teachers = set()
        self.places = set()
        # Failure reason recorded by parse(); stays None otherwise.
        self.parser_error = None
        pprint("Parser created for '{0}'".format(reader.info()))

    @staticmethod
    def _cell_text(value) -> str:
        """Return *value* as stripped text; ``None`` becomes ``""``."""
        return "" if value is None else str(value).strip()

    def parse(self) -> None:
        """Find the header row, discover the groups and process each of them.

        The header row is the row directly above the cell found for
        'ПОНЕДЕЛЬНИК'. When that cell is not found, ``self.parser_error`` is
        set and the method returns without parsing anything.

        Raises:
            Exception: if the computed header row index is negative.
        """
        monday = self.reader.find("ПОНЕДЕЛЬНИК")
        if monday is None:
            # Reported with plain print, i.e. regardless of the LOGGING flag.
            print(" -- Failed parse! -- ")
            print("ПОНЕДЕЛЬНИК НЕ НАЙДЕН!")
            self.parser_error = "'ПОНЕДЕЛЬНИК' не найден в таблице."
            return

        head_rx = monday.row - 1  # the row above the first Monday cell
        if head_rx < 0:
            raise Exception("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")

        head = self.reader.get_row_values(head_rx)  # whole header row (months, groups)
        pprint(f"head={head}")

        self.groups = parse_groups(self.reader, head, monday, head_rx)
        pprint(f'self.groups={json.dumps(self.groups, indent=2, ensure_ascii=False)}')
        pprint("\n\n\n")

        for group in self.groups.values():
            pprint("\nSTART OF PROCESS GROUP\n")
            self.process_group(group, monday)
            pprint("\nEND OF PROCESS GROUP\n")
        pprint(self.teachers)

    def parse_potokoviy(self, merged: "Merged") -> dict:
        """Read name, leader and location of a wide (multi-group) lesson.

        The leader is read from the cell ``merged.height()`` rows below
        ``merged.low``, the location from the cell one row below
        ``merged.high``, and the name from the merged cell itself.

        Returns:
            A dict with keys ``"loc"``, ``"leader"`` and ``"name"``. Every
            value is stripped text; a cell whose value is ``None`` yields
            ``""`` (not the literal text ``"None"``), so the caller's
            empty-string cleanup can drop it.
        """
        reader = self.reader
        speaker = merged.low.shift(down=merged.height()).cell(reader).value
        location = merged.high.shift(down=1).cell(reader).value
        name = merged.cell(reader).value
        return {
            "loc": self._cell_text(location),
            "leader": self._cell_text(speaker),
            "name": self._cell_text(name),
        }

    def process_group(self, group, monday) -> None:
        """Walk down one group's columns and fill ``group['slots']``.

        Runs once per group, after ``parse_groups`` has produced the group
        records.

        Args:
            group: group record, e.g.
                ``{'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6'}``
                plus the ``'slots'`` dict that this method writes into.
            monday: accepted for interface compatibility; not read here.
        """
        pprint(f"process_group group={group}")
        group_name = group['name']
        pprint(group_name)

        # Start one row below the group header; this is also the sheet row index.
        row = group['position'][0] + 1
        weeknum = 1  # becomes 2 at the first row whose weekday cell is empty
        previous_pair = None

        while row < self.reader.get_row_count():  # never walk past the last row
            pos = Coord(row, group['position'][1])  # top-left corner of the block (= low)
            pos_right = pos.shift(right=3)
            pair_pos = pos.replace(col=5)  # pair number is read from column 5
            weekday_pos = pos.replace(col=4)  # weekday is read from column 4
            merged = self.reader.get_merged_coord(pos)
            cv = merged.cell(self.reader).value

            # The group name is repeated at the bottom of its column and is
            # used as the end-of-schedule anchor.
            if utils.unspace(cv) == group_name:
                pprint("Lesson == group name; ending group loop.")
                break

            weekday_mr = self.reader.get_merged_coord(weekday_pos)
            weekday = utils.unspace(weekday_mr.cell(self.reader).value)
            pair_mr = self.reader.get_merged_coord(pair_pos)
            pair = utils.unspace(pair_mr.cell(self.reader).value)

            # The pair-number cell may be empty in the sheet: restore it from
            # the previous pair, or invent a unique placeholder key.
            pair_missing_in_excel = pair == ""
            previous_dump = previous_pair
            if pair_missing_in_excel:
                if previous_pair is None or previous_pair == "":
                    pair = f"EMPTY_IN_EXCEL_{uuid.uuid4()}"
                else:
                    pair = utils.next_element(PAIR_NUMS, previous_pair)
            if pair != "":
                previous_pair = pair

            if weekday == "":
                if weeknum != 1:
                    # Second row with an empty weekday: end of the schedule.
                    break
                # First row with an empty weekday: switch to week 2 and step
                # over this single row.
                weeknum += 1
                pprint("------")
                row += 1
                continue

            step = 3  # how many rows to advance to reach the next block
            # Nothing found in the 4x3 area at pos means there is no lesson.
            is_empty_lesson = len(utils.parse_all_dirt(self.reader, pos, 4, 3)) == 0
            if is_empty_lesson:
                # Empty slots are never stored, so no further parsing is needed.
                row += step
                continue

            parsed_discipline_name = None
            parsed_location = None
            parsed_leader = None
            pairs = 1
            is_solid = pos_right in merged
            # A block wider than 4 columns is treated as a flow lecture
            # (one lesson shared by several groups).
            is_wide_maybe_potokoviy = merged.width() > 4

            # Each 3-row block without a bottom border extends the lesson by
            # one more pair.
            cur = merged.low.shift(down=2)
            while utils.has_no_bottom_border(self.reader, cur):
                step += 3
                pairs += 1
                cur = cur.shift(down=3)

            if is_wide_maybe_potokoviy:
                ret = self.parse_potokoviy(merged)
                parsed_location = ret['loc']
                parsed_leader = ret['leader']
                parsed_discipline_name = ret['name']
                parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, merged.width(), step))
            else:
                if is_solid:
                    parsed_discipline_name = cv
                parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, 4, step))

            # Try to pick the teacher(s) and room(s) out of the raw values.
            prepods = set()
            if parsed_leader is not None:
                prepods.add(parsed_leader.strip())
            locations = set()
            if parsed_location is not None:
                locations.add(parsed_location.strip().replace(" ", ""))
            for x in parsed_uncotigorized:
                if aigenerated.is_surname_string(x):
                    prepods.add(x.strip())
                if aigenerated.is_room_number(x):
                    locations.add(x.strip().replace(" ", "") if x is not None else None)

            # Fall back to the remaining raw values when no discipline name
            # was found.
            if parsed_discipline_name is None:
                leftovers = utils.remove_from_list(list(parsed_uncotigorized), [parsed_leader, parsed_location])
                parsed_discipline_name = " ".join(leftovers)

            # Drop None / empty-string entries from the collected values.
            utils.discards_list(prepods, nones=True, emptystrings=True)
            utils.discards_list(locations, nones=True, emptystrings=True)
            utils.discards_list(parsed_uncotigorized, nones=True, emptystrings=True)

            # Store the lesson under "<weekday>_<week>" -> pair label.
            day_key = weekday + ("_1" if weeknum == 1 else "_2")
            today = group['slots'].setdefault(day_key, {})
            today[pair] = {
                "excel_pos": str(pos),
                "discipline_name": parsed_discipline_name.strip(),
                "locations": list(locations),
                "leads": list(prepods),
                "is_solid": is_solid,
                "time_coeff": pairs,
                "is_flow": is_wide_maybe_potokoviy,
                "lefttopmerged": {
                    "width": merged.width(),
                    "height": merged.height(),
                    "excel_range": utils.merged_humanize(merged.as_numbers())
                },
                "raw": parsed_uncotigorized,
                "weekday": utils.weekday_to_num(weekday),
                "weeknum": weeknum
            }
            if pair_missing_in_excel:
                # Record how the missing pair number was restored.
                today[pair]['pair_num_empty'] = {
                    "prev": previous_dump,
                    "restoted": pair != "",
                    "pair": pair
                }

            # Advance to the next block.
            row += step
def parse_groups(reader: "ExcelSheetReader", head, monday: Coord, head_rx):
    """Collect the groups listed in the header row together with their metadata.

    In essence this yields each group's name and the coordinates of its
    top header cell (e.g. AQ6:AT6).

    Header cells at column indexes up to ``monday.col + 1`` are stepped over.
    Scanning stops at the first later cell that has no merged range, is
    empty, or whose merged range is not exactly 4 columns wide.

    Returns:
        A dict keyed by the un-spaced group name; each value holds ``name``,
        ``position`` (``[head_rx, column]``), ``position_human`` and an empty
        ``slots`` dict.
    """
    found = {}
    last_skipped_col = monday.col + 1
    total = len(head)
    col = 0
    while col < total:
        title = head[col]
        pprint(f"while i={col} head[i]={title}")
        span = reader.get_merged_coord(Coord(head_rx, col))
        if col > last_skipped_col:
            if span is None or title == "":
                break
            if span.width() != 4:
                pprint(f"WARNING: group header witdh !=4 (found: {span.width()}); blocks !=4 not supported by parser.")
                break
            name = utils.unspace(title)
            found[name] = {
                "name": name,
                "position": [head_rx, col],
                "position_human": utils.merged_humanize(span.as_numbers()),
                "slots": {},
            }
        # Jump over the whole merged range, or one cell when there is none.
        col += 1 if span is None else span.width()
    return found