253 lines
11 KiB
Python
253 lines
11 KiB
Python
# Copyright Stanislav Mironov
|
||
|
||
# Labels of the lesson ("pair") slots in the order they follow each other
# during a day; used to restore a pair label that is missing in the sheet.
PAIR_NUMS = [
    "1-2",
    "3-4",
    "5-6",
    "7-8",
    "9-10",
    "11-12",
    "13-14",
    "15-16",
]
|
||
|
||
import json
|
||
import uuid
|
||
import aigenerated
|
||
from coord import Coord, Merged
|
||
from translations import ExcelSheetReader
|
||
import utils
|
||
|
||
# Module-wide switch for the debug output produced through pprint().
LOGGING = True


def pprint(*args, **kwargs):
    """Forward the arguments to the built-in ``print`` while ``LOGGING`` is truthy.

    Does nothing when ``LOGGING`` is falsy. Always returns ``None``.
    """
    if not LOGGING:
        return
    print(*args, **kwargs)
|
||
|
||
class Parser:
    """Extract per-group timetable slots from one sheet of an Excel workbook.

    The sheet is accessed only through the ``reader`` object. ``parse()``
    locates the "ПОНЕДЕЛЬНИК" (Monday) cell, reads the group headers from the
    row directly above it and then fills ``group['slots']`` for every group.

    Attributes:
        reader: the sheet reader passed to the constructor.
        groups: mapping ``group name -> group record`` produced by ``parse()``.
        teachers: lecturer names collected from all recorded lessons.
        places: room labels collected from all recorded lessons.
        parser_error: human-readable message when the Monday anchor is missing,
            otherwise ``None``.
    """

    def __init__(self, reader: "ExcelSheetReader"):
        """Bind the parser to ``reader`` and start with empty results."""
        self.reader = reader
        self.groups = {}
        self.teachers = set()
        self.places = set()
        self.parser_error = None
        pprint(f"Parser created for '{reader.info()}'")

    def parse(self):
        """Parse the whole sheet into ``self.groups``.

        When the "ПОНЕДЕЛЬНИК" cell cannot be found, the failure is printed,
        ``self.parser_error`` is set and the method returns without raising.

        Raises:
            Exception: when ``monday.row - 1`` is negative, i.e. there is no
                row above the Monday cell to read group headers from.
        """
        monday = self.reader.find("ПОНЕДЕЛЬНИК")
        if monday is None:
            print(" -- Failed parse! -- ")
            print("ПОНЕДЕЛЬНИК НЕ НАЙДЕН!")
            self.parser_error = "'ПОНЕДЕЛЬНИК' не найден в таблице."
            return

        head_rx = monday.row - 1  # the header row sits right above the first Monday
        if head_rx < 0:
            raise Exception("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")

        head = self.reader.get_row_values(head_rx)  # the whole header row (months, groups)
        pprint(f"head={head}")
        self.groups = parse_groups(self.reader, head, monday, head_rx)
        pprint(f'self.groups={json.dumps(self.groups, indent=2, ensure_ascii=False)}')

        pprint("\n\n\n")

        for group in self.groups.values():
            pprint("\nSTART OF PROCESS GROUP\n")
            self.process_group(group, monday)
            pprint("\nEND OF PROCESS GROUP\n")

        pprint(self.teachers)

    def parse_potokoviy(self, merged: "Merged"):
        """Read the name, lecturer and room of a wide ("flow") lesson block.

        The lecturer is taken from the cell ``merged.height()`` rows below
        ``merged.low``, the room from the cell one row below ``merged.high``
        and the name from the merged cell itself.

        Returns:
            dict with keys ``"loc"``, ``"leader"`` and ``"name"``. Every value
            is a stripped string; an empty (``None``) cell yields ``""``
            instead of the literal text ``"None"``.
        """
        reader = self.reader

        def text_of(cell):
            # str(None) would produce "None", which later looks like real data.
            value = cell.value
            return "" if value is None else str(value).strip()

        speaker_cell = merged.low.shift(down=merged.height()).cell(reader)
        location_cell = merged.high.shift(down=1).cell(reader)
        return {
            "loc": text_of(location_cell),
            "leader": text_of(speaker_cell),
            "name": text_of(merged.cell(reader)),
        }

    def process_group(self, group, monday):
        """Fill ``group['slots']`` with the lessons found below the group header.

        Runs once per group after ``parse_groups`` has produced the records, e.g.
        group = {'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6'}

        Lecturers and rooms of every recorded lesson are also added to
        ``self.teachers`` and ``self.places``.

        Args:
            group: group record; ``name``, ``position`` (``[row, col]`` of the
                header cell) and ``slots`` are used.
            monday: coordinate of the "ПОНЕДЕЛЬНИК" cell. Not used here; kept
                so existing callers keep working.
                NOTE(review): the weekday/pair columns below are hard-coded as
                4 and 5 - confirm whether they should derive from ``monday``.
        """
        pprint(f"process_group group={group}")
        group_name = group['name']
        pprint(group_name)
        row = group['position'][0] + 1  # start on the row right under the header
        group_col = group['position'][1]
        weeknum = 1  # switches to 2 at the first row that has no weekday
        previous_pair = None
        while row < self.reader.get_row_count():  # never walk past the last row
            pos = Coord(row, group_col)  # top-left cell of the current lesson block
            pprint(f"while pos={pos}")
            pos_right = pos.shift(right=3)
            pair_pos = pos.replace(col=5)
            weekday_pos = pos.replace(col=4)
            merged = self.reader.get_merged_coord(pos)
            cv = merged.cell(self.reader).value

            # The group name is repeated at the end of the column; use it as
            # the anchor that terminates the walk.
            if utils.unspace(cv) == group_name:
                pprint("Lesson == group name; ending group loop.")
                break

            weekday = utils.unspace(self.reader.get_merged_coord(weekday_pos).cell(self.reader).value)
            pair = utils.unspace(self.reader.get_merged_coord(pair_pos).cell(self.reader).value)

            if weekday == "":
                # The first row without a weekday switches to week 2 and is
                # skipped; a second such row ends the group.
                if weeknum != 1:
                    break
                weeknum += 1
                pprint("------")
                row += 1
                continue

            step = 3  # rows to advance for the next lesson; grows by 3 per extra pair

            # Nothing found in the 4x3 field means the slot is empty.
            is_empty_lesson = len(utils.parse_all_dirt(self.reader, pos, 4, 3)) == 0
            parsed_discipline_name = None
            parsed_location = None
            parsed_leader = None
            pairs = 1
            too_many_pairs = False
            is_solid = pos_right in merged  # merged range reaches 3 columns to the right
            parsed_uncategorized = []
            is_flow = merged.width() > 4  # wider than one group: lecture shared by several groups

            if not is_empty_lesson:
                # Extend the block while the cell on its last row has no bottom border.
                cur = pos.shift(down=2)
                while utils.has_no_bottom_border(self.reader, cur):
                    step += 3
                    pairs += 1
                    pprint(f"next = {step} cur={cur}")
                    if pairs >= 7:
                        too_many_pairs = True
                        break
                    cur = cur.shift(down=3)

                if is_flow:
                    flow = self.parse_potokoviy(merged)
                    parsed_location = flow['loc']
                    parsed_leader = flow['leader']
                    parsed_discipline_name = flow['name']
                    parsed_uncategorized = list(
                        utils.parse_all_dirt(self.reader, merged.low, merged.width(), step)
                    )
                else:
                    if is_solid:
                        parsed_discipline_name = cv
                    parsed_uncategorized = list(
                        utils.parse_all_dirt(self.reader, merged.low, 4, step)
                    )

            # Try to restore the pair label (e.g. "1-2") when its cell is empty.
            pair_was_empty = pair == ""
            previous_dump = previous_pair
            if pair_was_empty:
                if previous_pair is None or previous_pair == "":
                    pair = f"EMPTY_IN_EXCEL_{uuid.uuid4()}"
                else:
                    pair = utils.next_element(PAIR_NUMS, previous_pair)

            if pair != "":
                # Workaround: after a multi-pair block the next label cannot be
                # derived reliably, so forget the previous one.
                previous_pair = pair if step == 3 else None

            # Pick lecturers and rooms out of the uncategorized strings.
            prepods = set()
            if parsed_leader:
                prepods.add(parsed_leader.strip())

            locations = set()
            if parsed_location:
                locations.add(parsed_location.strip().replace(" ", ""))

            for item in parsed_uncategorized:
                if aigenerated.is_surname_string(item):
                    prepods.add(item.strip())

                if aigenerated.is_room_number(item):
                    locations.add(item.strip().replace(" ", "") if item is not None else None)

            # Fall back to the leftover strings when no discipline name was found.
            if parsed_discipline_name is None:
                leftovers = utils.remove_from_list(
                    list(parsed_uncategorized), [parsed_leader, parsed_location]
                )
                parsed_discipline_name = " ".join(leftovers)

            # Drop None and empty strings from the collected data.
            utils.discards_list(prepods, nones=True, emptystrings=True)
            utils.discards_list(locations, nones=True, emptystrings=True)
            utils.discards_list(parsed_uncategorized, nones=True, emptystrings=True)

            # Record the lesson unless the slot is empty.
            if not is_empty_lesson:
                self.teachers.update(prepods)
                self.places.update(locations)

                day_key = weekday + ("_1" if weeknum == 1 else "_2")
                today = group['slots'].setdefault(day_key, {})
                today[pair] = {
                    "excel_pos": str(pos),
                    # str() keeps a non-string cell value from crashing .strip()
                    "discipline_name": str(parsed_discipline_name).strip(),
                    "locations": list(locations),
                    "leads": list(prepods),
                    "is_solid": is_solid,
                    "time_coeff": pairs,
                    "is_flow": is_flow,
                    "lefttopmerged": {
                        "width": merged.width(),
                        "height": merged.height(),
                        "excel_range": utils.merged_humanize(merged.as_numbers()),
                    },
                    "raw": parsed_uncategorized,
                    "weekday": utils.weekday_to_num(weekday),
                    "weeknum": weeknum,
                }
                if pair_was_empty:
                    today[pair]['pair_num_empty'] = {
                        "prev": previous_dump,
                        "restored": pair != "",
                        "pair": pair,
                    }
                if too_many_pairs:
                    today[pair]['to_many_parsing_time_coeff'] = True

            # Advance to the next lesson block.
            row += step
|
||
|
||
|
||
|
||
def parse_groups(reader: "ExcelSheetReader", head, monday: Coord, head_rx):
    """Collect the group headers found in the header row.

    Walks ``head`` from the first column, jumping over each merged range.
    Columns up to and including ``monday.col + 1`` are only stepped over.
    Past them every header cell becomes a group record until an unmerged or
    empty cell, or a header whose merged width is not 4, stops the scan.

    Args:
        reader: sheet reader used to look up merged ranges.
        head: values of the header row, indexed by column.
        monday: coordinate of the Monday cell; only its ``col`` is used.
        head_rx: row index of the header row.

    Returns:
        dict mapping the group name to a record with ``name``, ``position``
        (``[row, col]``), ``position_human`` (e.g. ``AQ6:AT6``) and an empty
        ``slots`` dict.
    """
    found = {}
    last_service_col = monday.col + 1
    total = len(head)
    col = 0
    while col < total:
        title = head[col]
        pprint(f"while i={col} head[i]={title}")
        span = reader.get_merged_coord(Coord(head_rx, col))

        if col > last_service_col:
            if span is None or title == "":
                break

            if span.width() != 4:
                pprint(f"WARNING: group header witdh !=4 (found: {span.width()}); blocks !=4 not supported by parser.")
                break

            group_name = utils.unspace(title)
            found[group_name] = dict(
                name=group_name,
                position=[head_rx, col],
                position_human=utils.merged_humanize(span.as_numbers()),
                slots={},
            )

        # Jump over the whole merged range, or a single unmerged cell.
        col += 1 if span is None else span.width()

    return found
|
||
|