Files
VSTU_Schedule_Parser/parser.py
2025-09-11 14:16:38 +03:00

228 lines
9.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import xlrd
import aigenerated
from coord import Coord, Merged
import utils
# Module-wide switch: when falsy, pprint() produces no output.
LOGGING = True


def pprint(*args, **kwargs):
    """Forward all arguments to print() when LOGGING is truthy; otherwise do nothing."""
    if not LOGGING:
        return
    print(*args, **kwargs)
class Parser:
    """Parse one worksheet of a schedule workbook into per-group lesson slots.

    Attributes:
        sh: the worksheet being parsed.
        groups: group name -> metadata dict (``name``, ``position``,
            ``position_human``, ``slots``); filled by :meth:`parse`.
        teachers: last names of the teachers recognised while parsing.
        places: created empty; this class never adds to it.
    """

    # Fixed sheet layout assumed by process_group().
    _WEEKDAY_COL = 4  # column read for the weekday name
    _PAIR_COL = 5  # column read for the lesson ("pair") number
    _GROUP_WIDTH = 4  # columns occupied by one group
    _SINGLE_STEP = 3  # rows consumed by an ordinary lesson
    _DOUBLE_STEP = 6  # rows consumed by a double-length lesson

    def __init__(self, sheet: "xlrd.sheet.Sheet"):
        """Remember the worksheet and create the empty result containers."""
        self.sh: "xlrd.sheet.Sheet" = sheet
        self.groups = {}
        self.teachers = set()
        self.places = set()
        pprint(f"Parser created for '{self.sh.name}': size: {self.sh.nrows}x{self.sh.ncols}")

    @staticmethod
    def _normalize_room(value):
        """Return ``value`` without spaces and hyphens; ``None`` passes through."""
        if value is None:
            return None
        return value.replace(" ", "").replace("-", "")

    def parse(self):
        """Find the header row above the first Monday cell and parse every group.

        The result is stored in ``self.groups``; the method returns ``None``.
        When no "ПОНЕДЕЛЬНИК" cell exists a failure message is printed and
        nothing is parsed.

        Raises:
            Exception: the Monday cell sits on the first row, so there is no
                header row above it.
        """
        monday = utils.find(self.sh, "ПОНЕДЕЛЬНИК")
        if monday is None:
            # Plain print(): unlike pprint() this is not gated by LOGGING.
            print(" -- Failed parse! -- ")
            print("ПОНЕДЕЛЬНИК НЕ НАЙДЕН!")
            return

        head_rx = monday[0] - 1  # the row directly above the first Monday
        if head_rx < 0:
            raise Exception("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")

        head = self.sh.row(head_rx)  # the whole header row (months, groups)
        pprint(f"head={head}")

        self.groups = parse_groups(self.sh, head, monday, head_rx)
        pprint(f'self.groups={json.dumps(self.groups, indent=2, ensure_ascii=False)}')
        pprint("\n\n\n")

        for group in self.groups.values():
            pprint("\nSTART OF PROCESS GROUP\n")
            self.process_group(group, monday)
            pprint("\nEND OF PROCESS GROUP\n")
        pprint(self.teachers)

    def parse_potokoviy(self, merged: "Merged"):
        """Read a wide ("flow") lecture that spans several groups.

        The leader is read from the cell ``merged.height()`` rows below
        ``merged.low``, the location from the cell one row below
        ``merged.high``, and the name from the merged cell itself.

        Returns:
            dict with the keys ``loc``, ``leader`` and ``name``; every value is
            converted with ``str()``.
        """
        speaker_pos = merged.low.shift(down=merged.height())
        speaker = speaker_pos.cell(self.sh).value
        location = merged.high.shift(down=1).cell(self.sh).value
        name = merged.cell(self.sh).value
        return {"loc": str(location), "leader": str(speaker), "name": str(name)}

    def process_group(self, group, monday):
        """Walk down one group's columns and fill ``group['slots']``.

        Runs once per group after ``parse_groups`` has produced the group
        dicts, e.g.
        ``{'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6', 'slots': {}}``.

        Args:
            group: group dict; ``position`` is ``[header_row, first_column]``
                and ``slots`` is filled in place as
                ``slots["<weekday>_<1|2>"][<pair>] = {...}``.
            monday: accepted for interface compatibility; not used here.

        Side effects:
            Adds every teacher last name recognised for a lesson to
            ``self.teachers``.
        """
        pprint(f"process_group group={group}")
        group_name = group['name']
        pprint(group_name)

        group_col = group['position'][1]
        row = group['position'][0] + 1  # start on the row right below the header
        weeknum = 1  # switches to 2 at the first row whose weekday cell is empty

        while row < self.sh.nrows:  # never read below the last sheet row
            # Current cell: the current row in the group's first column.
            pos = Coord(row, group_col)
            pos_right = pos.shift(right=self._GROUP_WIDTH - 1)
            merged = utils.get_merged_coord(self.sh, pos)
            right_cell = pos_right.cell(self.sh)
            merged_cell = merged.cell(self.sh)
            cv = merged_cell.value

            # The group name is repeated at the bottom of its column, which
            # makes it usable as an end-of-schedule anchor.
            if utils.unspace(cv) == group_name:
                pprint("Lesson == group name; ending group loop.")
                break

            weekday_mr = utils.get_merged_coord(self.sh, pos.replace(col=self._WEEKDAY_COL))
            weekday = utils.unspace(weekday_mr.cell(self.sh).value)
            pair_mr = utils.get_merged_coord(self.sh, pos.replace(col=self._PAIR_COL))
            pair = utils.unspace(pair_mr.cell(self.sh).value)

            if weekday == "":
                # The first empty weekday cell switches to week 2 and skips
                # that single row; any later one stops the scan.
                if weeknum != 1:
                    break
                weeknum += 1
                pprint("------")
                row += 1
                continue

            step = self._SINGLE_STEP  # rows to advance to reach the next lesson
            is_empty_lesson = (
                right_cell.ctype in utils.EMPTY_CTYPES
                and merged_cell.ctype in utils.EMPTY_CTYPES
            )
            is_solid = pos_right in merged
            # A merge wider than one group is treated as a flow lecture.
            is_flow = merged.width() > self._GROUP_WIDTH
            is_2pair = False
            dispname = ""
            parsed_discipline_name = None
            parsed_location = None
            parsed_leader = None
            raw = []  # uncategorised cell texts collected for this lesson

            if is_empty_lesson:
                dispname = "<no lesson>"
            else:
                # No bottom border two rows below the lesson start means the
                # lesson occupies a double-length block.
                if utils.has_no_bottom_border(self.sh, merged.low.shift(down=2)):
                    step = self._DOUBLE_STEP
                    is_2pair = True

                if is_flow:
                    flow = self.parse_potokoviy(merged)
                    parsed_location = flow['loc']
                    parsed_leader = flow['leader']
                    parsed_discipline_name = flow['name']
                    raw = list(utils.parse_all_dirt(self.sh, merged.low, merged.width(), step))
                else:
                    if is_solid:
                        parsed_discipline_name = cv
                    # str(): the cell value may be non-text (e.g. a number),
                    # and this label is only used for logging.
                    dispname = str(cv)
                    dispname += " SOLD" if is_solid else " SPLIT"
                    dispname += " [ДВУПАРНЫЙ]" if is_2pair else ""
                    raw = list(utils.parse_all_dirt(self.sh, merged.low, self._GROUP_WIDTH, step))

            if parsed_leader:
                dispname += f" [{parsed_leader}]"
            if parsed_location:
                dispname += f" [{parsed_location}]"
            dispname = dispname.replace("\n", "\\n")
            flow_tag = "[ПОТОКОВЫЙ] " if is_flow else ""
            pprint(f"[{group_name}] row={row}; {pos} {pos_right} {pair} {weekday}: {flow_tag}{dispname} {raw}")

            # Try to pull the room and the teacher out of the uncategorised data.
            prepods = set()
            locations = set()
            if parsed_leader is not None:
                prepods.add(aigenerated.extract_last_name(parsed_leader))
            if parsed_location is not None:
                locations.add(self._normalize_room(parsed_location))
            for item in raw:
                if aigenerated.is_surname_string(item):
                    prepods.add(aigenerated.extract_last_name(item))
                if aigenerated.is_room_number(item):
                    locations.add(self._normalize_room(item))

            # Workaround: with no explicit discipline, use all raw text as its name.
            if parsed_discipline_name is None:
                parsed_discipline_name = " ".join(raw)

            for bucket in (prepods, locations):
                bucket.discard(None)
                bucket.discard("")

            if not is_empty_lesson:
                day_key = weekday + ("_1" if weeknum == 1 else "_2")
                group['slots'].setdefault(day_key, {})[pair] = {
                    "pos": str(pos),
                    "discipline": parsed_discipline_name,
                    "locations": list(locations),
                    "leads": list(prepods),
                    "is_solid": is_solid,
                    "is_2pair": is_2pair,
                    "is_flow": is_flow,
                    "raw": raw,
                    # NOTE: the key is misspelled ("weeday"); kept as is because
                    # consumers of the output may already rely on it.
                    "weeday": utils.weekday_to_num(weekday),
                    "weeknum": weeknum,
                }

            # Record every recognised teacher. `prepods` already contains the
            # flow-lecture leader and has None / "" removed, so nothing bogus
            # is stored for lessons without an explicit leader.
            self.teachers.update(prepods)

            row += step
def parse_groups(sh, head, monday, head_rx):
    """Recognise the groups listed in the header row and their metadata.

    In essence this yields each group's name together with the coordinates of
    its top header cell range (for example AQ6:AT6).

    Args:
        sh: worksheet passed on to ``utils.get_merged_coord``.
        head: the cells of the header row.
        monday: position of the first Monday cell; only ``monday[1]`` (its
            column) is used, to skip the leading non-group columns.
        head_rx: index of the header row.

    Returns:
        dict mapping a group name to ``{"name", "position", "position_human",
        "slots"}``, where ``slots`` starts out empty. Scanning stops at the
        first blank header cell or at a header that is not 4 columns wide.
    """
    found = {}
    col = 0
    while col < len(head):
        cell = head[col]
        pprint(f"while i={col} head[i]={cell}")
        span = utils.get_merged_coord(sh, Coord(head_rx, col))

        # Group headers are only looked for to the right of the two columns
        # that start at the Monday column.
        if col > monday[1] + 1:
            if span is None or cell.value == "":
                break
            if span.width() != 4:
                pprint(f"WARNING: group header witdh !=4 (found: {span.width()}); blocks !=4 not supported by parser.")
                break
            title = utils.unspace(cell.value)
            found[title] = {
                "name": title,
                "position": [head_rx, col],
                "position_human": utils.merged_humanize(span.as_numbers()),
                "slots": {},
            }

        # Jump over the whole merged range, or a single column if there is none.
        col += 1 if span is None else span.width()
    return found