Files
VSTU_Schedule_Parser/parser.py
2025-09-10 23:43:41 +03:00

155 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import xlrd
from coord import Coord, Merged
import utils
class Parser:
    """Walk one worksheet of a timetable workbook and print the lessons of every group.

    The instance keeps the sheet in ``self.sh`` and the group table produced by
    ``parse_groups`` in ``self.groups``. All results are currently reported via
    ``print``; nothing is returned.
    """

    # Columns read for the weekday label and the pair (lesson slot) label.
    # NOTE(review): these look like they could be derived from the position of
    # the first weekday cell (``monday``) -- confirm before generalizing.
    _WEEKDAY_COL = 4
    _PAIR_COL = 5

    def __init__(self, sheet: "xlrd.sheet.Sheet"):
        """Remember the sheet to parse and start with an empty group table."""
        self.sh: "xlrd.sheet.Sheet" = sheet
        self.groups = {}
        print(f"Parser created for '{self.sh.name}': size: {self.sh.nrows}x{self.sh.ncols}")

    def parse(self):
        """Locate the header row, read the groups from it and process each group.

        The header row is taken to be the row directly above the cell that
        ``utils.find`` reports for the first Monday label.

        Raises:
            ValueError: if the Monday label was not found, or it sits in the
                first row so that there is no header row above it.
        """
        monday = utils.find(self.sh, "ПОНЕДЕЛЬНИК")
        # Guard before indexing: a missing result must produce the intended
        # error rather than a TypeError from ``monday[0]``.
        # NOTE(review): assumes a failed search yields None or a row index of 0
        # or less -- confirm against utils.find.
        if monday is None or monday[0] - 1 < 0:
            raise ValueError("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")
        head_rx = monday[0] - 1  # the row above the first Monday
        head = self.sh.row(head_rx)  # the whole header row (months, groups)
        print(f"head={head}")
        self.groups = parse_groups(self.sh, head, monday, head_rx)
        print(f'self.groups={json.dumps(self.groups, indent=2, ensure_ascii=False)}')
        print("\n\n\n")
        for group in self.groups.values():
            print("\nSTART OF PROCESS GROUP\n")
            self.process_group(group, monday)
            print("\nEND OF PROCESS GROUP\n")

    def parse_potokoviy(self, merged: "Merged"):
        """Read name, leader and location of a wide (multi-group) lesson.

        Args:
            merged: merged range of the lesson title cell.

        Returns:
            dict with keys ``"loc"``, ``"leader"`` and ``"name"``. The name is
            the value of the merged cell itself; the leader is read
            ``merged.height()`` rows below ``merged.low``; the location is
            read one row below ``merged.high``.
        """
        # Leader (speaker): the cell merged.height() rows below the `low` corner.
        speaker = merged.low.shift(down=merged.height()).cell(self.sh).value
        # Location: the cell one row below the `high` corner.
        location = merged.high.shift(down=1).cell(self.sh).value
        return {"loc": location, "leader": speaker, "name": merged.cell(self.sh).value}

    def process_group(self, group, monday):
        """Walk down one group's column and print every lesson slot found.

        Called once per group after the groups have been parsed by
        ``parse_groups``. Example of ``group``::

            {'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6'}

        Args:
            group: group record with ``'name'`` and ``'position'``
                (``[row, col]`` of the group's header cell).
            monday: position of the first Monday label; currently unused and
                kept for interface compatibility.
        """
        print(f"process_group group={group}")
        group_name = group['name']
        print(group_name)
        # Loop counter and sheet row index at once; +1 starts just below the header.
        row = group['position'][0] + 1
        weeknum = 1  # week number; bumped once when the first blank weekday row is met
        while row < self.sh.nrows:  # never walk past the last row of the sheet
            pos = Coord(row, group['position'][1])  # current cell in the group's column
            pos_right = pos.shift(right=3)
            pair_pos = pos.replace(col=self._PAIR_COL)
            weekday_pos = pos.replace(col=self._WEEKDAY_COL)
            merged = utils.get_merged_coord(self.sh, pos)
            right_cell = pos_right.cell(self.sh)
            merged_cell = merged.cell(self.sh)
            cv = merged_cell.value

            # The group name is repeated at the bottom of the column and serves
            # as the end-of-schedule anchor.
            if utils.unspace(cv) == group_name:
                print("Lesson == group name; ending group loop.")
                break

            weekday_mr = utils.get_merged_coord(self.sh, weekday_pos)
            weekday = utils.unspace(weekday_mr.cell(self.sh).value)
            pair_mr = utils.get_merged_coord(self.sh, pair_pos)
            pair = utils.unspace(pair_mr.cell(self.sh).value)

            if weekday == "":
                # The first row without a weekday is stepped over and switches
                # to week 2; a second one ends the walk.
                if weeknum != 1:
                    break
                weeknum += 1
                print("------")
                row += 1
                continue

            step = 3  # how many rows to jump to reach the next slot
            is_empty_lesson = (
                right_cell.ctype in utils.EMPTY_CTYPES
                and merged_cell.ctype in utils.EMPTY_CTYPES
            )
            parsed_location = None
            parsed_leader = None
            parsed_uncotigorized = []
            # A wide merged cell may be a stream lecture shared by several groups.
            is_wide_maybe_potokoviy = merged.width() > 4

            if is_empty_lesson:
                dispname = "<no lesson>"
            elif is_wide_maybe_potokoviy:
                ret = self.parse_potokoviy(merged)
                parsed_location = ret['loc']
                parsed_leader = ret['leader']
                dispname = ret['name']
            else:
                is_solid = pos_right in merged
                is_2pair = False
                may_prepod = merged.low.shift(down=2)
                # No border under this cell and none on top of the next one:
                # the lesson is treated as occupying two consecutive slots.
                if (utils.border_bottom(self.sh, may_prepod) == 0
                        and utils.border_top(self.sh, may_prepod.shift(down=1)) == 0):
                    step = 6
                    is_2pair = True
                dispname = cv
                dispname += (" SOLD" if is_solid else " SPLIT")
                dispname += (" [ДВУПАРНЫЙ]" if is_2pair else "")
                parsed_uncotigorized = utils.parse_all_dirt(self.sh, merged.low, 2, step - 1)

            if parsed_leader:
                dispname += f" [{parsed_leader}]"
            if parsed_location:
                dispname += f" [{parsed_location}]"
            dispname = dispname.replace("\n", "\\n")
            print(f"[{group_name}] row={row}; {pos} {pos_right} {pair} {weekday}: {'[ПОТОКОВЫЙ] ' if is_wide_maybe_potokoviy else ''}{dispname} {parsed_uncotigorized}")
            # Advance by the slot height computed above.
            row += step
def parse_groups(sh, head, monday, head_rx):
    """Recognise the groups listed in the header row and collect their metadata.

    In essence this yields every group name together with the coordinates of
    its header cell (e.g. ``AQ6:AT6``).

    Args:
        sh: sheet being parsed; passed through to ``utils.get_merged``.
        head: cells of the header row.
        monday: position of the first Monday label; columns up to
            ``monday[1] + 1`` are walked over but never read as groups.
        head_rx: row index of the header row.

    Returns:
        dict mapping group name to a record with ``"name"``, ``"position"``
        (``[head_rx, column]``) and ``"position_human"``.
    """
    found = {}
    col = 0
    while col < len(head):
        cell = head[col]
        print(f"while i={col} head[i]={cell}")
        span = utils.get_merged(sh, head_rx, col)
        if col > monday[1] + 1:
            # The first unmerged or blank header cell ends the group list.
            if span is None or cell.value == "":
                break
            title = utils.unspace(cell.value)
            found[title] = dict(
                name=title,
                position=[head_rx, col],
                position_human=utils.merged_humanize(span),
            )
        # Jump over the whole merged range, or a single column if unmerged.
        col += 1 if span is None else span[3] - span[1] + 1
    return found
def get_weekday_left(sh, rowx, colx):
    """Return the value of the anchor cell of the merged range covering (rowx, colx).

    The anchor is the cell addressed by the first two items of what
    ``utils.get_merged`` returns (presumably the range's top-left corner --
    confirm against ``utils.get_merged``).
    """
    span = utils.get_merged(sh, rowx, colx)
    anchor = sh.cell(span[0], span[1])
    return anchor.value