initial commit
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
*.xls
|
||||
*.xlsx
|
||||
__pycache__
|
||||
.idea
|
||||
81
coord.py
Normal file
81
coord.py
Normal file
@@ -0,0 +1,81 @@
|
||||
|
||||
|
||||
|
||||
import xlrd
|
||||
|
||||
|
||||
class Coord:
    """Zero-based (row, col) address of a single worksheet cell.

    Instances compare and hash by value, so they can serve as dict keys or
    set members as long as ``row`` / ``col`` are not mutated afterwards.
    """

    def __init__(self, row, col):
        self.row = row
        self.col = col

    def shift(self, down=0, right=0, up=0, left=0) -> "Coord":
        """Return a new Coord moved by the given offsets; ``self`` is left untouched."""
        moved_row = self.row + down - up
        moved_col = self.col + right - left
        return Coord(moved_row, moved_col)

    def replace(self, row=None, col=None) -> "Coord":
        """Return a copy with ``row`` and/or ``col`` overridden; ``None`` keeps the current value."""
        new_row = row if row is not None else self.row
        new_col = col if col is not None else self.col
        return Coord(new_row, new_col)

    def cell(self, sh) -> "xlrd.sheet.Cell":
        """Fetch the cell at this position from sheet ``sh`` via ``sh.cell(row, col)``."""
        return sh.cell(self.row, self.col)

    def __repr__(self):
        # Imported lazily: utils imports this module, so a top-level import
        # here would be circular.
        import utils

        return utils.excel_coordinate(self.row, self.col)

    def as_numbers(self):
        """Return the position as a plain ``(row, col)`` tuple."""
        return self.row, self.col

    def __hash__(self):
        # Hash the same pair that __eq__ compares.
        return hash((self.row, self.col))

    def __eq__(self, other):
        if isinstance(other, Coord):
            return self.row == other.row and self.col == other.col
        # Let Python try the reflected comparison for unrelated types.
        return NotImplemented
|
||||
|
||||
|
||||
class Merged:
    """Inclusive rectangle of cells described by two corner ``Coord`` objects.

    ``low`` is expected to be the top-left corner (smallest row/col) and
    ``high`` the bottom-right one; the class itself does not enforce this.
    A plain, unmerged cell is represented with ``low == high``.
    """

    def __init__(self, coord1, coord2):
        self.low: Coord = coord1
        self.high: Coord = coord2

    def height(self):
        """Number of rows covered (both corners inclusive)."""
        return self.high.row - self.low.row + 1

    def width(self):
        """Number of columns covered (both corners inclusive)."""
        return self.high.col - self.low.col + 1

    def cell(self, sh) -> "xlrd.sheet.Cell":
        """Return the cell at the ``low`` corner of the range from sheet ``sh``."""
        return sh.cell(self.low.row, self.low.col)

    def is_pseudo_merged(self):
        """Return True for a 1x1 range, i.e. when the start equals the end."""
        return self.low == self.high

    def as_numbers(self):
        """Return ``(row_low, col_low, row_high, col_high)``."""
        row_low, col_low = self.low.as_numbers()
        row_high, col_high = self.high.as_numbers()
        return row_low, col_low, row_high, col_high

    def __hash__(self):
        # Hash the same pair of corners that __eq__ compares.
        return hash((self.low, self.high))

    def __eq__(self, other):
        if not isinstance(other, Merged):
            # Don't attempt to compare against unrelated types.
            return NotImplemented

        return self.low == other.low and self.high == other.high

    def __contains__(self, key):
        """Return True when ``key`` is a Coord lying inside the range.

        Anything that is not a Coord is reported as not contained.  The
        result of ``__contains__`` is coerced to bool, so returning
        ``NotImplemented`` here would make every foreign key look contained.
        """
        if not isinstance(key, Coord):
            return False

        rows_ok = self.low.row <= key.row <= self.high.row
        cols_ok = self.low.col <= key.col <= self.high.col
        return rows_ok and cols_ok
|
||||
18
main.py
Normal file
18
main.py
Normal file
@@ -0,0 +1,18 @@
|
||||
import json
|
||||
|
||||
import xlrd
|
||||
|
||||
import parser
|
||||
import utils
|
||||
|
||||
# Project-wide convention: coordinates are always given ROW first, then COL,
# and both are zero-based.

# formatting_info=True asks xlrd to keep formatting data; the parser relies on
# merged ranges and cell border styles.
book = xlrd.open_workbook("ОН_ФЭВТ_2 курс.xls", formatting_info=True)
print("The number of worksheets is {0}".format(book.nsheets))
print("Worksheet name(s): {0}".format(book.sheet_names()))

# Only the first worksheet is parsed.
sh = book.sheet_by_index(0)

prs = parser.Parser(sh)
prs.parse()
|
||||
155
parser.py
Normal file
155
parser.py
Normal file
@@ -0,0 +1,155 @@
|
||||
import json
|
||||
|
||||
import xlrd
|
||||
|
||||
from coord import Coord, Merged
|
||||
import utils
|
||||
|
||||
|
||||
|
||||
class Parser:
    """Walks one timetable worksheet and prints the lessons found for every group.

    All coordinates are (row, col) and zero-based.
    """

    def __init__(self, sheet: "xlrd.sheet.Sheet"):
        """Remember the sheet to parse and print its name and size."""
        self.sh: "xlrd.sheet.Sheet" = sheet
        # group name -> description dict, filled in by parse()
        self.groups = {}
        print("Parser created for '{0}': size: {1}x{2}".format(self.sh.name, self.sh.nrows, self.sh.ncols))

    def parse(self):
        """Locate the header row, collect the groups and process each one.

        Raises:
            Exception: when the 'ПОНЕДЕЛЬНИК' anchor cell is missing, or sits
                in the very first row so that no header row exists above it.
        """
        monday = utils.find(self.sh, "ПОНЕДЕЛЬНИК")
        # utils.find returns None when nothing matched.  Map that onto the
        # "no header row" case so the descriptive error below is raised
        # instead of a TypeError from indexing None.
        head_rx = -1 if monday is None else monday[0] - 1  # row just above the first Monday
        if head_rx < 0:
            raise Exception("head_rx < 0: Программа пыталась найти 'ПОНЕДЕЛЬНИК', но по всей видимости не нашла.")

        head = self.sh.row(head_rx)  # the whole header ROW (months, groups)
        print(f"head={head}")
        self.groups = parse_groups(self.sh, head, monday, head_rx)
        print(f'self.groups={json.dumps(self.groups, indent=2, ensure_ascii=False)}')

        print("\n\n\n")

        for group in self.groups.values():
            print("\nSTART OF PROCESS GROUP\n")
            self.process_group(group, monday)
            print("\nEND OF PROCESS GROUP\n")

    def parse_potokoviy(self, merged: "Merged"):
        """Extract name, leader and location of a wide ("stream") lesson.

        The leader is read from the cell directly below the merged range
        (in its first column); the location from the cell one row below the
        range's ``high`` corner; the name from the range's own cell.

        Returns:
            dict with the keys ``"loc"``, ``"leader"`` and ``"name"``.
        """
        speaker_pos = merged.low.shift(down=merged.height())
        speaker = speaker_pos.cell(self.sh).value

        location = merged.high.shift(down=1).cell(self.sh).value

        return {"loc": location, "leader": speaker, "name": merged.cell(self.sh).value}

    def process_group(self, group, monday):
        """Walk down one group's column and print every lesson slot found.

        Runs once per group after the groups have been collected by
        ``parse_groups``.  Example of ``group``::

            {'name': 'ИВТ-260', 'position': [5, 6], 'position_human': 'G6:J6'}

        ``monday`` is accepted for symmetry with ``parse`` but is not used here.
        """
        print(f"process_group group={group}")
        group_name = group['name']
        print(group_name)
        # Loop counter and sheet row index; +1 steps below the header cell.
        row = group['position'][0] + 1
        # Week number; bumped once, at the first row whose weekday cell is blank.
        weeknum = 1
        while row < self.sh.nrows:  # never walk past the last row of the sheet
            pos = Coord(row, group['position'][1])  # current position, top-left corner (= merged.low)
            pos_right = pos.shift(right=3)
            pair_pos = pos.replace(col=5)      # column 5 is read as the pair label
            weekday_pos = pos.replace(col=4)   # column 4 is read as the weekday label
            merged = utils.get_merged_coord(self.sh, pos)
            right_cell = pos_right.cell(self.sh)
            merged_cell = merged.cell(self.sh)
            cv = merged_cell.value
            # The group name is repeated at the bottom of the column and is
            # used as the end-of-column anchor.
            if utils.unspace(cv) == group_name:
                print("Lesson == group name; ending group loop.")
                break

            weekday_mr = utils.get_merged_coord(self.sh, weekday_pos)
            weekday = utils.unspace(weekday_mr.cell(self.sh).value)
            pair_mr = utils.get_merged_coord(self.sh, pair_pos)
            pair = utils.unspace(pair_mr.cell(self.sh).value)

            if weekday == "":
                if weeknum != 1:
                    # A second blank weekday row ends the column.
                    break
                # First blank weekday row: switch to the second week and
                # step over this single row.
                weeknum += 1
                print("------")
                row += 1
                continue

            step = 3  # how many rows to advance for the next slot

            is_empty_lesson = right_cell.ctype in utils.EMPTY_CTYPES and merged_cell.ctype in utils.EMPTY_CTYPES
            dispname = ""
            parsed_location = None
            parsed_leader = None
            parsed_uncotigorized = []
            # A range wider than 4 columns is treated as a stream lecture
            # (one that spans several groups).
            is_wide_maybe_potokoviy = merged.width() > 4

            if is_empty_lesson:
                dispname = "<no lesson>"
            elif is_wide_maybe_potokoviy:
                ret = self.parse_potokoviy(merged)
                parsed_location = ret['loc']
                parsed_leader = ret['leader']
                dispname = ret['name']
            else:
                is_solid = pos_right in merged
                is_2pair = False
                may_prepod = merged.low.shift(down=2)
                # No border between the third and fourth row of the slot:
                # the lesson is treated as occupying two pairs (6 rows).
                if utils.border_bottom(self.sh, may_prepod) == 0 and utils.border_top(self.sh, may_prepod.shift(down=1)) == 0:
                    step = 6
                    is_2pair = True

                dispname = cv
                dispname += (" SOLD" if is_solid else " SPLIT")
                dispname += (" [ДВУПАРНЫЙ]" if is_2pair else "")

                # NOTE(review): the nesting of this call could not be recovered
                # from the reviewed copy (indentation was lost); it is assumed
                # to belong to the non-stream branch -- confirm.
                parsed_uncotigorized = utils.parse_all_dirt(self.sh, merged.low, 2, step - 1)

            if parsed_leader:
                dispname += f" [{parsed_leader}]"
            if parsed_location:
                dispname += f" [{parsed_location}]"
            dispname = dispname.replace("\n", "\\n")
            print(f"[{group_name}] row={row}; {pos} {pos_right} {pair} {weekday}: {'[ПОТОКОВЫЙ] ' if is_wide_maybe_potokoviy else ''}{dispname} {parsed_uncotigorized}")

            # Advance to the next slot.
            row += step
|
||||
|
||||
|
||||
def parse_groups(sh, head, monday, head_rx):
    """Collect the groups listed in the header row together with their metadata.

    Scans ``head`` (the cells of row ``head_rx``) left to right, jumping over
    each merged range in one step.  Every non-empty cell located more than
    one column to the right of ``monday[1]`` is recorded as a group; the scan
    stops at the first empty cell in that region.

    Returns:
        dict mapping the group name to
        ``{"name", "position": [row, col], "position_human"}``, where
        ``position_human`` is the header range in Excel notation
        (for example ``AQ6:AT6``).
    """
    found = {}
    col = 0
    while col < len(head):
        cell = head[col]
        print(f"while i={col} head[i]={cell}")
        span = utils.get_merged(sh, head_rx, col)
        if col > monday[1] + 1:
            if span is None or cell.value == "":
                break
            title = utils.unspace(cell.value)
            found[title] = {
                "name": title,
                "position": [head_rx, col],
                "position_human": utils.merged_humanize(span),
            }

        # Step over the whole merged range, or a single column without one.
        col += 1 if span is None else span[3] - span[1] + 1

    return found
|
||||
|
||||
def get_weekday_left(sh, rowx, colx):
    """Return the value of the first cell of the merged range covering (rowx, colx)."""
    bounds = utils.get_merged(sh, rowx, colx)
    top_row, left_col = bounds[0], bounds[1]
    return sh.cell(top_row, left_col).value
|
||||
97
utils.py
Normal file
97
utils.py
Normal file
@@ -0,0 +1,97 @@
|
||||
|
||||
# gemini generated
|
||||
import xlrd
|
||||
from coord import Coord, Merged
|
||||
|
||||
# xlrd cell types (the EMPTY and BLANK constants) that this project treats as
# "no content in the cell".
EMPTY_CTYPES = [xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK]
|
||||
|
||||
def border(sh, coord):
    """Return the ``border`` object of the format record used by the cell at ``coord``.

    The cell's ``xf_index`` selects an entry of ``sh.book.xf_list``; that
    entry's ``border`` attribute is returned.
    """
    target = sh.cell(coord.row, coord.col)
    formats = sh.book.xf_list
    return formats[target.xf_index].border
|
||||
|
||||
def border_right(sh, cell):
    """Return the right-edge line style of the cell; ``cell`` is a coordinate with ``row``/``col``."""
    edges = border(sh, cell)
    return edges.right_line_style
|
||||
|
||||
def border_left(sh, cell):
    """Return the left-edge line style of the cell; ``cell`` is a coordinate with ``row``/``col``."""
    edges = border(sh, cell)
    return edges.left_line_style
|
||||
|
||||
def border_bottom(sh, cell):
    """Return the bottom-edge line style of the cell; ``cell`` is a coordinate with ``row``/``col``."""
    edges = border(sh, cell)
    return edges.bottom_line_style
|
||||
|
||||
def border_top(sh, cell):
    """Return the top-edge line style of the cell; ``cell`` is a coordinate with ``row``/``col``."""
    edges = border(sh, cell)
    return edges.top_line_style
|
||||
|
||||
def parse_all_dirt(sh, min_pos, right, down):
    """Collect the distinct non-empty cell texts of a rectangle starting at ``min_pos``.

    Rows ``min_pos.row .. min_pos.row + down`` are visited INCLUSIVELY, while
    columns ``min_pos.col .. min_pos.col + right`` exclude the upper bound.
    Each value is converted with ``str()`` and empty strings are dropped.

    Returns:
        set of str.
    """
    texts = (
        str(sh.cell(row, col).value)
        for row in range(min_pos.row, min_pos.row + down + 1)
        for col in range(min_pos.col, min_pos.col + right)
    )
    return {text for text in texts if text}
|
||||
|
||||
def excel_coordinate(row, col):
    """
    Convert zero-based row/column indices to an Excel-style reference (e.g. A7, CB34).

    Args:
        row: Row index (zero-based).
        col: Column index (zero-based).

    Returns:
        The cell reference as a string: column letters followed by the
        one-based row number.
    """
    letters = []
    remaining = col
    while remaining >= 0:
        # Emit the least significant letter first ...
        letters.append(chr(ord('A') + remaining % 26))
        # ... then move to the next "digit"; the -1 accounts for the column
        # letters having no zero digit (after Z comes AA, not BA).
        remaining = remaining // 26 - 1
    letters.reverse()

    # Excel rows are one-based.
    return "".join(letters) + str(row + 1)
|
||||
|
||||
|
||||
def get_merged(sh, rowx, colx):
    """Return the inclusive bounds of the merged range that contains (rowx, colx).

    The result is ``(row_low, col_low, row_high, col_high)``.  When the cell
    is not part of any range in ``sh.merged_cells`` the cell itself is
    returned as a 1x1 range.
    """
    for row_lo, row_end, col_lo, col_end in sh.merged_cells:
        # The stored upper bounds are one past the last row/column;
        # turn them into inclusive ones.
        row_hi = row_end - 1
        col_hi = col_end - 1
        if row_lo <= rowx <= row_hi and col_lo <= colx <= col_hi:
            return row_lo, col_lo, row_hi, col_hi

    # Not merged: start and end coincide.
    return rowx, colx, rowx, colx
|
||||
|
||||
def get_merged_coord(sh, coord):
    """Return the merged range containing ``coord`` as a ``Merged`` object (1x1 when not merged)."""
    bounds = get_merged(sh, coord.row, coord.col)
    top_left = Coord(bounds[0], bounds[1])
    bottom_right = Coord(bounds[2], bounds[3])
    return Merged(coord1=top_left, coord2=bottom_right)
|
||||
|
||||
|
||||
def merged_humanize(crange):
    """Format four range bounds as an Excel-style ``AA1:BB2`` reference.

    ``crange`` must be ordered ``(row_low, col_low, row_high, col_high)``.
    """
    row_low, col_low, row_high, col_high = crange  # mind the order!
    corners = (
        excel_coordinate(row_low, col_low),
        excel_coordinate(row_high, col_high),
    )
    return ":".join(corners)
|
||||
|
||||
def unspace(s: str):
    """Strip surrounding whitespace, then drop every space and tab from the text."""
    cleaned = s.strip()
    for blank in (" ", "\t"):
        cleaned = cleaned.replace(blank, "")
    return cleaned
|
||||
|
||||
def find(sh, query=None):
    """Return ``(row, col)`` of the first cell whose value equals ``query``.

    Rows are scanned top to bottom and cells left to right.  ``None`` is
    returned when no cell matches.
    """
    for row_index in range(sh.nrows):
        for col_index, cell in enumerate(sh.row(row_index)):
            if cell.value == query:
                return row_index, col_index

    return None
|
||||
Reference in New Issue
Block a user