fixes 3 pairs подряд, etc

This commit is contained in:
2025-09-12 20:07:04 +03:00
parent 6920d24a98
commit ed65e5b483
8 changed files with 239 additions and 78 deletions

View File

@@ -1,3 +1,6 @@
# Copyright GEMINI
import re import re
# --- Ресурсы для алгоритма --- # --- Ресурсы для алгоритма ---

View File

@@ -1,3 +1,4 @@
# Copyright Stanislav Mironov
class Coord: class Coord:
@@ -13,6 +14,10 @@ class Coord:
return Coord(self.row if row is None else row, return Coord(self.row if row is None else row,
self.col if col is None else col) self.col if col is None else col)
def copy(self) -> "Coord":
    """Return a new ``Coord`` built from this coordinate's row and column."""
    row, col = self.row, self.col
    return Coord(row, col)
def cell(self, reader: "ExcelSheetReader") -> "TranschendentnostCell": def cell(self, reader: "ExcelSheetReader") -> "TranschendentnostCell":
return reader.cell(self.row, self.col) return reader.cell(self.row, self.col)

Binary file not shown.

View File

@@ -1,5 +1,7 @@
# Copyright Stanislav Mironov
import re import re
import time
from urllib.parse import urljoin from urllib.parse import urljoin
import requests import requests
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
@@ -8,7 +10,7 @@ from bs4 import BeautifulSoup
BASE_URL = "https://www.vstu.ru/" BASE_URL = "https://www.vstu.ru/"
RASP_PREFIX = "https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php?dep=" RASP_PREFIX = "https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php?dep="
# Парсит ссылки на эксель .xls & .xlsx файлы и выдаёт их
def parse_links(facultets): def parse_links(facultets):
session = requests.Session() session = requests.Session()
session.headers = CaseInsensitiveDict( session.headers = CaseInsensitiveDict(
@@ -18,17 +20,17 @@ def parse_links(facultets):
"Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3", "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
"Accept-Encoding": "gzip, deflate", "Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive", "Connection": "keep-alive",
"Referer": "http://dump.vstu.ru/",
"Upgrade-Insecure-Requests": "1", "Upgrade-Insecure-Requests": "1",
"Priority": "u=0, i", "Priority": "u=0, i",
"Pragma": "no-cache", "Pragma": "no-cache",
"Cache-Control": "no-cach", "Cache-Control": "no-cach"
} }
) )
EXCEL_LINKS = {} EXCEL_LINKS = {}
for facultet in facultets: for facultet in facultets:
url = RASP_PREFIX + facultet url = RASP_PREFIX + facultet
print("getting...")
r = session.get(url) r = session.get(url)
print(f"GET {url}") print(f"GET {url}")
soup = BeautifulSoup(r.text, 'html.parser') soup = BeautifulSoup(r.text, 'html.parser')
@@ -51,3 +53,4 @@ def parse_links(facultets):
print(f"+url {excel_url}") print(f"+url {excel_url}")
return EXCEL_LINKS return EXCEL_LINKS

138
main.py
View File

@@ -1,62 +1,127 @@
# Copyright Stanislav Mironov
# Общее правило проекта: в координатах сначала идёт ROW, а потом COL; нумерация с нуля
import json import json
import re import os
import time import time
import traceback import traceback
from urllib.parse import urljoin import uuid
import pandas as pd
import xlwt
import xlrd
import requests
from bs4 import BeautifulSoup
import aigenerated import aigenerated
import parser import parser
import translations import translations
import utils import utils
import json import json
import links_parser import links_parser
# Общее правило проекта, сначала в координатах идёт ROW а потом COL, нумерация с нуля import shutil
def currt():
    """Return the current Unix time rounded to the nearest whole second."""
    now = time.time()
    return round(now)
FACULTETS = [ FACULTETS = [
"asp", "mag", "fastiv", "fat", "ftkm", "ftpp", "feu", "fevt", "htf", "vkf", "mmf", "fpik" "asp", "mag", "fastiv", "fat", "ftkm", "ftpp", "feu", "fevt", "htf", "vkf", "mmf", "fpik"
] ]
DIRNAME = "excels"
DEBUG_ONE_FAC = None #'fevt' DEBUG_ONE_FAC = None #'htf'
# Parsed groups keyed by group name. The very same dict object is stored under
# result["groups"] below, so every entry process_excel_file() writes here ends
# up in the JSON document that main() dumps to result.json.
result_groups = {}
# Skeleton of the output document. Key order is the order written to the file.
result = {
"version": 1,
"notice": "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: Данные, доступ к API и т.д. предоставляется КАК-ЕСТЬ (AS-IS) без каких либо, явно или не явно подразумеваемых гарантий.\n\nПарсер написал: Миронов Станислав",
# Evaluated once, when this module-level literal is executed (not at save time).
"actual_at": round(time.time()),
"documentation": "TODO",
"daypicture": "QwQ",
"university": "VSTU",
"university_site": "https://www.vstu.ru/",
"stat": {
# -1 is a placeholder; main() overwrites it just before saving.
"total_parsing_time": -1,
},
"api_notices": {
"updated_at": 1757688552,
"text": "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\nПо хорошему если updated_at другой по сравнению с вашем кэшем это сообщение должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\nwarning=True значит 'text' содержит важное а не как щас hint.\n\n ~fazziclay aka Stanislav;",
"warning": False,
"tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'"
},
# Names of groups seen more than once; appended to by process_excel_file().
"doubled_groups": [],
# NOTE(review): purpose of this entry is not evident from the code - confirm.
"debug": {
"bleu~~": 1
},
# One info dict per processed Excel file; appended to by process_excel_file().
"excels": [],
"facultets": FACULTETS,
# NOTE(review): the "emptykeyN" entries look like visual padding around
# "groups" in the dumped JSON - confirm before removing.
"emptykey1": "",
"emptykey2": "",
"groups": result_groups,
"emptykey3": "",
"emptykey4": "",
"see_header_at_top_of_this_file": "SEE TOP OF THIS FILE | ОБРАТИТЕ ВНИМАНИЕ НА ВЕРХ ЭТОГО ФАЙЛА"
}
def process_excel_file(facultet, excel_url, counter, timeid): def process_excel_file(facultet, excel_url, counter, timeid):
is_xlsx = excel_url.endswith(".xlsx") is_xlsx = excel_url.endswith(".xlsx")
filename = f"{DIRNAME}/" + timeid + f"_[C{counter}]_" + facultet + ".xls" + ("x" if is_xlsx else "")
excel_info = {
"filename": excel_url.split("/")[-1],
"url": excel_url,
"download_place": filename,
"stat": {
"download": -1,
"create_reader": -1,
"parse": -1,
"cycles": 0
},
"group_names_parsed": [],
"facultet": facultet,
"counter": counter
}
parser.LOGGING = False
try: try:
filename = "excels/" + timeid + "_" + facultet + f"_[C{counter}]" + ".xls" + ("x" if is_xlsx else "") t = utils.StepTimeCounter()
aigenerated.download_file_from_url(excel_url, filename) aigenerated.download_file_from_url(excel_url, filename)
excel_info["stat"]['download'] = t.step()
reader = translations.create_reader(filename) reader = translations.create_reader(filename)
print("Reader info") print("Reader info")
print(reader.info()) print(reader.info())
excel_info["stat"]['create_reader'] = t.step()
while True: while True:
print(f"Parsing sheet №{reader.get_sheet_index()+1}") excel_info['stat']['cycles'] += 1
parser.LOGGING = False print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
prs = parser.Parser(reader) prs = parser.Parser(reader)
prs.parse() prs.parse()
if prs.parser_error is not None:
excel_info["parser_error_cycle_" + excel_info['stat']['cycles']] = prs.parser_error
for group_name in prs.groups.keys(): for group_name in prs.groups.keys():
if group_name in result.keys(): if group_name in result_groups.keys():
print(f" -- WTF -- Doubled groups -- name: {group_name}") print(f" -- WTF -- Doubled groups -- name: {group_name}")
if 'warning_doubled_groups_skip' not in excel_info.keys():
excel_info['warning_doubled_groups_skip'] = []
excel_info['warning_doubled_groups_skip'].append(group_name)
result['doubled_groups'].append(group_name)
continue continue
gr = result[group_name] = prs.groups[group_name] gr = result_groups[group_name] = prs.groups[group_name]
gr['facultet'] = facultet gr['facultet'] = facultet
gr['data_source'] = excel_url.split("/")[-1] gr['data_source'] = excel_url.split("/")[-1]
gr['parser_debug'] = { gr['debug'] = {
"C_COUNTER": counter, "counter": counter,
"timeid": timeid, "timeid": timeid,
"excel_url": excel_url, "excel_url": excel_url,
"reader_info": reader.info(), "reader_info": reader.info(),
"reader_sheet_index": reader.get_sheet_index(), "reader_sheet_index": reader.get_sheet_index(),
"filename": filename "filename": filename
} }
excel_info["group_names_parsed"].append(group_name)
print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups.keys())) print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups.keys()))
@@ -67,22 +132,40 @@ def process_excel_file(facultet, excel_url, counter, timeid):
reader.next_sheet() reader.next_sheet()
print("Next sheet!") print("Next sheet!")
excel_info["stat"]['parse'] = t.step()
except Exception as e: except Exception as e:
print(f"Error while {excel_url}") print(f"Error while {excel_url}")
print(e) print(e)
traceback.print_exc() traceback.print_exc()
u = uuid.uuid4()
excel_info['error'] = {
"smile": ":(",
"error_message": str(e),
"log_anchor": str(u),
"time": currt()
}
print(f"Log Anchor: {u}")
faileds.append({ faileds.append({
"ex": e, "ex": e,
"fac": facultet, "fac": facultet,
"url": excel_url "url": excel_url
}) })
result['excels'].append(excel_info)
result = {}
faileds = [] faileds = []
def main(): def main():
EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC]) t = utils.StepTimeCounter()
try:
os.mkdir(DIRNAME)
print(f"Directory '{DIRNAME}' created successfully.")
except Exception:
print(f"Directory '{DIRNAME}' already exists.")
print("main(); parse links starting...")
EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])
counter = 0 counter = 0
timeid = str(round(time.time())) timeid = str(round(time.time()))
for facultet in EXCEL_LINKS.keys(): for facultet in EXCEL_LINKS.keys():
@@ -99,13 +182,24 @@ def main():
print("Excel file processing done!") print("Excel file processing done!")
print("Saving result.json") print("Saving result.json")
result['stat']['total_parsing_time'] = t.step()
json.dump(result, open('result.json', 'w'), indent=2, ensure_ascii=False) json.dump(result, open('result.json', 'w'), indent=2, ensure_ascii=False)
print("Saved to result.json") print("Saved to result.json")
print("Faileds:") print("Faileds:")
print(faileds) print(faileds)
# Delete a non-empty directory and its contents
try:
shutil.rmtree(DIRNAME)
print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
except Exception as e:
print(f"Error deleting directory '{DIRNAME}': {e}")
if __name__ == "__main__": if __name__ == "__main__":
print("Start")
main() main()
print("Bye!") print("Bye!")

100
parser.py
View File

@@ -1,7 +1,11 @@
# Copyright Stanislav Mironov
# Pair-slot labels in timetable order. The parser hands this list to
# utils.next_element() to guess a blank pair label from the previous one.
PAIR_NUMS = [
"1-2", "3-4", "5-6", "7-8", "9-10", "11-12", "13-14", "15-16"
]
import json import json
import uuid
import xlrd
import aigenerated import aigenerated
from coord import Coord, Merged from coord import Coord, Merged
from translations import ExcelSheetReader from translations import ExcelSheetReader
@@ -13,13 +17,13 @@ def pprint(*args, **kwargs):
if LOGGING: if LOGGING:
print(*args, **kwargs) print(*args, **kwargs)
class Parser: class Parser:
def __init__(self, reader: ExcelSheetReader): def __init__(self, reader: ExcelSheetReader):
self.reader = reader self.reader = reader
self.groups = {} self.groups = {}
self.teachers = set() self.teachers = set()
self.places = set() self.places = set()
self.parser_error = None
pprint("Parser created for '{0}'".format(reader.info())) pprint("Parser created for '{0}'".format(reader.info()))
def parse(self): def parse(self):
@@ -27,6 +31,7 @@ class Parser:
if monday is None: if monday is None:
print(" -- Failed parse! -- ") print(" -- Failed parse! -- ")
print("ПОНЕДЕЛЬНИК НЕ НАЙДЕН!") print("ПОНЕДЕЛЬНИК НЕ НАЙДЕН!")
self.parser_error = "'ПОНЕДЕЛЬНИК' не найден в таблице."
return return
head_rx = monday.row - 1 # выше первого понидельника head_rx = monday.row - 1 # выше первого понидельника
@@ -59,7 +64,7 @@ class Parser:
# location # location
location = merged.high.shift(down=1).cell(self.reader).value location = merged.high.shift(down=1).cell(self.reader).value
return {"loc": str(location), "leader": str(speaker), "name": str(merged.cell(self.reader).value)} return {"loc": str(location).strip(), "leader": str(speaker).strip(), "name": str(merged.cell(self.reader).value).strip()}
def process_group(self, group, monday): def process_group(self, group, monday):
""" """
@@ -71,13 +76,13 @@ class Parser:
pprint(group_name) pprint(group_name)
row = group['position'][0] + 1 # counter for while, +1 for shift down; также номер строки в таблице (вроде с нуля) row = group['position'][0] + 1 # counter for while, +1 for shift down; также номер строки в таблице (вроде с нуля)
weeknum = 1 # номер недели, щёлкнет +1 при каком-то условии. weeknum = 1 # номер недели, щёлкнет +1 при каком-то условии.
previous_pair = None
while row < self.reader.get_row_count(): # maybe условие чтобы не уйти ниже чем есть строк while row < self.reader.get_row_count(): # maybe условие чтобы не уйти ниже чем есть строк
pos = Coord(row, group['position'][1]) # текущая позиция, верхний левый угол (=low) pos = Coord(row, group['position'][1]) # текущая позиция, верхний левый угол (=low)
pos_right = pos.shift(right=3) pos_right = pos.shift(right=3)
pair_pos = pos.replace(col=5) pair_pos = pos.replace(col=5)
weekday_pos = pos.replace(col=4) weekday_pos = pos.replace(col=4)
merged = self.reader.get_merged_coord(pos) merged = self.reader.get_merged_coord(pos)
right_cell = pos_right.cell(self.reader)
merged_cell = merged.cell(self.reader) merged_cell = merged.cell(self.reader)
cv = merged_cell.value cv = merged_cell.value
# В конце (12 пара:>) название группы, можно использовать как якорь # В конце (12 пара:>) название группы, можно использовать как якорь
@@ -89,6 +94,16 @@ class Parser:
weekday = utils.unspace(weekday_mr.cell(self.reader).value) weekday = utils.unspace(weekday_mr.cell(self.reader).value)
pair_mr = self.reader.get_merged_coord(pair_pos) pair_mr = self.reader.get_merged_coord(pair_pos)
pair = utils.unspace(pair_mr.cell(self.reader).value) pair = utils.unspace(pair_mr.cell(self.reader).value)
fuck_empty_pair_in_excel = pair == ""
previous_dump = previous_pair
if fuck_empty_pair_in_excel:
if previous_pair is None or previous_pair == "":
pair = f"EMPTY_IN_EXCEL_{uuid.uuid4()}"
else:
pair = utils.next_element(PAIR_NUMS, previous_pair)
if pair != "":
previous_pair = pair
skip = 0 skip = 0
if weekday == "": if weekday == "":
@@ -99,26 +114,25 @@ class Parser:
row += 1 row += 1
else: else:
break break
if not skip: if not skip:
next = 3 # на сколько пыгнуть для следующего шага? next = 3 # на сколько пыгнуть для следующего шага?
is_empty_lesson = right_cell.is_empty() and merged_cell.is_empty() is_empty_lesson = len(utils.parse_all_dirt(self.reader, pos, 4, 3)) == 0 # если в поле не найдено ничего..
dispname = ""
parsed_discipline_name = None parsed_discipline_name = None
parsed_location = None parsed_location = None
parsed_leader = None parsed_leader = None
is_2pair = False pairs = 1
is_solid = pos_right in merged is_solid = pos_right in merged
parsed_uncotigorized = [] parsed_uncotigorized = []
is_wide_maybe_potokoviy = merged.width() > 4 # потоковая ли лекция (занимает несколько групп.) is_wide_maybe_potokoviy = merged.width() > 4 # потоковая ли лекция (занимает несколько групп.)
if is_empty_lesson:
dispname = "<no lesson>"
if not is_empty_lesson: if not is_empty_lesson:
may_prepod = merged.low.shift(down=2) cur = merged.low.shift(down=2)
if utils.has_no_bottom_border(self.reader, may_prepod): while utils.has_no_bottom_border(self.reader, cur):
next = 6 next += 3
is_2pair = True pairs += 1
cur = cur.shift(down=3)
if is_wide_maybe_potokoviy: if is_wide_maybe_potokoviy:
ret = self.parse_potokoviy(merged) ret = self.parse_potokoviy(merged)
@@ -127,45 +141,37 @@ class Parser:
parsed_discipline_name = ret['name'] parsed_discipline_name = ret['name']
parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, merged.width(), next)) parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, merged.width(), next))
else: else:
if (is_solid): if (is_solid):
parsed_discipline_name = cv parsed_discipline_name = cv
dispname = cv
dispname += (" SOLD" if is_solid else " SPLIT")
dispname += (" [ДВУПАРНЫЙ]" if is_2pair else "")
parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, 4, next)) parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, 4, next))
if parsed_leader: dispname += f" [{parsed_leader}]"
if parsed_location: dispname += f" [{parsed_location}]"
dispname = dispname.replace("\n", "\\n")
pprint(f"[{group_name}] row={row}; {pos} {pos_right} {pair} {weekday}: {'[ПОТОКОВЫЙ] ' if is_wide_maybe_potokoviy else ''}{dispname} {parsed_uncotigorized}")
# пытаемся из некотегорезированных данных выцепить место и лидера (препода) # пытаемся из некотегорезированных данных выцепить место и лидера (препода)
prepods = set() prepods = set()
if parsed_leader is not None: prepods.add(aigenerated.extract_last_name(parsed_leader)) if parsed_leader is not None: prepods.add(parsed_leader.strip())
locations = set() locations = set()
if parsed_location is not None: locations.add(parsed_location.replace(" ", "").replace("-", "")) if parsed_location is not None: locations.add(parsed_location.strip().replace(" ", ""))
for x in list(parsed_uncotigorized): for x in list(parsed_uncotigorized):
if aigenerated.is_surname_string(x): if aigenerated.is_surname_string(x):
prepods.add(aigenerated.extract_last_name(x)) prepods.add(x.strip())
if aigenerated.is_room_number(x): if aigenerated.is_room_number(x):
locations.add(x.replace(" ", "").replace("-", "") if x is not None else None) locations.add(x.strip().replace(" ", "") if x is not None else None)
# оставшееся в дисциплину (костыль) # попытка починить пустую дисциплину
if parsed_discipline_name is None: if parsed_discipline_name is None:
parsed_discipline_name = " ".join(parsed_uncotigorized) l = utils.remove_from_list(list(parsed_uncotigorized), [parsed_leader, parsed_location])
parsed_discipline_name = " ".join(l)
prepods.discard(None) # чистим сеты от мусора
prepods.discard("") utils.discards_list(prepods, nones=True, emptystrings=True)
locations.discard(None) utils.discards_list(locations, nones=True, emptystrings=True)
locations.discard("") utils.discards_list(parsed_uncotigorized, nones=True, emptystrings=True)
# если не пустой предмет то записываем его
if not is_empty_lesson: if not is_empty_lesson:
slots = group['slots'] slots = group['slots']
w = weekday + ("_1" if weeknum == 1 else "_2") w = weekday + ("_1" if weeknum == 1 else "_2")
@@ -174,22 +180,30 @@ class Parser:
today = slots[w] today = slots[w]
today[pair] = { today[pair] = {
"pos": str(pos), "excel_pos": str(pos),
"discipline": parsed_discipline_name, "discipline_name": parsed_discipline_name.strip(),
"locations": list(locations), "locations": list(locations),
"leads": list(prepods), "leads": list(prepods),
"is_solid": is_solid, "is_solid": is_solid,
"is_2pair": is_2pair, "time_coeff": pairs,
"is_flow": is_wide_maybe_potokoviy, "is_flow": is_wide_maybe_potokoviy,
"lefttopmerged": {
"width": merged.width(),
"height": merged.height(),
"excel_range": utils.merged_humanize(merged.as_numbers())
},
"raw": parsed_uncotigorized, "raw": parsed_uncotigorized,
"weeday": utils.weekday_to_num(weekday), "weekday": utils.weekday_to_num(weekday),
"weeknum": weeknum "weeknum": weeknum
} }
if fuck_empty_pair_in_excel:
today[pair]['pair_num_empty'] = {
"prev": previous_dump,
"restoted": pair != "",
"pair": pair
}
self.teachers.add(aigenerated.extract_last_name(parsed_leader))
# INCREMENT на next и конец цикла. # INCREMENT на next и конец цикла.
row += next row += next

View File

@@ -267,12 +267,11 @@ def create_reader(file_path, **kwargs) -> ExcelSheetReader:
Создает и возвращает подходящий экземпляр ридера в зависимости от расширения файла. Создает и возвращает подходящий экземпляр ридера в зависимости от расширения файла.
""" """
if file_path.lower().endswith('.xlsx'): if file_path.lower().endswith('.xlsx'):
print("Используется движок openpyxl для .xlsx")
return OpenpyxlSheetReader(file_path, **kwargs) return OpenpyxlSheetReader(file_path, **kwargs)
elif file_path.lower().endswith('.xls'): elif file_path.lower().endswith('.xls'):
print("Используется движок xlrd для .xls")
return XlrdSheetReader(file_path, **kwargs) return XlrdSheetReader(file_path, **kwargs)
else: else:
raise ValueError("Неподдерживаемый формат файла. Используйте .xls или .xlsx") raise ValueError("Неподдерживаемый формат файла. Используйте .xls или .xlsx")

View File

@@ -1,14 +1,58 @@
# gemini generated # Copyright Stanislav Mironov
import time
import xlrd import xlrd
from coord import Coord, Merged from coord import Coord, Merged
from translations import ExcelSheetReader from translations import ExcelSheetReader
import re
class StepTimeCounter:
    """Stopwatch that reports the seconds elapsed between successive steps.

    Durations are measured with ``time.perf_counter()``, a monotonic clock,
    so a system clock adjustment (NTP sync, manual change) can never produce
    a negative or inflated interval.  As a consequence ``time`` and
    ``createtime`` hold clock readings that are only meaningful relative to
    each other; they are not Unix timestamps.
    """

    def __init__(self):
        # Clock reading at the start of the current step; -1.0 only until
        # setnow() runs on the last line of the constructor.
        self.time: float = -1.0
        # Clock reading taken when the counter was created.
        self.createtime = time.perf_counter()
        self.setnow()

    def setnow(self):
        """Start a new step at the current moment."""
        self.time = time.perf_counter()

    def step(self, no_set_now=False):
        """Return the seconds elapsed since the current step started.

        Unless ``no_set_now`` is true, a new step is started afterwards, so
        consecutive calls yield consecutive, non-overlapping intervals.
        """
        elapsed = time.perf_counter() - self.time
        if not no_set_now:
            self.setnow()
        return elapsed

    def from_create(self):
        """Return the seconds elapsed since this counter was created."""
        return time.perf_counter() - self.createtime
EMPTY_CTYPES = [xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK] EMPTY_CTYPES = [xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK]
def discards_list(trg, nones=True, emptystrings=True):
    """Drop placeholder values from ``trg`` in place.

    ``nones`` selects removal of ``None`` and ``emptystrings`` selects removal
    of ``""``.  The actual removal is delegated to ``remove_from_list``.
    """
    unwanted = []
    if nones:
        unwanted.append(None)
    if emptystrings:
        unwanted.append("")
    remove_from_list(trg, unwanted)
def has_no_bottom_border(reader: "ExcelSheetReader", coord): def has_no_bottom_border(reader: "ExcelSheetReader", coord):
return reader.get_border_style(coord, 'bottom') == 0 and reader.get_border_style(coord.shift(down=1), 'top') == 0 return reader.get_border_style(coord, 'bottom') == 0 and reader.get_border_style(coord.shift(down=1), 'top') == 0
def find_element_index(my_list, element):
    """Return the index of the first item equal to ``element``, or -1.

    ``my_list`` must be a sequence providing ``index()`` (list, tuple, str).
    The sequence is scanned once: the lookup is attempted directly and the
    "not found" case is taken from the ``ValueError`` it raises, instead of
    testing membership first and then searching again.
    """
    try:
        return my_list.index(element)
    except ValueError:
        return -1
def next_element(arr, el, default=""):
    """Return the item that follows the first occurrence of ``el`` in ``arr``.

    ``default`` is returned when there is no such item, i.e. when ``el`` is
    the last item, when ``el`` does not occur in ``arr``, or when ``arr`` is
    empty.  It defaults to ``""`` because the parser compares the result
    against the empty string to decide whether a pair label was restored.

    Previously the last item raised ``IndexError`` and an unknown ``el``
    silently produced ``arr[0]`` (index -1 + 1 == 0).
    """
    try:
        position = arr.index(el)
    except ValueError:
        # Unknown element: there is nothing that could come "after" it.
        return default
    following = position + 1
    return arr[following] if following < len(arr) else default
def remove_from_list(l: list, todel: list):
    """Remove every occurrence of each value in ``todel`` from ``l`` in place.

    Despite the name, ``l`` may be any mutable container that supports ``in``
    and ``remove()``; the parser also passes sets through ``discards_list``.
    The same container object is returned to allow call chaining.

    Earlier versions removed only the first occurrence of each value, which
    left duplicates of an unwanted value behind in lists.
    """
    for unwanted in todel:
        # list.remove() drops a single match, so repeat until none is left.
        # For a set the loop body runs at most once.
        while unwanted in l:
            l.remove(unwanted)
    return l
def parse_all_dirt(reader: "ExcelSheetReader", min_pos, right, down): def parse_all_dirt(reader: "ExcelSheetReader", min_pos, right, down):
RET = set() RET = set()
@@ -17,17 +61,16 @@ def parse_all_dirt(reader: "ExcelSheetReader", min_pos, right, down):
col = min_pos.col col = min_pos.col
while col < min_pos.col + right: while col < min_pos.col + right:
#print(excel_coordinate(row, col)) #print(excel_coordinate(row, col))
value = str(reader.get_cell_value(row, col)) cv = reader.get_cell_value(row, col)
if value is not None and len(value) > 0: value = str(cv).strip()
if cv is not None and len(value) > 0:
RET.add(value) RET.add(value)
col += 1 col += 1
row += 1 row += 1
return RET return RET
import re # GEMINI GENERATED
# GEMINI
def normalize_name(raw_name): def normalize_name(raw_name):
""" """
Приводит разнородные записи ФИО к единому структурированному виду. Приводит разнородные записи ФИО к единому структурированному виду.