fixes 3 pairs подряд, etc
This commit is contained in:
@@ -1,3 +1,6 @@
|
||||
# Copyright GEMINI
|
||||
|
||||
|
||||
import re
|
||||
|
||||
# --- Ресурсы для алгоритма ---
|
||||
|
||||
5
coord.py
5
coord.py
@@ -1,3 +1,4 @@
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
|
||||
class Coord:
|
||||
@@ -13,6 +14,10 @@ class Coord:
|
||||
return Coord(self.row if row is None else row,
|
||||
self.col if col is None else col)
|
||||
|
||||
def copy(self) -> "Coord":
    """Return a new Coord holding the same row and column as this one."""
    row, col = self.row, self.col
    return Coord(row, col)
|
||||
|
||||
def cell(self, reader: "ExcelSheetReader") -> "TranschendentnostCell":
    """Look up the cell at this coordinate through ``reader.cell(row, col)``."""
    row, col = self.row, self.col
    return reader.cell(row, col)
|
||||
|
||||
|
||||
Binary file not shown.
@@ -1,5 +1,7 @@
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
@@ -8,7 +10,7 @@ from bs4 import BeautifulSoup
|
||||
BASE_URL = "https://www.vstu.ru/"
|
||||
RASP_PREFIX = "https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php?dep="
|
||||
|
||||
|
||||
# Парсит ссылки на эксель .xls & .xlsx файлы и выдаёт их
|
||||
def parse_links(facultets):
|
||||
session = requests.Session()
|
||||
session.headers = CaseInsensitiveDict(
|
||||
@@ -18,17 +20,17 @@ def parse_links(facultets):
|
||||
"Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "http://dump.vstu.ru/",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Priority": "u=0, i",
|
||||
"Pragma": "no-cache",
|
||||
"Cache-Control": "no-cach",
|
||||
"Cache-Control": "no-cach"
|
||||
}
|
||||
)
|
||||
|
||||
EXCEL_LINKS = {}
|
||||
for facultet in facultets:
|
||||
url = RASP_PREFIX + facultet
|
||||
print("getting...")
|
||||
r = session.get(url)
|
||||
print(f"GET {url}")
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
@@ -51,3 +53,4 @@ def parse_links(facultets):
|
||||
print(f"+url {excel_url}")
|
||||
|
||||
return EXCEL_LINKS
|
||||
|
||||
|
||||
138
main.py
138
main.py
@@ -1,62 +1,127 @@
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
# Общее правило проекта, сначала в координатах идёт ROW а потом COL, нумерация с нуля
|
||||
|
||||
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
from urllib.parse import urljoin
|
||||
import pandas as pd
|
||||
import xlwt
|
||||
|
||||
import xlrd
|
||||
import requests
|
||||
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import uuid
|
||||
import aigenerated
|
||||
import parser
|
||||
import translations
|
||||
import utils
|
||||
import json
|
||||
import links_parser
|
||||
# Общее правило проекта, сначала в координатах идёт ROW а потом COL, нумерация с нуля
|
||||
import shutil
|
||||
|
||||
def currt():
    """Return the current Unix time rounded to the nearest whole second."""
    now = time.time()
    return round(now)
|
||||
|
||||
FACULTETS = [
|
||||
"asp", "mag", "fastiv", "fat", "ftkm", "ftpp", "feu", "fevt", "htf", "vkf", "mmf", "fpik"
|
||||
]
|
||||
DIRNAME = "excels"
|
||||
|
||||
DEBUG_ONE_FAC = None #'fevt'
|
||||
DEBUG_ONE_FAC = None #'htf'
|
||||
result_groups = {}
|
||||
result = {
|
||||
"version": 1,
|
||||
"notice": "ОТКАЗ ОТ ОТВЕТСТВЕННОСТИ: Данные, доступ к API и т.д. предоставляется КАК-ЕСТЬ (AS-IS) без каких либо, явно или не явно подразумеваемых гарантий.\n\nПарсер написал: Миронов Станислав",
|
||||
"actual_at": round(time.time()),
|
||||
"documentation": "TODO",
|
||||
"daypicture": "QwQ",
|
||||
"university": "VSTU",
|
||||
"university_site": "https://www.vstu.ru/",
|
||||
"stat": {
|
||||
"total_parsing_time": -1,
|
||||
},
|
||||
"api_notices": {
|
||||
"updated_at": 1757688552,
|
||||
"text": "Пожалуйста сохраняйте 'updated_at', это время изменения ЭТОГО текста. Тут возможно будут появлятся важные BREAKING CHANGES и дедлайны к ним.\nПо хорошему если updated_at другой по сравнению с вашем кэшем это сообщение должно отправляться вам в телеграм как уведомление о поедстоящих изменениях\nwarning=True значит 'text' содержит важное а не как щас hint.\n\n ~fazziclay aka Stanislav;",
|
||||
"warning": False,
|
||||
"tut-plavayuschaya-struktura": "required only 'updated_at', 'text' and 'warning'"
|
||||
},
|
||||
"doubled_groups": [],
|
||||
"debug": {
|
||||
"bleu~~": 1
|
||||
},
|
||||
"excels": [],
|
||||
"facultets": FACULTETS,
|
||||
|
||||
"emptykey1": "",
|
||||
"emptykey2": "",
|
||||
|
||||
"groups": result_groups,
|
||||
|
||||
"emptykey3": "",
|
||||
"emptykey4": "",
|
||||
"see_header_at_top_of_this_file": "SEE TOP OF THIS FILE | ОБРАТИТЕ ВНИМАНИЕ НА ВЕРХ ЭТОГО ФАЙЛА"
|
||||
}
|
||||
|
||||
def process_excel_file(facultet, excel_url, counter, timeid):
|
||||
is_xlsx = excel_url.endswith(".xlsx")
|
||||
filename = f"{DIRNAME}/" + timeid + f"_[C{counter}]_" + facultet + ".xls" + ("x" if is_xlsx else "")
|
||||
|
||||
excel_info = {
|
||||
"filename": excel_url.split("/")[-1],
|
||||
"url": excel_url,
|
||||
"download_place": filename,
|
||||
"stat": {
|
||||
"download": -1,
|
||||
"create_reader": -1,
|
||||
"parse": -1,
|
||||
"cycles": 0
|
||||
},
|
||||
"group_names_parsed": [],
|
||||
"facultet": facultet,
|
||||
"counter": counter
|
||||
}
|
||||
parser.LOGGING = False
|
||||
|
||||
try:
|
||||
filename = "excels/" + timeid + "_" + facultet + f"_[C{counter}]" + ".xls" + ("x" if is_xlsx else "")
|
||||
t = utils.StepTimeCounter()
|
||||
aigenerated.download_file_from_url(excel_url, filename)
|
||||
excel_info["stat"]['download'] = t.step()
|
||||
|
||||
reader = translations.create_reader(filename)
|
||||
print("Reader info")
|
||||
print(reader.info())
|
||||
excel_info["stat"]['create_reader'] = t.step()
|
||||
|
||||
while True:
|
||||
print(f"Parsing sheet №{reader.get_sheet_index()+1}")
|
||||
parser.LOGGING = False
|
||||
excel_info['stat']['cycles'] += 1
|
||||
print(f"Parsing sheet №{reader.get_sheet_index()+1} (from 1)")
|
||||
prs = parser.Parser(reader)
|
||||
prs.parse()
|
||||
if prs.parser_error is not None:
|
||||
excel_info["parser_error_cycle_" + excel_info['stat']['cycles']] = prs.parser_error
|
||||
|
||||
for group_name in prs.groups.keys():
|
||||
if group_name in result.keys():
|
||||
if group_name in result_groups.keys():
|
||||
print(f" -- WTF -- Doubled groups -- name: {group_name}")
|
||||
if 'warning_doubled_groups_skip' not in excel_info.keys():
|
||||
excel_info['warning_doubled_groups_skip'] = []
|
||||
|
||||
excel_info['warning_doubled_groups_skip'].append(group_name)
|
||||
result['doubled_groups'].append(group_name)
|
||||
|
||||
|
||||
continue
|
||||
|
||||
gr = result[group_name] = prs.groups[group_name]
|
||||
gr = result_groups[group_name] = prs.groups[group_name]
|
||||
gr['facultet'] = facultet
|
||||
gr['data_source'] = excel_url.split("/")[-1]
|
||||
gr['parser_debug'] = {
|
||||
"C_COUNTER": counter,
|
||||
gr['debug'] = {
|
||||
"counter": counter,
|
||||
"timeid": timeid,
|
||||
"excel_url": excel_url,
|
||||
"reader_info": reader.info(),
|
||||
"reader_sheet_index": reader.get_sheet_index(),
|
||||
"filename": filename
|
||||
}
|
||||
excel_info["group_names_parsed"].append(group_name)
|
||||
|
||||
print(f"Populates {len(prs.groups)} groups to result: " + " ".join(prs.groups.keys()))
|
||||
|
||||
@@ -67,22 +132,40 @@ def process_excel_file(facultet, excel_url, counter, timeid):
|
||||
reader.next_sheet()
|
||||
print("Next sheet!")
|
||||
|
||||
excel_info["stat"]['parse'] = t.step()
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error while {excel_url}")
|
||||
print(e)
|
||||
traceback.print_exc()
|
||||
u = uuid.uuid4()
|
||||
excel_info['error'] = {
|
||||
"smile": ":(",
|
||||
"error_message": str(e),
|
||||
"log_anchor": str(u),
|
||||
"time": currt()
|
||||
}
|
||||
print(f"Log Anchor: {u}")
|
||||
faileds.append({
|
||||
"ex": e,
|
||||
"fac": facultet,
|
||||
"url": excel_url
|
||||
})
|
||||
|
||||
result['excels'].append(excel_info)
|
||||
|
||||
|
||||
result = {}
|
||||
faileds = []
|
||||
def main():
|
||||
EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])
|
||||
t = utils.StepTimeCounter()
|
||||
try:
|
||||
os.mkdir(DIRNAME)
|
||||
print(f"Directory '{DIRNAME}' created successfully.")
|
||||
except Exception:
|
||||
print(f"Directory '{DIRNAME}' already exists.")
|
||||
|
||||
print("main(); parse links starting...")
|
||||
EXCEL_LINKS = links_parser.parse_links(FACULTETS if DEBUG_ONE_FAC is None else [DEBUG_ONE_FAC])
|
||||
counter = 0
|
||||
timeid = str(round(time.time()))
|
||||
for facultet in EXCEL_LINKS.keys():
|
||||
@@ -99,13 +182,24 @@ def main():
|
||||
print("Excel file processing done!")
|
||||
|
||||
print("Saving result.json")
|
||||
|
||||
result['stat']['total_parsing_time'] = t.step()
|
||||
|
||||
json.dump(result, open('result.json', 'w'), indent=2, ensure_ascii=False)
|
||||
print("Saved to result.json")
|
||||
|
||||
print("Faileds:")
|
||||
print(faileds)
|
||||
|
||||
# Delete a non-empty directory and its contents
|
||||
try:
|
||||
shutil.rmtree(DIRNAME)
|
||||
print(f"Directory '{DIRNAME}' and its contents deleted successfully.")
|
||||
except Exception as e:
|
||||
print(f"Error deleting directory '{DIRNAME}': {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Start")
|
||||
main()
|
||||
print("Bye!")
|
||||
|
||||
100
parser.py
100
parser.py
@@ -1,7 +1,11 @@
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
PAIR_NUMS = [
|
||||
"1-2", "3-4", "5-6", "7-8", "9-10", "11-12", "13-14", "15-16"
|
||||
]
|
||||
|
||||
import json
|
||||
|
||||
import xlrd
|
||||
|
||||
import uuid
|
||||
import aigenerated
|
||||
from coord import Coord, Merged
|
||||
from translations import ExcelSheetReader
|
||||
@@ -13,13 +17,13 @@ def pprint(*args, **kwargs):
|
||||
if LOGGING:
|
||||
print(*args, **kwargs)
|
||||
|
||||
|
||||
class Parser:
|
||||
def __init__(self, reader: ExcelSheetReader):
|
||||
self.reader = reader
|
||||
self.groups = {}
|
||||
self.teachers = set()
|
||||
self.places = set()
|
||||
self.parser_error = None
|
||||
pprint("Parser created for '{0}'".format(reader.info()))
|
||||
|
||||
def parse(self):
|
||||
@@ -27,6 +31,7 @@ class Parser:
|
||||
if monday is None:
|
||||
print(" -- Failed parse! -- ")
|
||||
print("ПОНЕДЕЛЬНИК НЕ НАЙДЕН!")
|
||||
self.parser_error = "'ПОНЕДЕЛЬНИК' не найден в таблице."
|
||||
return
|
||||
|
||||
head_rx = monday.row - 1 # выше первого понидельника
|
||||
@@ -59,7 +64,7 @@ class Parser:
|
||||
# location
|
||||
location = merged.high.shift(down=1).cell(self.reader).value
|
||||
|
||||
return {"loc": str(location), "leader": str(speaker), "name": str(merged.cell(self.reader).value)}
|
||||
return {"loc": str(location).strip(), "leader": str(speaker).strip(), "name": str(merged.cell(self.reader).value).strip()}
|
||||
|
||||
def process_group(self, group, monday):
|
||||
"""
|
||||
@@ -71,13 +76,13 @@ class Parser:
|
||||
pprint(group_name)
|
||||
row = group['position'][0] + 1 # counter for while, +1 for shift down; также номер строки в таблице (вроде с нуля)
|
||||
weeknum = 1 # номер недели, щёлкнет +1 при каком-то условии.
|
||||
previous_pair = None
|
||||
while row < self.reader.get_row_count(): # maybe условие чтобы не уйти ниже чем есть строк
|
||||
pos = Coord(row, group['position'][1]) # текущая позиция, верхний левый угол (=low)
|
||||
pos_right = pos.shift(right=3)
|
||||
pair_pos = pos.replace(col=5)
|
||||
weekday_pos = pos.replace(col=4)
|
||||
merged = self.reader.get_merged_coord(pos)
|
||||
right_cell = pos_right.cell(self.reader)
|
||||
merged_cell = merged.cell(self.reader)
|
||||
cv = merged_cell.value
|
||||
# В конце (12 пара:>) название группы, можно использовать как якорь
|
||||
@@ -89,6 +94,16 @@ class Parser:
|
||||
weekday = utils.unspace(weekday_mr.cell(self.reader).value)
|
||||
pair_mr = self.reader.get_merged_coord(pair_pos)
|
||||
pair = utils.unspace(pair_mr.cell(self.reader).value)
|
||||
fuck_empty_pair_in_excel = pair == ""
|
||||
previous_dump = previous_pair
|
||||
if fuck_empty_pair_in_excel:
|
||||
if previous_pair is None or previous_pair == "":
|
||||
pair = f"EMPTY_IN_EXCEL_{uuid.uuid4()}"
|
||||
else:
|
||||
pair = utils.next_element(PAIR_NUMS, previous_pair)
|
||||
|
||||
if pair != "":
|
||||
previous_pair = pair
|
||||
|
||||
skip = 0
|
||||
if weekday == "":
|
||||
@@ -99,26 +114,25 @@ class Parser:
|
||||
row += 1
|
||||
else:
|
||||
break
|
||||
|
||||
if not skip:
|
||||
next = 3 # на сколько пыгнуть для следующего шага?
|
||||
|
||||
is_empty_lesson = right_cell.is_empty() and merged_cell.is_empty()
|
||||
dispname = ""
|
||||
is_empty_lesson = len(utils.parse_all_dirt(self.reader, pos, 4, 3)) == 0 # если в поле не найдено ничего..
|
||||
parsed_discipline_name = None
|
||||
parsed_location = None
|
||||
parsed_leader = None
|
||||
is_2pair = False
|
||||
pairs = 1
|
||||
is_solid = pos_right in merged
|
||||
parsed_uncotigorized = []
|
||||
is_wide_maybe_potokoviy = merged.width() > 4 # потоковая ли лекция (занимает несколько групп.)
|
||||
if is_empty_lesson:
|
||||
dispname = "<no lesson>"
|
||||
|
||||
if not is_empty_lesson:
|
||||
may_prepod = merged.low.shift(down=2)
|
||||
if utils.has_no_bottom_border(self.reader, may_prepod):
|
||||
next = 6
|
||||
is_2pair = True
|
||||
cur = merged.low.shift(down=2)
|
||||
while utils.has_no_bottom_border(self.reader, cur):
|
||||
next += 3
|
||||
pairs += 1
|
||||
cur = cur.shift(down=3)
|
||||
|
||||
if is_wide_maybe_potokoviy:
|
||||
ret = self.parse_potokoviy(merged)
|
||||
@@ -127,45 +141,37 @@ class Parser:
|
||||
parsed_discipline_name = ret['name']
|
||||
parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, merged.width(), next))
|
||||
|
||||
|
||||
else:
|
||||
if (is_solid):
|
||||
parsed_discipline_name = cv
|
||||
|
||||
dispname = cv
|
||||
dispname += (" SOLD" if is_solid else " SPLIT")
|
||||
dispname += (" [ДВУПАРНЫЙ]" if is_2pair else "")
|
||||
|
||||
parsed_uncotigorized = list(utils.parse_all_dirt(self.reader, merged.low, 4, next))
|
||||
|
||||
|
||||
if parsed_leader: dispname += f" [{parsed_leader}]"
|
||||
if parsed_location: dispname += f" [{parsed_location}]"
|
||||
dispname = dispname.replace("\n", "\\n")
|
||||
pprint(f"[{group_name}] row={row}; {pos} {pos_right} {pair} {weekday}: {'[ПОТОКОВЫЙ] ' if is_wide_maybe_potokoviy else ''}{dispname} {parsed_uncotigorized}")
|
||||
|
||||
# пытаемся из некотегорезированных данных выцепить место и лидера (препода)
|
||||
prepods = set()
|
||||
if parsed_leader is not None: prepods.add(aigenerated.extract_last_name(parsed_leader))
|
||||
if parsed_leader is not None: prepods.add(parsed_leader.strip())
|
||||
|
||||
locations = set()
|
||||
if parsed_location is not None: locations.add(parsed_location.replace(" ", "").replace("-", ""))
|
||||
if parsed_location is not None: locations.add(parsed_location.strip().replace(" ", ""))
|
||||
|
||||
for x in list(parsed_uncotigorized):
|
||||
if aigenerated.is_surname_string(x):
|
||||
prepods.add(aigenerated.extract_last_name(x))
|
||||
prepods.add(x.strip())
|
||||
|
||||
if aigenerated.is_room_number(x):
|
||||
locations.add(x.replace(" ", "").replace("-", "") if x is not None else None)
|
||||
locations.add(x.strip().replace(" ", "") if x is not None else None)
|
||||
|
||||
# оставшееся в дисциплину (костыль)
|
||||
# попытка починить пустую дисциплину
|
||||
if parsed_discipline_name is None:
|
||||
parsed_discipline_name = " ".join(parsed_uncotigorized)
|
||||
l = utils.remove_from_list(list(parsed_uncotigorized), [parsed_leader, parsed_location])
|
||||
parsed_discipline_name = " ".join(l)
|
||||
|
||||
prepods.discard(None)
|
||||
prepods.discard("")
|
||||
locations.discard(None)
|
||||
locations.discard("")
|
||||
# чистим сеты от мусора
|
||||
utils.discards_list(prepods, nones=True, emptystrings=True)
|
||||
utils.discards_list(locations, nones=True, emptystrings=True)
|
||||
utils.discards_list(parsed_uncotigorized, nones=True, emptystrings=True)
|
||||
|
||||
# если не пустой предмет то записываем его
|
||||
if not is_empty_lesson:
|
||||
slots = group['slots']
|
||||
w = weekday + ("_1" if weeknum == 1 else "_2")
|
||||
@@ -174,22 +180,30 @@ class Parser:
|
||||
|
||||
today = slots[w]
|
||||
today[pair] = {
|
||||
"pos": str(pos),
|
||||
"discipline": parsed_discipline_name,
|
||||
"excel_pos": str(pos),
|
||||
"discipline_name": parsed_discipline_name.strip(),
|
||||
"locations": list(locations),
|
||||
"leads": list(prepods),
|
||||
"is_solid": is_solid,
|
||||
"is_2pair": is_2pair,
|
||||
"time_coeff": pairs,
|
||||
"is_flow": is_wide_maybe_potokoviy,
|
||||
"lefttopmerged": {
|
||||
"width": merged.width(),
|
||||
"height": merged.height(),
|
||||
"excel_range": utils.merged_humanize(merged.as_numbers())
|
||||
},
|
||||
"raw": parsed_uncotigorized,
|
||||
"weeday": utils.weekday_to_num(weekday),
|
||||
"weekday": utils.weekday_to_num(weekday),
|
||||
"weeknum": weeknum
|
||||
}
|
||||
if fuck_empty_pair_in_excel:
|
||||
today[pair]['pair_num_empty'] = {
|
||||
"prev": previous_dump,
|
||||
"restoted": pair != "",
|
||||
"pair": pair
|
||||
}
|
||||
|
||||
|
||||
self.teachers.add(aigenerated.extract_last_name(parsed_leader))
|
||||
|
||||
|
||||
# INCREMENT на next и конец цикла.
|
||||
row += next
|
||||
|
||||
|
||||
@@ -267,12 +267,11 @@ def create_reader(file_path, **kwargs) -> ExcelSheetReader:
|
||||
Создает и возвращает подходящий экземпляр ридера в зависимости от расширения файла.
|
||||
"""
|
||||
if file_path.lower().endswith('.xlsx'):
|
||||
print("Используется движок openpyxl для .xlsx")
|
||||
return OpenpyxlSheetReader(file_path, **kwargs)
|
||||
|
||||
elif file_path.lower().endswith('.xls'):
|
||||
print("Используется движок xlrd для .xls")
|
||||
return XlrdSheetReader(file_path, **kwargs)
|
||||
|
||||
else:
|
||||
raise ValueError("Неподдерживаемый формат файла. Используйте .xls или .xlsx")
|
||||
raise ValueError("Неподдерживаемый формат файла. Используйте .xls или .xlsx")
|
||||
|
||||
|
||||
55
utils.py
55
utils.py
@@ -1,14 +1,58 @@
|
||||
|
||||
# gemini generated
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
import time
|
||||
import xlrd
|
||||
from coord import Coord, Merged
|
||||
from translations import ExcelSheetReader
|
||||
import re
|
||||
|
||||
|
||||
class StepTimeCounter:
    """Stopwatch reporting the seconds elapsed between successive marks.

    Durations are measured with ``time.monotonic()`` so that a system clock
    adjustment cannot yield negative or inflated intervals. The public
    ``time`` and ``createtime`` attributes keep holding wall-clock
    timestamps taken from ``time.time()``.
    """

    def __init__(self):
        # Wall-clock timestamp of the last mark; set for real by setnow().
        self.time: float = -1.0
        # Wall-clock timestamp of construction.
        self.createtime = time.time()
        # Monotonic counterparts actually used for measuring durations.
        self._created_mark = time.monotonic()
        self._mark = self._created_mark
        self.setnow()

    def setnow(self):
        """Move the mark to the current moment."""
        self.time = time.time()
        self._mark = time.monotonic()

    def step(self, no_set_now=False):
        """Return seconds elapsed since the last mark.

        Unless ``no_set_now`` is true, the mark is moved to the current
        moment afterwards, so consecutive calls measure consecutive steps.
        """
        left = time.monotonic() - self._mark
        if not no_set_now:
            self.setnow()
        return left

    def from_create(self):
        """Return seconds elapsed since this counter was constructed."""
        return time.monotonic() - self._created_mark
|
||||
|
||||
EMPTY_CTYPES = [xlrd.XL_CELL_EMPTY, xlrd.XL_CELL_BLANK]
|
||||
|
||||
def discards_list(trg, nones=True, emptystrings=True):
    """Strip placeholder values from ``trg`` in place.

    Works on any container that supports ``in`` and ``.remove()`` (lists and
    sets alike). Every occurrence is removed, not just the first one.

    :param trg: container to clean; mutated in place.
    :param nones: when true, remove all ``None`` entries.
    :param emptystrings: when true, remove all ``""`` entries.
    :return: None
    """
    unwanted = []
    if nones:
        unwanted.append(None)
    if emptystrings:
        unwanted.append("")

    for value in unwanted:
        # Loop until gone: list.remove() drops only one occurrence per call.
        while value in trg:
            trg.remove(value)
|
||||
|
||||
def has_no_bottom_border(reader: "ExcelSheetReader", coord):
    """Tell whether no border is drawn between ``coord`` and the cell below.

    True only when the 'bottom' border style of ``coord`` and the 'top'
    border style of the cell one row down are both 0.
    """
    if reader.get_border_style(coord, 'bottom') != 0:
        return False
    below = coord.shift(down=1)
    return reader.get_border_style(below, 'top') == 0
|
||||
|
||||
def find_element_index(my_list, element):
    """Return the index of the first ``element`` in ``my_list``, or -1 if absent."""
    return my_list.index(element) if element in my_list else -1
|
||||
|
||||
_NO_DEFAULT = object()  # sentinel: lets callers pass None as a real default


def next_element(arr, el, default=_NO_DEFAULT):
    """Return the element that follows ``el`` in ``arr``.

    :param arr: sequence to search (must support ``.index()`` and ``len()``).
    :param el: element whose successor is wanted.
    :param default: optional value returned when ``el`` is missing from
        ``arr`` or is its last element.
    :return: the successor of ``el``, or ``default`` as described above.
    :raises IndexError: when no ``default`` is given and ``el`` is the last
        element of ``arr`` (or ``arr`` is empty).

    NOTE: without ``default``, a missing ``el`` yields ``arr[0]``. This is
    the historical behaviour and is kept so existing callers are unaffected.
    """
    try:
        index = arr.index(el)
    except ValueError:
        index = -1  # legacy: "not found" makes the successor arr[0]

    successor = index + 1
    if default is not _NO_DEFAULT:
        found_with_successor = index >= 0 and successor < len(arr)
        return arr[successor] if found_with_successor else default

    if successor >= len(arr):
        raise IndexError(
            f"next_element: {el!r} has no successor in a sequence of length {len(arr)}"
        )
    return arr[successor]
|
||||
|
||||
def remove_from_list(l: list, todel: list):
    """Remove every occurrence of each value in ``todel`` from ``l`` in place.

    Also works for other containers supporting ``in`` and ``.remove()``,
    such as sets.

    :param l: container to mutate.
    :param todel: values to strip from ``l``; absent values are ignored.
    :return: the same ``l`` object, for call chaining.
    """
    for unwanted in todel:
        # list.remove() drops a single occurrence, so repeat until none remain.
        while unwanted in l:
            l.remove(unwanted)

    return l
|
||||
|
||||
def parse_all_dirt(reader: "ExcelSheetReader", min_pos, right, down):
|
||||
RET = set()
|
||||
|
||||
@@ -17,17 +61,16 @@ def parse_all_dirt(reader: "ExcelSheetReader", min_pos, right, down):
|
||||
col = min_pos.col
|
||||
while col < min_pos.col + right:
|
||||
#print(excel_coordinate(row, col))
|
||||
value = str(reader.get_cell_value(row, col))
|
||||
if value is not None and len(value) > 0:
|
||||
cv = reader.get_cell_value(row, col)
|
||||
value = str(cv).strip()
|
||||
if cv is not None and len(value) > 0:
|
||||
RET.add(value)
|
||||
col += 1
|
||||
row += 1
|
||||
|
||||
return RET
|
||||
|
||||
import re
|
||||
|
||||
# GEMINI
|
||||
# GEMINI GENERATED
|
||||
def normalize_name(raw_name):
|
||||
"""
|
||||
Приводит разнородные записи ФИО к единому структурированному виду.
|
||||
|
||||
Reference in New Issue
Block a user