Economy resources, sorts and latest_changes
This commit is contained in:
@@ -10,6 +10,11 @@ from bs4 import BeautifulSoup
|
||||
BASE_URL = "https://www.vstu.ru/"
|
||||
RASP_PREFIX = "https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php?dep="
|
||||
|
||||
def sibling_clear_to_date(s: str):
    """Reduce a "(последнее изменение: ...)" note to the text it wraps.

    The input is lower-cased, the "(последнее изменение:" prefix and every
    ")" character are removed, and surrounding whitespace is trimmed.
    When ``s`` is ``None`` the placeholder "!!!Python None!!!" is returned.
    """
    if s is not None:
        cleaned = s.lower()
        # Remove the wrapper fragments in the same order as before:
        # first the opening label, then any closing parenthesis.
        for fragment in ("(последнее изменение:", ")"):
            cleaned = cleaned.replace(fragment, "")
        return cleaned.strip()
    # Nothing to clean: report the missing value with a marker string.
    return "!!!Python None!!!"
|
||||
|
||||
# Парсит ссылки на эксель .xls & .xlsx файлы и выдаёт их
|
||||
def parse_links(facultets):
|
||||
session = requests.Session()
|
||||
@@ -27,7 +32,7 @@ def parse_links(facultets):
|
||||
}
|
||||
)
|
||||
|
||||
EXCEL_LINKS = {}
|
||||
EXCEL_LINKS = []
|
||||
for facultet in facultets:
|
||||
url = RASP_PREFIX + facultet
|
||||
print("getting...")
|
||||
@@ -38,19 +43,23 @@ def parse_links(facultets):
|
||||
|
||||
# Ищем все теги <a>, у которых атрибут href соответствует нашему паттерну
|
||||
excel_tags = soup.find_all('a', href=excel_pattern)
|
||||
excel_links = [tag.get('href') for tag in excel_tags]
|
||||
|
||||
# Предположим, вы уже получили excel_links из одного из методов выше
|
||||
# excel_links = ['../../../upload/raspisanie/z/ОН_ХТФ_1 курс.xlsx', ...]
|
||||
|
||||
absolute_links = [urljoin(BASE_URL, relative_link) for relative_link in excel_links]
|
||||
|
||||
if facultet not in EXCEL_LINKS.keys():
|
||||
EXCEL_LINKS[facultet] = set()
|
||||
|
||||
for excel_url in absolute_links:
|
||||
EXCEL_LINKS[facultet].add(excel_url)
|
||||
print(f"+url {excel_url}")
|
||||
for a in excel_tags:
|
||||
last_changed = sibling_clear_to_date(a.next_sibling)
|
||||
url = urljoin(BASE_URL, a.get('href'))
|
||||
record = {
|
||||
"facultet": facultet,
|
||||
"url": url,
|
||||
"last_changed": last_changed
|
||||
}
|
||||
print(record)
|
||||
EXCEL_LINKS.append(record)
|
||||
|
||||
return EXCEL_LINKS
|
||||
return sorted(EXCEL_LINKS, key=lambda x: x['url'])
|
||||
|
||||
|
||||
def excels_to_diffabledates(excels):
    """Render records as sorted "last_changed facultet url" text lines.

    Each item of ``excels`` is indexed by the keys ``last_changed``,
    ``facultet`` and ``url``. The formatted lines are sorted, joined with
    newline characters, and the joined text is stripped of surrounding
    whitespace before being returned.
    """
    lines = sorted(
        f"{item['last_changed']} {item['facultet']} {item['url']}"
        for item in excels
    )
    return "\n".join(lines).strip()
|
||||
|
||||
Reference in New Issue
Block a user