fixes 3 pairs in a row, etc

This commit is contained in:
2025-09-12 20:07:04 +03:00
parent 6920d24a98
commit ed65e5b483
8 changed files with 239 additions and 78 deletions

View File

@@ -1,5 +1,7 @@
# Copyright Stanislav Mironov
import re
import time
from urllib.parse import urljoin
import requests
from requests.structures import CaseInsensitiveDict
@@ -8,7 +10,7 @@ from bs4 import BeautifulSoup
BASE_URL = "https://www.vstu.ru/"
RASP_PREFIX = "https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php?dep="
# Parses links to Excel .xls & .xlsx files and returns them
def parse_links(facultets):
session = requests.Session()
session.headers = CaseInsensitiveDict(
@@ -18,17 +20,17 @@ def parse_links(facultets):
"Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"Referer": "http://dump.vstu.ru/",
"Upgrade-Insecure-Requests": "1",
"Priority": "u=0, i",
"Pragma": "no-cache",
"Cache-Control": "no-cach",
"Cache-Control": "no-cach"
}
)
EXCEL_LINKS = {}
for facultet in facultets:
url = RASP_PREFIX + facultet
print("getting...")
r = session.get(url)
print(f"GET {url}")
soup = BeautifulSoup(r.text, 'html.parser')
@@ -51,3 +53,4 @@ def parse_links(facultets):
print(f"+url {excel_url}")
return EXCEL_LINKS