fixes 3 pairs in a row, etc
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
# Copyright Stanislav Mironov
|
||||
|
||||
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
from requests.structures import CaseInsensitiveDict
|
||||
@@ -8,7 +10,7 @@ from bs4 import BeautifulSoup
|
||||
BASE_URL = "https://www.vstu.ru/"
|
||||
RASP_PREFIX = "https://www.vstu.ru/student/raspisaniya/zanyatiy/index.php?dep="
|
||||
|
||||
|
||||
# Parses links to Excel .xls & .xlsx files and returns them
|
||||
def parse_links(facultets):
|
||||
session = requests.Session()
|
||||
session.headers = CaseInsensitiveDict(
|
||||
@@ -18,17 +20,17 @@ def parse_links(facultets):
|
||||
"Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
|
||||
"Accept-Encoding": "gzip, deflate",
|
||||
"Connection": "keep-alive",
|
||||
"Referer": "http://dump.vstu.ru/",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Priority": "u=0, i",
|
||||
"Pragma": "no-cache",
|
||||
"Cache-Control": "no-cach",
|
||||
"Cache-Control": "no-cach"
|
||||
}
|
||||
)
|
||||
|
||||
EXCEL_LINKS = {}
|
||||
for facultet in facultets:
|
||||
url = RASP_PREFIX + facultet
|
||||
print("getting...")
|
||||
r = session.get(url)
|
||||
print(f"GET {url}")
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
@@ -51,3 +53,4 @@ def parse_links(facultets):
|
||||
print(f"+url {excel_url}")
|
||||
|
||||
return EXCEL_LINKS
|
||||
|
||||
|
||||
Reference in New Issue
Block a user