62 lines
2.2 KiB
Python
62 lines
2.2 KiB
Python
from datetime import datetime
|
||
import hashlib
|
||
import json
|
||
import os
|
||
from requests.adapters import HTTPAdapter
|
||
from urllib3 import Retry
|
||
from consts import *
|
||
import requests
|
||
|
||
def log(msg):
    """Print ``msg`` to stdout with a ``[DD.MM.YYYY HH:MM:SS]`` prefix.

    The timestamp is taken from ``datetime.now()`` (naive local time) and
    the output stream is flushed on every call.
    """
    stamp = datetime.now().strftime('%d.%m.%Y %H:%M:%S')
    line = "[{}] {}".format(stamp, msg)
    print(line, flush=True)
|
||
|
||
def get_raw_hash(raw_list):
    """Return a SHA-1 hex digest of ``raw_list`` that ignores item order.

    Every item is converted with ``str`` and stripped of surrounding
    whitespace; the resulting strings are sorted, joined with ``|``,
    encoded as UTF-8 and hashed.
    """
    cleaned = [str(item).strip() for item in raw_list]
    cleaned.sort()
    payload = "|".join(cleaned).encode('utf-8')
    digest = hashlib.sha1(payload)
    return digest.hexdigest()
|
||
|
||
def load_json(filename, default):
    """Load JSON from ``filename``, falling back to ``default``.

    Args:
        filename: Path of the UTF-8 encoded JSON file to read.
        default: Value returned when the file does not exist or its
            content cannot be decoded as UTF-8 JSON.

    Returns:
        The parsed JSON value, or ``default``.
    """
    if not os.path.exists(filename):
        return default
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # The file vanished between the existence check and open().
        return default
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError. Catching only this keeps the best-effort fallback
        # for corrupt files without swallowing KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` does.
        return default
|
||
|
||
def save_json(filename, data, sort_keys=False):
    """Write ``data`` to ``filename`` as indented UTF-8 JSON.

    Args:
        filename: Destination path; opened in ``'w'`` mode, so existing
            content is overwritten.
        data: JSON-serialisable value to store.
        sort_keys: Passed through to ``json.dump``; when true, object
            keys are written in sorted order.
    """
    dump_options = {
        "ensure_ascii": False,  # keep non-ASCII characters as-is
        "indent": 2,
        "sort_keys": sort_keys,
    }
    with open(filename, mode='w', encoding='utf-8') as out_file:
        json.dump(data, out_file, **dump_options)
|
||
|
||
|
||
def fetch_json_robust(url, timeout=120):
    """GET ``url`` and return its JSON body, retrying transient failures.

    The request is sent with browser-like headers and an automatic retry
    policy; per the original author's note this is meant to get past
    basic Cloudflare checks.

    Args:
        url: Address to request.
        timeout: Timeout value handed to ``requests`` for the call.

    Returns:
        Whatever ``response.json()`` produces for the response body.

    Raises:
        requests.HTTPError: Raised by ``raise_for_status()`` when the
            final response carries an error status.
        requests.RequestException: Other transport-level failures from
            ``requests`` (including exhausted retries) propagate as-is.
    """
    # Up to 3 retries for GET with exponential backoff (backoff_factor=2).
    # The status list includes Cloudflare's timeout codes 522 and 524.
    retry_strategy = Retry(
        total=3,
        backoff_factor=2,
        status_forcelist=[429, 500, 502, 503, 504, 522, 524],
        allowed_methods=["GET"],
    )

    # PROXY_URL is not defined in this block; presumably it is provided by
    # the star-import from ``consts`` — confirm there. A falsy value
    # disables the proxy.
    proxies = {"http": PROXY_URL, "https": PROXY_URL} if PROXY_URL else None

    # The context manager closes the session (and its pooled connections)
    # on success and on error, so repeated calls do not leak sockets.
    with requests.Session() as session:
        # Present the request as coming from a regular desktop browser.
        session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept": "application/json",
            # Compressed transfer; the original note mentions a ~2.4 MB payload.
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
        })

        adapter = HTTPAdapter(max_retries=retry_strategy)
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        response = session.get(url, timeout=timeout, proxies=proxies)
        response.raise_for_status()

        return response.json()