diff --git a/Dockerfile b/Dockerfile index 604a7d8..9f02ea1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,4 +3,4 @@ WORKDIR /app COPY requirements.txt . RUN pip install --no-cache-dir --upgrade pip && pip install --no-cache-dir -r requirements.txt COPY . . -CMD ["python", "-u", "main.py"] \ No newline at end of file +CMD ["sh", "-c", "while true; do echo '[sh] starting script...'; python -u main.py; echo '[sh] sleeping 900s'; sleep 900; done"] \ No newline at end of file diff --git a/links_parser.py b/links_parser.py index 212ba30..f6cccbf 100644 --- a/links_parser.py +++ b/links_parser.py @@ -1,7 +1,9 @@ # Copyright Stanislav Mironov +import random import re +import time from urllib.parse import urljoin import requests from requests.structures import CaseInsensitiveDict @@ -56,6 +58,9 @@ def parse_links(facultets): } print("Found in vstu.ru: ", record) EXCEL_LINKS.append(record) + st = random.randint(1, 10)/10 + print(f"sleep {st}s") + time.sleep(st) return sorted(EXCEL_LINKS, key=lambda x: x['url']) diff --git a/main.py b/main.py index 99eaad7..4b05a8e 100644 --- a/main.py +++ b/main.py @@ -10,6 +10,8 @@ import random import time import traceback import uuid + +from pika.exceptions import ChannelWrongStateError import parser import translations import utils @@ -21,6 +23,8 @@ load_dotenv() RABBITMQ_URL = os.environ.get("RABBITMQ_URL") EXCHANGE_NAME = os.environ.get("RABBITMQ_EXCHANGE", "vstu_schedule") +INFINITY_LOOP = os.environ.get("INFINITY_LOOP", "no").lower() in ['yes', "true"] +parser.LOGGING = LOGGING = os.environ.get("PARSER_LOGGING", "no").lower() in ['yes', "true"] try: connection = pika.BlockingConnection(pika.URLParameters(RABBITMQ_URL)) @@ -45,8 +49,6 @@ PARSED_DIR = "parsed" DEBUG_ONE_FAC = None #'fevt' DEBUG_NO_SAVE_STATES = False -parser.LOGGING = LOGGING = True - def parse_sheets(download_place): to_return = {} try: @@ -206,7 +208,7 @@ def run_session(): "excel_dict": excel_dict }, ensure_ascii=False).encode('utf-8') ) - print(f"RabbitMQ published r={r}") + print(f"RabbitMQ published 'parser.excel_found.new'") new_files += 1 else: @@ -266,7 +268,7 @@ def run_session(): delivery_mode=2 ), body=json.dumps({ - "type": "excel_file_parsed", + "type": "excel_file_parsed_not_same", "is_new": is_new, "state": state }, ensure_ascii=False).encode('utf-8') @@ -276,6 +278,9 @@ def run_session(): states.append(state) except Exception as e: + if isinstance(e, ChannelWrongStateError): + raise e + faileds.append({ "uuid": str(uuid.uuid4()), "exception": str(e), @@ -326,8 +331,7 @@ def run_session(): delivery_mode=2 ), body=json.dumps({ - "type": "schedule_result_v2", - "data": d + "type": "schedule_result_v2_changed", }, ensure_ascii=False).encode('utf-8') ) @@ -338,7 +342,7 @@ def run_session(): except Exception as e: print(f"Error deleting directory '{DIRNAME}': {e}") - return {"changed_files": changed_files, 'total_files': total_files, "changed": changed, "new_files": new_files} + return {"changed_files": changed_files, 'total_files': total_files, "changed": changed, "new_files": new_files, "faileds": faileds, "faileds_count": len(faileds)} def check_dirs(): @@ -346,7 +350,11 @@ def check_dirs(): os.mkdir(PARSED_DIR) def main(): - while True: + flag = True + while flag: + if not INFINITY_LOOP: + flag = False + t = utils.StepTimeCounter() err = None sess = None @@ -380,11 +388,12 @@ def main(): "session": sess }, ensure_ascii=False).encode('utf-8') ) - - sleep_time = random.randint(14*60, 21*60) - print(f"Sleep for {round(sleep_time/6)/10} minutes") - time.sleep(sleep_time) - print("Wake up!") + + if flag: + sleep_time = random.randint(14*60, 21*60) + print(f"Sleep for {round(sleep_time/6)/10} minutes") + time.sleep(sleep_time) + print("Wake up!")