refactor: modularize project structure and separate API from crawlers
- Introduce app/ package with config, services (storage, notifications), API server, and crawler modules
- Add BaseCrawler and BarronsCrawler; extract notifications and storage
- Keep enhanced_crawler.py as a back-compat entry point delegating to app.runner
- Add a template crawler for future sites
- Update README with the new structure and usage
- Extend .env.template with DATA_DIR/LOG_DIR options
This commit is contained in:
96
app/config.py
Normal file
96
app/config.py
Normal file
@@ -0,0 +1,96 @@
|
||||
import os
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class EmailConfig:
    """SMTP settings used for outgoing notification email.

    Built by load_email_config() from EMAIL_* environment variables;
    absent configuration is represented by None at the AppConfig level.
    """
    smtp_server: str  # SMTP host name
    smtp_port: int  # SMTP port; defaults derived from security mode (465/587/25)
    smtp_security: str # 'ssl' | 'starttls' | 'none'
    from_email: str  # sender address
    to_email: str  # recipient address
    username: str  # SMTP auth user name
    password: str  # SMTP auth password
|
||||
|
||||
|
||||
@dataclass
class AppConfig:
    """Top-level application configuration, assembled by load_config().

    All values originate from environment variables (with defaults);
    optional integrations are None when their variables are unset.
    """
    check_interval: int  # seconds between crawl checks (CHECK_INTERVAL)
    log_level: str  # logging level name, e.g. 'INFO' (LOG_LEVEL)
    always_notify_on_startup: bool  # send a notification on startup regardless of changes
    webhook_url: str | None  # generic webhook endpoint, or None if unset
    discord_webhook: str | None  # Discord webhook endpoint, or None if unset
    data_dir: str  # resolved data directory (env override / Docker path / CWD)
    log_dir: str  # resolved log directory (env override / Docker path / CWD)
    email: EmailConfig | None  # SMTP settings, or None when email is not configured
|
||||
|
||||
|
||||
def _resolve_dir(env_key: str, default_subdir: str) -> str:
|
||||
# Prefer explicit env var
|
||||
val = os.getenv(env_key)
|
||||
if val:
|
||||
return val
|
||||
# Prefer Docker paths if present
|
||||
docker_path = f"/app/{default_subdir}"
|
||||
if os.path.isdir(docker_path):
|
||||
return docker_path
|
||||
# Fallback to local ./subdir
|
||||
return os.path.join(os.getcwd(), default_subdir)
|
||||
|
||||
|
||||
def load_email_config() -> EmailConfig | None:
    """Build an EmailConfig from EMAIL_* environment variables.

    Returns None unless every mandatory variable (server, from, to,
    username, password) is set and non-empty. The port defaults to the
    conventional value for the chosen security mode (465 for ssl,
    587 for starttls, 25 otherwise) unless EMAIL_SMTP_PORT overrides it.
    """
    mandatory = (
        'EMAIL_SMTP_SERVER', 'EMAIL_FROM', 'EMAIL_TO', 'EMAIL_USERNAME', 'EMAIL_PASSWORD'
    )
    if any(not os.getenv(name) for name in mandatory):
        return None

    security = os.getenv('EMAIL_SMTP_SECURITY', 'starttls').lower()
    if security == 'ssl':
        fallback_port = 465
    elif security == 'starttls':
        fallback_port = 587
    else:
        fallback_port = 25
    port = int(os.getenv('EMAIL_SMTP_PORT', fallback_port))

    return EmailConfig(
        smtp_server=os.getenv('EMAIL_SMTP_SERVER', ''),
        smtp_port=port,
        smtp_security=security,
        from_email=os.getenv('EMAIL_FROM', ''),
        to_email=os.getenv('EMAIL_TO', ''),
        username=os.getenv('EMAIL_USERNAME', ''),
        password=os.getenv('EMAIL_PASSWORD', ''),
    )
|
||||
|
||||
|
||||
def setup_logging(level: str, log_dir: str) -> logging.Logger:
    """Configure root logging to a file and the console; return a module logger.

    Creates *log_dir* if needed and writes to <log_dir>/crawler.log as well
    as the default stream handler. An unrecognized *level* name falls back
    to INFO.
    """
    os.makedirs(log_dir, exist_ok=True)
    resolved_level = getattr(logging, level.upper(), logging.INFO)
    file_handler = logging.FileHandler(os.path.join(log_dir, 'crawler.log'))
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=resolved_level,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler],
    )
    return logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_config() -> AppConfig:
    """Assemble the full application configuration from the environment.

    Every field has a sensible default; optional integrations (webhooks,
    email) come back as None when their variables are unset.
    """
    startup_flag = os.getenv('ALWAYS_NOTIFY_ON_STARTUP', 'false').lower()
    return AppConfig(
        check_interval=int(os.getenv('CHECK_INTERVAL', 300)),
        log_level=os.getenv('LOG_LEVEL', 'INFO'),
        always_notify_on_startup=startup_flag in ('1', 'true', 'yes'),
        webhook_url=os.getenv('WEBHOOK_URL'),
        discord_webhook=os.getenv('DISCORD_WEBHOOK'),
        data_dir=_resolve_dir('DATA_DIR', 'data'),
        log_dir=_resolve_dir('LOG_DIR', 'logs'),
        email=load_email_config(),
    )
|
||||
|
Reference in New Issue
Block a user