Files
stock-info-crawler/app/runner.py
MH Hung f708f3bf1d feat: add OpenInsider Top-of-day crawler and multi-CRAWLER_TYPE support
New crawler: app/crawlers/openinsider_top.py
- Scrapes three pages (sales / purchases / officer purchases)
- Filters rows with Value/Amount >= $1,000,000 (configurable via INSIDER_MIN_AMOUNT)
- Builds concise notifications; saves to data/openinsider_top.json
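
The threshold logic itself isn't visible in this view; the following is a minimal sketch of how such a filter could work. parse_amount, keep_row, and the 'value' row key are assumptions for illustration — only INSIDER_MIN_AMOUNT and the $1,000,000 default come from the commit.

import os
import re

# Default to $1,000,000 unless INSIDER_MIN_AMOUNT overrides it
MIN_AMOUNT = int(os.getenv('INSIDER_MIN_AMOUNT', '1000000'))

def parse_amount(raw: str) -> int:
    # OpenInsider-style amounts look like "$1,234,567" (or "-$1,234,567"
    # for sales); keep only the digits so we compare magnitudes
    digits = re.sub(r'[^\d]', '', raw)
    return int(digits) if digits else 0

def keep_row(row: dict) -> bool:
    # Hypothetical row shape: {'value': '$2,500,000', ...}
    return parse_amount(row.get('value', '')) >= MIN_AMOUNT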

Runner: support comma-separated CRAWLER_TYPE and new openinsider_top type
- Accepts e.g. CRAWLER_TYPE=openinsider_top,openinsider,barrons
- Preserves order, removes duplicates; warns on unknown types
- Uses shared schedule: RUN_DAILY_AT or CHECK_INTERVAL; initial run per crawler

Entrypoint: rename enhanced_crawler.py -> main.py
- Update Dockerfile CMD and README references
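
The renamed entrypoint presumably just hands off to the runner shown below; a minimal sketch of what main.py might contain (the actual file isn't included in this view, so treat it as an assumption):

from app.runner import start

if __name__ == '__main__':
    start()

The updated Dockerfile CMD would then point at it, e.g. CMD ["python", "main.py"] — also an assumption, since the Dockerfile isn't shown.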

Config & docs:
- Reorganize .env.template into clear sections with examples
- Update .env with multi-crawler example and INSIDER_MIN_AMOUNT
- README: document new crawler, usage, and multi-type CRAWLER_TYPE
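
A plausible excerpt of the updated .env (the CRAWLER_TYPE value is taken from the commit message; the schedule values are illustrative, not confirmed defaults):

# Which crawlers to run (comma-separated; order preserved, duplicates dropped)
CRAWLER_TYPE=openinsider_top,openinsider,barrons
# Minimum insider trade value in USD for the top-of-day crawler
INSIDER_MIN_AMOUNT=1000000
# Shared schedule: a fixed daily time, or fall back to a polling interval (seconds)
RUN_DAILY_AT=09:00
#CHECK_INTERVAL=3600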
2025-09-09 21:17:50 +08:00

from __future__ import annotations

import os
import threading
import time

import schedule

from app.config import load_config, setup_logging
from app.crawlers.barrons import BarronsCrawler
from app.crawlers.openinsider import OpenInsiderCrawler
from app.crawlers.openinsider_top import OpenInsiderTopCrawler
from app.api.server import create_app

def start():
    # Load configuration and set up logging
    config = load_config()
    logger = setup_logging(config.log_level, config.log_dir)

    # Select crawler(s) via env var (supports comma-separated types)
    types_raw = os.getenv('CRAWLER_TYPE') or 'barrons'
    type_list = [t.strip().lower() for t in types_raw.split(',') if t.strip()]

    # Preserve order, remove duplicates
    seen = set()
    crawler_types = []
    for t in type_list:
        if t not in seen:
            seen.add(t)
            crawler_types.append(t)
    logger.info(f"Selected crawler types: {crawler_types}")

    crawlers = []
    for ctype in crawler_types:
        if ctype in ('openinsider', 'open_insider'):
            symbols_raw = os.getenv('SYMBOLS') or os.getenv('SYMBOL', 'PLTR')
            symbols = [s.strip().upper() for s in symbols_raw.split(',') if s.strip()]
            logger.info(f"Using the OpenInsider insider-trading crawler, symbols={symbols}")
            for sym in symbols:
                crawlers.append(OpenInsiderCrawler(config, logger, symbol=sym))
        elif ctype in ('openinsider_top', 'open_insider_top', 'openinsider_topday'):
            logger.info("Using the OpenInsider top-of-day insider-trading crawler (three pages merged, amount >= $1,000,000)")
            crawlers.append(OpenInsiderTopCrawler(config, logger))
        elif ctype in ('barrons', "barron's", 'barrons_stock_picks'):
            logger.info("Using the Barron's stock picks crawler")
            crawlers.append(BarronsCrawler(config, logger))
        else:
            logger.warning(f"Unknown CRAWLER_TYPE: {ctype}; ignoring it")

    # Fallback when none recognized
    if not crawlers:
        logger.info("No crawler selected; defaulting to Barron's")
        crawlers.append(BarronsCrawler(config, logger))

    # Create and start API in background (create_app receives the list of
    # crawlers, or the single crawler when only one is configured)
    app = create_app(crawlers if len(crawlers) > 1 else crawlers[0])

    def run_api():
        app.run(host='0.0.0.0', port=8080, debug=False)

    flask_thread = threading.Thread(target=run_api, daemon=True)
    flask_thread.start()

    # Schedule checks for each crawler and run loop (blocking)
    if getattr(config, 'run_daily_at', None):
        for c in crawlers:
            schedule.every().day.at(config.run_daily_at).do(c.run_check)
        logger.info(f"🚀 Crawlers started, checking daily at {config.run_daily_at}: {[getattr(c, 'symbol', c.name) for c in crawlers]}")
    else:
        for c in crawlers:
            schedule.every(config.check_interval).seconds.do(c.run_check)
        logger.info(f"🚀 Crawlers started, checking every {config.check_interval} seconds: {[getattr(c, 'symbol', c.name) for c in crawlers]}")

    # Initial run for each
    for c in crawlers:
        c.run_check()
        # Mark first check done to respect ALWAYS_NOTIFY_ON_STARTUP logic afterwards
        try:
            c._first_check_done = True
        except Exception:
            pass

    # Main loop
    try:
        while True:
            schedule.run_pending()
            time.sleep(1)
    except KeyboardInterrupt:
        logger.info("Received stop signal, shutting down…")