New crawler: app/crawlers/openinsider_top.py
- Scrapes three pages (sales/purchases/officer purchases)
- Filters rows with Value/Amount >= $1,000,000 (configurable via INSIDER_MIN_AMOUNT)
- Builds concise notifications; saves to data/openinsider_top.json
Runner: support comma-separated CRAWLER_TYPE and new openinsider_top type
- Accepts e.g., CRAWLER_TYPE=openinsider_top,openinsider,barrons
- Preserves order, removes duplicates; warns on unknown types
- Uses shared schedule: RUN_DAILY_AT or CHECK_INTERVAL; initial run per crawler
Entrypoint: rename enhanced_crawler.py -> main.py
- Update Dockerfile CMD and README references
Config & docs:
- Reorganize .env.template into clear sections with examples
- Update .env with multi-crawler example and INSIDER_MIN_AMOUNT
- README: document new crawler, usage, and multi-type CRAWLER_TYPE
89 lines
3.4 KiB
Python
89 lines
3.4 KiB
Python
from __future__ import annotations
|
||
|
||
import os
|
||
import threading
|
||
import time
|
||
import schedule
|
||
|
||
from app.config import load_config, setup_logging
|
||
from app.crawlers.barrons import BarronsCrawler
|
||
from app.crawlers.openinsider import OpenInsiderCrawler
|
||
from app.crawlers.openinsider_top import OpenInsiderTopCrawler
|
||
from app.api.server import create_app
|
||
|
||
|
||
# Accepted spellings of each crawler type, mapped to one canonical key so that
# two aliases of the same crawler are only instantiated (and scheduled) once.
_CRAWLER_ALIASES = {
    'openinsider': 'openinsider',
    'open_insider': 'openinsider',
    'openinsider_top': 'openinsider_top',
    'open_insider_top': 'openinsider_top',
    'openinsider_topday': 'openinsider_top',
    'barrons': 'barrons',
    "barron's": 'barrons',
    'barrons_stock_picks': 'barrons',
}


def _split_unique(raw: str, transform) -> list[str]:
    """Split a comma-separated string into unique, non-empty, normalised items.

    Each piece is stripped, empty pieces are dropped, ``transform`` (e.g.
    ``str.lower``) is applied, and duplicates are removed while keeping the
    order of first occurrence.
    """
    pieces = (piece.strip() for piece in raw.split(','))
    # dict.fromkeys keeps insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(transform(piece) for piece in pieces if piece))


def _build_crawlers(config, logger, crawler_types: list[str]) -> list:
    """Instantiate one crawler (or one per symbol) for every recognised type.

    Unknown types are logged with a warning and skipped. Aliases that resolve
    to a crawler kind that was already built are skipped so the same crawler
    is not created twice.
    """
    crawlers = []
    built = set()
    for ctype in crawler_types:
        kind = _CRAWLER_ALIASES.get(ctype)
        if kind is None:
            logger.warning(f"未知的 CRAWLER_TYPE: {ctype},忽略此項")
            continue
        if kind in built:
            continue
        built.add(kind)

        if kind == 'openinsider':
            # `or` chain (instead of a getenv default) so that an empty
            # SYMBOLS/SYMBOL value still falls back to the default symbol.
            symbols_raw = os.getenv('SYMBOLS') or os.getenv('SYMBOL') or 'PLTR'
            symbols = _split_unique(symbols_raw, str.upper)
            logger.info(f"使用 OpenInsider 內部人交易爬蟲,symbols={symbols}")
            crawlers.extend(
                OpenInsiderCrawler(config, logger, symbol=sym) for sym in symbols
            )
        elif kind == 'openinsider_top':
            # NOTE(review): the threshold in this message is hard-coded; if the
            # crawler's minimum amount is configurable, the log may not reflect
            # the effective value -- confirm against OpenInsiderTopCrawler.
            logger.info("使用 OpenInsider 當日大額內部人交易爬蟲 (三頁合併,金額>=1,000,000)")
            crawlers.append(OpenInsiderTopCrawler(config, logger))
        else:  # kind == 'barrons'
            logger.info("使用 Barron's 股票推薦爬蟲")
            crawlers.append(BarronsCrawler(config, logger))
    return crawlers


def start() -> None:
    """Build the configured crawlers, serve the API and run the scheduler.

    Steps, in order:

    1. Load the configuration and set up logging.
    2. Read ``CRAWLER_TYPE`` (comma-separated, default ``barrons``) and build
       the matching crawlers; fall back to ``BarronsCrawler`` if none match.
    3. Start the app returned by ``create_app`` on ``0.0.0.0:8080`` in a
       daemon thread.
    4. Register ``run_check`` of every crawler with ``schedule`` -- daily at
       ``config.run_daily_at`` when set, otherwise every
       ``config.check_interval`` seconds.
    5. Run every crawler once immediately, then loop on
       ``schedule.run_pending()`` until ``KeyboardInterrupt``.

    This call blocks until interrupted.
    """
    config = load_config()
    logger = setup_logging(config.log_level, config.log_dir)

    # Select crawler(s) via env var (supports comma-separated types).
    types_raw = os.getenv('CRAWLER_TYPE') or 'barrons'
    crawler_types = _split_unique(types_raw, str.lower)
    logger.info(f"選擇爬蟲類型: {crawler_types}")

    crawlers = _build_crawlers(config, logger, crawler_types)

    # Fallback when no requested type was recognised.
    if not crawlers:
        logger.info("未選到任何爬蟲,預設使用 Barron's")
        crawlers.append(BarronsCrawler(config, logger))

    # create_app receives the list only when there are several crawlers;
    # a single crawler is passed directly.
    app = create_app(crawlers if len(crawlers) > 1 else crawlers[0])

    def run_api():
        app.run(host='0.0.0.0', port=8080, debug=False)

    # Daemon thread: the API must not keep the process alive once the
    # scheduler loop below exits.
    flask_thread = threading.Thread(target=run_api, daemon=True)
    flask_thread.start()

    # Schedule checks for each crawler.
    labels = [getattr(c, 'symbol', c.name) for c in crawlers]
    run_daily_at = getattr(config, 'run_daily_at', None)
    if run_daily_at:
        for crawler in crawlers:
            schedule.every().day.at(run_daily_at).do(crawler.run_check)
        logger.info(f"🚀 多爬蟲已啟動,每天 {run_daily_at} 檢查一次:{labels}")
    else:
        for crawler in crawlers:
            schedule.every(config.check_interval).seconds.do(crawler.run_check)
        logger.info(f"🚀 多爬蟲已啟動,每 {config.check_interval} 秒檢查一次:{labels}")

    # Initial run for each crawler.
    for crawler in crawlers:
        crawler.run_check()
        # Mark first check done to respect ALWAYS_NOTIFY_ON_STARTUP logic
        # afterwards. Best effort: a crawler that rejects the attribute must
        # not abort startup, but the failure is logged instead of hidden.
        try:
            crawler._first_check_done = True
        except Exception as exc:
            logger.debug(f"無法設定 _first_check_done: {exc}")

    # Main loop (blocking); polls the scheduler once per second.
    try:
        while True:
            schedule.run_pending()
            time.sleep(1)
    except KeyboardInterrupt:
        logger.info("收到停止信號,正在關閉…")
|