feat: add OpenInsider Top-of-day crawler and multi-CRAWLER_TYPE support

New crawler: app/crawlers/openinsider_top.py
- Scrapes three pages (sales/purchases/officer purchases)
- Filters rows with Value/Amount >= $1,000,000 (configurable via INSIDER_MIN_AMOUNT)
- Builds concise notifications; saves to data/openinsider_top.json

Runner: support comma-separated CRAWLER_TYPE and new openinsider_top type
- Accepts e.g., CRAWLER_TYPE=openinsider_top,openinsider,barrons
- Preserves order, removes duplicates; warns on unknown types
- Uses shared schedule: RUN_DAILY_AT or CHECK_INTERVAL; initial run per crawler

Entrypoint: rename enhanced_crawler.py -> main.py
- Update Dockerfile CMD and README references

Config & docs:
- Reorganize .env.template into clear sections with examples
- Update .env with multi-crawler example and INSIDER_MIN_AMOUNT
- README: document new crawler, usage, and multi-type CRAWLER_TYPE
This commit is contained in:
2025-09-09 21:17:50 +08:00
parent b2c58c0560
commit f708f3bf1d
6 changed files with 308 additions and 27 deletions

View File

@@ -8,6 +8,7 @@ import schedule
from app.config import load_config, setup_logging
from app.crawlers.barrons import BarronsCrawler
from app.crawlers.openinsider import OpenInsiderCrawler
from app.crawlers.openinsider_top import OpenInsiderTopCrawler
from app.api.server import create_app
@@ -16,17 +17,38 @@ def start():
config = load_config()
logger = setup_logging(config.log_level, config.log_dir)
# Select crawler via env var
crawler_type = (os.getenv('CRAWLER_TYPE') or 'barrons').lower()
# Select crawler(s) via env var (supports comma-separated types)
types_raw = os.getenv('CRAWLER_TYPE') or 'barrons'
type_list = [t.strip().lower() for t in types_raw.split(',') if t.strip()]
# Preserve order, remove duplicates
seen = set()
crawler_types = []
for t in type_list:
if t not in seen:
seen.add(t)
crawler_types.append(t)
logger.info(f"選擇爬蟲類型: {crawler_types}")
crawlers = []
if crawler_type in ('openinsider', 'open_insider'):
symbols_raw = os.getenv('SYMBOLS') or os.getenv('SYMBOL', 'PLTR')
symbols = [s.strip().upper() for s in symbols_raw.split(',') if s.strip()]
logger.info(f"使用 OpenInsider 內部人交易爬蟲symbols={symbols}")
for sym in symbols:
crawlers.append(OpenInsiderCrawler(config, logger, symbol=sym))
else:
logger.info("使用 Barron's 股票推薦爬蟲")
for ctype in crawler_types:
if ctype in ('openinsider', 'open_insider'):
symbols_raw = os.getenv('SYMBOLS') or os.getenv('SYMBOL', 'PLTR')
symbols = [s.strip().upper() for s in symbols_raw.split(',') if s.strip()]
logger.info(f"使用 OpenInsider 內部人交易爬蟲symbols={symbols}")
for sym in symbols:
crawlers.append(OpenInsiderCrawler(config, logger, symbol=sym))
elif ctype in ('openinsider_top', 'open_insider_top', 'openinsider_topday'):
logger.info("使用 OpenInsider 當日大額內部人交易爬蟲 (三頁合併,金額>=1,000,000)")
crawlers.append(OpenInsiderTopCrawler(config, logger))
elif ctype in ('barrons', "barron's", 'barrons_stock_picks'):
logger.info("使用 Barron's 股票推薦爬蟲")
crawlers.append(BarronsCrawler(config, logger))
else:
logger.warning(f"未知的 CRAWLER_TYPE: {ctype},忽略此項")
# Fallback when none recognized
if not crawlers:
logger.info("未選到任何爬蟲,預設使用 Barron's")
crawlers.append(BarronsCrawler(config, logger))
# Create and start API in background