Files
stock-info-crawler/app/crawlers/base.py
MH Hung e89567643b feat(openinsider): 新增 OpenInsider 內部人交易爬蟲,支援多標的與每日排程
- 新增 app/crawlers/openinsider.py,來源 http://openinsider.com/search?q={symbol}

- 支援多標的:以 SYMBOLS=PLTR,NVDA,... 同時追多檔(或使用 SYMBOL 單一)

- runner: 多實例排程與啟動;/check 會依序觸發全部爬蟲

- API: /info、/stats、/check、/notify_test 支援多爬蟲回應

- config/base: 新增 RUN_DAILY_AT 每日固定時間;未設定則用 CHECK_INTERVAL

- notifications: 新增 send_custom_email、send_text_webhook、send_text_discord

- README 與 .env.template 更新;.env 改為 CRAWLER_TYPE=openinsider

- 移除 quiver_insiders 爬蟲與相關設定

BREAKING CHANGE: 不再支援 CRAWLER_TYPE=quiver_insiders;請改用 openinsider。
2025-09-04 22:32:29 +08:00

140 lines
5.0 KiB
Python

from __future__ import annotations
import hashlib
import time
import signal
from abc import ABC, abstractmethod
from datetime import datetime
from typing import List, Dict, Optional
import schedule
from app.config import AppConfig
from app.services import storage
from app.services import notifications as notif
class BaseCrawler(ABC):
    """Skeleton for a polling crawler: fetch, parse, diff, notify, persist.

    Subclasses provide the site-specific :meth:`fetch_page` and
    :meth:`parse_items`. This base class compares the parsed items with the
    snapshot stored at ``self.data_path``, sends notifications for new
    items, keeps the ``self.stats`` counters up to date and, via :meth:`run`,
    repeats the check on a schedule.
    """

    # `config` is annotated with a string so that defining the class never
    # requires AppConfig to be resolvable at definition time.
    def __init__(self, name: str, config: "AppConfig", logger, data_filename: str):
        """Set up crawler state.

        Args:
            name: Label used in log messages.
            config: Application configuration. This class reads ``data_dir``,
                ``always_notify_on_startup``, ``email``, ``webhook_url``,
                ``discord_webhook``, ``check_interval`` and (optionally)
                ``run_daily_at`` from it.
            logger: Object exposing ``info``, ``warning`` and ``error``.
            data_filename: File name passed to ``storage.data_file_path``
                together with ``config.data_dir`` to build the snapshot path.
        """
        self.name = name
        self.config = config
        self.logger = logger
        self.data_path = storage.data_file_path(config.data_dir, data_filename)
        # Cleared by the signal handler to end the loop in run().
        self.running = True
        # Gates the optional "send the current list once at startup" branch.
        self._first_check_done = False
        self.stats = {
            'start_time': datetime.now().isoformat(),
            'total_checks': 0,
            'new_picks_found': 0,
            'last_check': None,
            'last_notification': None,
            'errors': 0,
        }

    # --- Abstract site-specific hooks ---
    @abstractmethod
    def fetch_page(self) -> Optional[str]:
        """Return the raw page content to parse.

        A falsy return value makes :meth:`run_check` stop early and
        return an empty list.
        """
        ...

    @abstractmethod
    def parse_items(self, html_content: str) -> List[Dict]:
        """Turn fetched content into a list of item dicts.

        :meth:`find_new` identifies items by their ``'hash'`` key.
        """
        ...

    # --- Generic helpers ---
    def find_new(self, current: List[Dict], previous: List[Dict]) -> List[Dict]:
        """Return the items of ``current`` whose hash is absent from ``previous``.

        Previous items without a ``'hash'`` key are ignored. A current item
        without a ``'hash'`` key is always reported, because its lookup
        value (``None``) is never part of the known-hash set.
        """
        known_hashes = {item.get('hash') for item in previous if 'hash' in item}
        return [item for item in current if item.get('hash') not in known_hashes]

    def _save_snapshot(self, items: List[Dict]) -> None:
        """Persist ``items`` plus the current stats to ``self.data_path``."""
        storage.save_json(self.data_path, {
            'last_update': datetime.now().isoformat(),
            'stock_picks': items,
            'stats': self.stats,
        })

    # --- Main check ---
    def run_check(self) -> Optional[List[Dict]]:
        """Run one fetch/parse/diff cycle.

        Returns:
            * the list of new items when any were found (after notifying
              and saving the new snapshot);
            * the full current list when nothing is new but this is the
              first check and ``config.always_notify_on_startup`` is set;
            * ``[]`` when nothing was fetched, nothing was parsed, or
              nothing is new;
            * ``None`` when an exception occurred (it is logged and counted
              in ``stats['errors']``).
        """
        self.logger.info(f"開始檢查 {self.name}...")
        self.stats['total_checks'] += 1
        self.stats['last_check'] = datetime.now().isoformat()
        try:
            html = self.fetch_page()
            if not html:
                return []

            current = self.parse_items(html)
            if not current:
                self.logger.warning("未找到內容")
                return []

            prev = storage.load_json(self.data_path).get('stock_picks', [])
            new_items = self.find_new(current, prev)
            if new_items:
                self.logger.info(f"🚨 發現 {len(new_items)} 條新內容")
                self.stats['new_picks_found'] += len(new_items)
                self._send_notifications(new_items)
                self._save_snapshot(current)
                return new_items

            # Optionally notify on first run
            if (not self._first_check_done) and self.config.always_notify_on_startup and current:
                self.logger.info("🟢 啟動首次檢查:無新內容,但依設定寄出目前清單")
                self._send_notifications(current)
                self._save_snapshot(current)
                return current

            self.logger.info("✅ 沒有發現新內容")
            return []
        except Exception as e:
            self.logger.error(f"檢查過程錯誤: {e}")
            self.stats['errors'] += 1
            return None
        finally:
            # Mark the startup check as done here, not only in run(): callers
            # that invoke run_check() directly would otherwise re-send the
            # whole list on every check that finds nothing new. As in run(),
            # the flag flips regardless of how the first check ended.
            self._first_check_done = True

    def _send_notifications(self, items: List[Dict]) -> None:
        """Send ``items`` through every configured channel.

        Each channel is attempted independently; a failure is logged and
        does not prevent the remaining channels from being tried.
        ``stats['last_notification']`` is updated when at least one channel
        succeeded.
        """
        sent = False

        if self.config.email:
            try:
                notif.send_email(items, self.config.email)
                sent = True
            except Exception as e:
                self.logger.error(f"電子郵件通知失敗: {e}")

        if self.config.webhook_url:
            try:
                notif.send_webhook(items, self.config.webhook_url)
                sent = True
            except Exception as e:
                self.logger.error(f"Webhook 通知失敗: {e}")

        if self.config.discord_webhook:
            try:
                notif.send_discord(items, self.config.discord_webhook)
                sent = True
            except Exception as e:
                self.logger.error(f"Discord 通知失敗: {e}")

        if sent:
            self.stats['last_notification'] = datetime.now().isoformat()

    # --- Run loop ---
    def _signal_handler(self, signum, frame):
        """Signal callback: ask the loop in run() to stop."""
        self.logger.info("收到停止信號,正在關閉...")
        self.running = False

    def run(self):
        """Schedule periodic checks, run one immediately, then loop until stopped.

        Uses ``config.run_daily_at`` (one check per day at that time) when it
        is set, otherwise one check every ``config.check_interval`` seconds.
        The loop ends once ``self.running`` becomes false.
        """
        try:
            signal.signal(signal.SIGINT, self._signal_handler)
            signal.signal(signal.SIGTERM, self._signal_handler)
        except ValueError:
            # signal.signal() raises ValueError when called outside the main
            # thread. A crawler started from a worker thread should still
            # run; it can be stopped by setting `running` to False.
            self.logger.warning("非主執行緒,略過信號處理器註冊")

        daily_at = getattr(self.config, 'run_daily_at', None)
        if daily_at:
            schedule.every().day.at(daily_at).do(self.run_check)
            self.logger.info(f"🚀 爬蟲已啟動,每天 {daily_at} 檢查一次")
        else:
            schedule.every(self.config.check_interval).seconds.do(self.run_check)
            self.logger.info(f"🚀 爬蟲已啟動,每 {self.config.check_interval} 秒檢查一次")

        # Immediate first check so the crawler does not wait a full period.
        self.run_check()
        self._first_check_done = True

        while self.running:
            schedule.run_pending()
            time.sleep(1)
        self.logger.info("爬蟲已停止")