- 新增 app/crawlers/openinsider.py,來源 http://openinsider.com/search?q={symbol} - 支援多標的:以 SYMBOLS=PLTR,NVDA,... 同時追多檔(或使用 SYMBOL 單一) - runner: 多實例排程與啟動;/check 會依序觸發全部爬蟲 - API: /info、/stats、/check、/notify_test 支援多爬蟲回應 - config/base: 新增 RUN_DAILY_AT 每日固定時間;未設定則用 CHECK_INTERVAL - notifications: 新增 send_custom_email、send_text_webhook、send_text_discord - README 與 .env.template 更新;.env 改為 CRAWLER_TYPE=openinsider - 移除 quiver_insiders 爬蟲與相關設定 BREAKING CHANGE: 不再支援 CRAWLER_TYPE=quiver_insiders;請改用 openinsider。
140 lines
5.0 KiB
Python
140 lines
5.0 KiB
Python
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import time
|
|
import signal
|
|
from abc import ABC, abstractmethod
|
|
from datetime import datetime
|
|
from typing import List, Dict, Optional
|
|
|
|
import schedule
|
|
|
|
from app.config import AppConfig
|
|
from app.services import storage
|
|
from app.services import notifications as notif
|
|
|
|
|
|
class BaseCrawler(ABC):
    """Template-method base class for site crawlers.

    Subclasses supply the site-specific ``fetch_page`` / ``parse_items``
    hooks; this class owns scheduling (fixed daily time or interval),
    change detection against the persisted JSON snapshot, stats
    bookkeeping, and fan-out to the configured notification channels
    (email / webhook / Discord).
    """

    def __init__(self, name: str, config: AppConfig, logger, data_filename: str):
        """Initialize crawler state.

        Args:
            name: Human-readable crawler name (used in log messages).
            config: Application configuration (intervals, notification targets).
            logger: A configured ``logging.Logger``-like object.
            data_filename: File name of the JSON snapshot inside ``config.data_dir``.
        """
        self.name = name
        self.config = config
        self.logger = logger
        self.data_path = storage.data_file_path(config.data_dir, data_filename)

        self.running = True           # flipped to False by the signal handler
        self._first_check_done = False  # gates the optional startup notification

        # Lifetime counters persisted alongside the snapshot in _save_state().
        self.stats = {
            'start_time': datetime.now().isoformat(),
            'total_checks': 0,
            'new_picks_found': 0,
            'last_check': None,
            'last_notification': None,
            'errors': 0,
        }

    # --- Abstract site-specific hooks ---
    @abstractmethod
    def fetch_page(self) -> Optional[str]:
        """Return the raw HTML of the target page, or ``None`` on failure."""
        ...

    @abstractmethod
    def parse_items(self, html_content: str) -> List[Dict]:
        """Parse ``html_content`` into a list of item dicts (ideally with a 'hash' key)."""
        ...

    # --- Generic helpers ---
    @staticmethod
    def _item_key(item: Dict) -> str:
        """Return a stable dedup key for ``item``.

        Uses the parser-provided ``'hash'`` field when present; otherwise
        fingerprints the item's sorted key/value pairs. Previously, items
        without a ``'hash'`` key compared as ``None not in prev_hashes`` and
        were re-reported as "new" on every check cycle — the fallback makes
        dedup work for them too (and finally uses the file's ``hashlib``
        import, which was otherwise unused).
        """
        explicit = item.get('hash')
        if explicit:
            return explicit
        return hashlib.md5(repr(sorted(item.items())).encode('utf-8')).hexdigest()

    def find_new(self, current: List[Dict], previous: List[Dict]) -> List[Dict]:
        """Return the items of ``current`` that are not present in ``previous``."""
        prev_keys = {self._item_key(p) for p in previous}
        return [p for p in current if self._item_key(p) not in prev_keys]

    def _save_state(self, current: List[Dict]) -> None:
        """Persist the current item list plus stats to the snapshot file."""
        storage.save_json(self.data_path, {
            'last_update': datetime.now().isoformat(),
            'stock_picks': current,
            'stats': self.stats,
        })

    # --- Main check ---
    def run_check(self) -> Optional[List[Dict]]:
        """Run one fetch → parse → diff → notify cycle.

        Returns:
            The list of newly discovered items; ``[]`` when the fetch/parse
            produced nothing or nothing changed; ``None`` when an exception
            occurred (errors counter is incremented, loop keeps running).
        """
        self.logger.info(f"開始檢查 {self.name}...")
        self.stats['total_checks'] += 1
        self.stats['last_check'] = datetime.now().isoformat()
        try:
            html = self.fetch_page()
            if not html:
                return []
            current = self.parse_items(html)
            if not current:
                self.logger.warning("未找到內容")
                return []

            prev = storage.load_json(self.data_path).get('stock_picks', [])
            new_items = self.find_new(current, prev)

            if new_items:
                self.logger.info(f"🚨 發現 {len(new_items)} 條新內容")
                self.stats['new_picks_found'] += len(new_items)
                self._send_notifications(new_items)
                self._save_state(current)
                return new_items

            # Optionally send the full current list on the very first check,
            # even when nothing is new (controlled by config).
            if (not self._first_check_done) and self.config.always_notify_on_startup and current:
                self.logger.info("🟢 啟動首次檢查:無新內容,但依設定寄出目前清單")
                self._send_notifications(current)
                self._save_state(current)
                return current

            self.logger.info("✅ 沒有發現新內容")
            return []
        except Exception as e:
            # Boundary handler: a failing check must not kill the scheduler loop.
            self.logger.error(f"檢查過程錯誤: {e}")
            self.stats['errors'] += 1
            return None

    def _send_notifications(self, items: List[Dict]) -> None:
        """Fan ``items`` out to every configured channel.

        Each channel is attempted independently (best-effort); a failure on
        one channel is logged and does not block the others.
        """
        sent = False
        if self.config.email:
            try:
                notif.send_email(items, self.config.email)
                sent = True
            except Exception as e:
                self.logger.error(f"電子郵件通知失敗: {e}")
        if self.config.webhook_url:
            try:
                notif.send_webhook(items, self.config.webhook_url)
                sent = True
            except Exception as e:
                self.logger.error(f"Webhook 通知失敗: {e}")
        if self.config.discord_webhook:
            try:
                notif.send_discord(items, self.config.discord_webhook)
                sent = True
            except Exception as e:
                self.logger.error(f"Discord 通知失敗: {e}")
        if sent:
            self.stats['last_notification'] = datetime.now().isoformat()

    # --- Run loop ---
    def _signal_handler(self, signum, frame):
        """SIGINT/SIGTERM handler: request a graceful shutdown of run()."""
        self.logger.info("收到停止信號,正在關閉...")
        self.running = False

    def run(self):
        """Start the crawler: schedule checks and loop until a stop signal.

        Scheduling mode: a fixed daily time when ``config.run_daily_at`` is
        set, otherwise every ``config.check_interval`` seconds. An immediate
        first check is performed before entering the loop.
        """
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        if getattr(self.config, 'run_daily_at', None):
            schedule.every().day.at(self.config.run_daily_at).do(self.run_check)
            self.logger.info(f"🚀 爬蟲已啟動,每天 {self.config.run_daily_at} 檢查一次")
        else:
            schedule.every(self.config.check_interval).seconds.do(self.run_check)
            self.logger.info(f"🚀 爬蟲已啟動,每 {self.config.check_interval} 秒檢查一次")
        # Immediate first check; the flag gates the startup-notification branch.
        self.run_check()
        self._first_check_done = True
        while self.running:
            schedule.run_pending()
            time.sleep(1)
        self.logger.info("爬蟲已停止")