feat(openinsider): 新增 OpenInsider 內部人交易爬蟲,支援多標的與每日排程

- 新增 app/crawlers/openinsider.py,來源 http://openinsider.com/search?q={symbol}

- 支援多標的:以 SYMBOLS=PLTR,NVDA,... 同時追多檔(或使用 SYMBOL 單一)

- runner: 多實例排程與啟動;/check 會依序觸發全部爬蟲

- API: /info、/stats、/check、/notify_test 支援多爬蟲回應

- config/base: 新增 RUN_DAILY_AT 每日固定時間;未設定則用 CHECK_INTERVAL

- notifications: 新增 send_custom_email、send_text_webhook、send_text_discord

- README 與 .env.template 更新;.env 改為 CRAWLER_TYPE=openinsider

- 移除 quiver_insiders 爬蟲與相關設定

BREAKING CHANGE: 不再支援 CRAWLER_TYPE=quiver_insiders;請改用 openinsider。
This commit is contained in:
2025-09-04 22:32:29 +08:00
parent 58cc979b5b
commit e89567643b
8 changed files with 368 additions and 40 deletions

View File

@@ -125,12 +125,15 @@ class BaseCrawler(ABC):
signal.signal(signal.SIGINT, self._signal_handler)
signal.signal(signal.SIGTERM, self._signal_handler)
schedule.every(self.config.check_interval).seconds.do(self.run_check)
self.logger.info(f"🚀 爬蟲已啟動,每 {self.config.check_interval} 秒檢查一次")
if getattr(self.config, 'run_daily_at', None):
schedule.every().day.at(self.config.run_daily_at).do(self.run_check)
self.logger.info(f"🚀 爬蟲已啟動,每天 {self.config.run_daily_at} 檢查一次")
else:
schedule.every(self.config.check_interval).seconds.do(self.run_check)
self.logger.info(f"🚀 爬蟲已啟動,每 {self.config.check_interval} 秒檢查一次")
self.run_check()
self._first_check_done = True
while self.running:
schedule.run_pending()
time.sleep(1)
self.logger.info("爬蟲已停止")

162
app/crawlers/openinsider.py Normal file
View File

@@ -0,0 +1,162 @@
from __future__ import annotations
import hashlib
from datetime import datetime
from typing import List, Dict, Optional
import requests
from bs4 import BeautifulSoup
from app.crawlers.base import BaseCrawler
from app.services import notifications as notif
class OpenInsiderCrawler(BaseCrawler):
    """Crawler for OpenInsider search results.

    Source: http://openinsider.com/search?q={symbol}

    Downloads the search page for one ticker symbol, locates the results
    table by its header labels, and turns each data row into an item dict
    (``title``, ``link``, ``scraped_at``, ``hash``).  New items are announced
    through whichever of e-mail / webhook / Discord is configured.
    """

    # Upper bound on the number of data rows parsed from one page.
    _MAX_ROWS = 100
    # Upper bound on the number of transactions listed in one notification.
    _MAX_NOTIFY_LINES = 10
    # Normalized header labels that identify the results table.
    _TABLE_HEADERS = frozenset({
        'insider', 'insider name', 'ticker', 'trans type',
        'transaction', 'trade date', 'filing date',
    })

    def __init__(self, config, logger, symbol: str = "PLTR"):
        """Create a crawler bound to a single ticker symbol.

        Args:
            config: Crawler configuration; this class reads ``email``,
                ``webhook_url`` and ``discord_webhook`` from it.
            logger: Logger used for progress and error messages.
            symbol: Ticker symbol to query (case-insensitive).
        """
        # Normalize once so the display name, the data file and the URL all
        # agree; otherwise "pltr" and "PLTR" would be tracked separately and
        # stray whitespace from a comma-separated list would reach the URL.
        normalized = symbol.strip().upper()
        super().__init__(
            name=f"OpenInsider 內部人交易:{normalized}",
            config=config,
            logger=logger,
            data_filename=f"openinsider_{normalized}.json",
        )
        self.symbol = normalized
        self.url = f"http://openinsider.com/search?q={self.symbol}"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/114.0 Safari/537.36'
        }

    def fetch_page(self) -> Optional[str]:
        """Download the search page for ``self.symbol``.

        Returns:
            The response body as text, or ``None`` when the request fails
            (the failure is logged and ``self.stats['errors']`` is bumped).
        """
        try:
            resp = requests.get(self.url, headers=self.headers, timeout=30)
            resp.raise_for_status()
            return resp.text
        except requests.RequestException as e:
            self.logger.error(f"獲取 OpenInsider 頁面失敗: {e}")
            self.stats['errors'] += 1
            return None

    @staticmethod
    def _normalize_label(text: str) -> str:
        """Lower-case *text* and collapse every whitespace run to one space.

        ``str.split()`` treats non-breaking spaces as whitespace, so labels
        such as ``"Trade\\xa0Date"`` become ``"trade date"``.
        """
        return " ".join(text.split()).lower()

    @classmethod
    def _header_labels(cls, table) -> List[str]:
        """Return the normalized text of every ``<th>`` inside *table*."""
        return [
            cls._normalize_label(th.get_text(strip=True))
            for th in table.find_all('th')
        ]

    @staticmethod
    def _find_column(header_map: Dict[str, int], candidates: List[str]) -> Optional[int]:
        """Resolve a column index from labels listed in priority order.

        An exact label match wins.  Otherwise the first *candidate* that is a
        substring of some header is used, so a specific label such as
        ``'trade date'`` is always preferred over a generic one like
        ``'date'`` regardless of the column order on the page.
        """
        for key in candidates:
            if key in header_map:
                return header_map[key]
        for key in candidates:
            for label, idx in header_map.items():
                if key in label:
                    return idx
        return None

    @staticmethod
    def _cell_text(cells, idx: Optional[int]) -> str:
        """Return the stripped text of ``cells[idx]``, or ``''`` if absent."""
        if idx is None or idx >= len(cells):
            return ''
        return cells[idx].get_text(strip=True)

    def _select_table(self, soup):
        """Pick the results table from the parsed page.

        The first table whose headers include any expected label is chosen;
        if none matches, the first table on the page is used as a fallback.
        Returns ``None`` when the page contains no table at all.
        """
        tables = soup.find_all('table')
        self.logger.info(f"OpenInsider發現 {len(tables)} 個 <table>")
        for tbl in tables:
            labels = self._header_labels(tbl)
            if labels and not self._TABLE_HEADERS.isdisjoint(labels):
                return tbl
        return tables[0] if tables else None

    def parse_items(self, html_content: str) -> List[Dict]:
        """Parse insider transactions out of the search result page.

        Args:
            html_content: Raw HTML of the OpenInsider search page.

        Returns:
            One dict per transaction row with the keys ``title``, ``link``,
            ``scraped_at`` and ``hash``; an empty list when no table is found.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        table = self._select_table(soup)
        if table is None:
            self.logger.warning("OpenInsider找不到結果表格")
            return []

        # Map normalized header label -> column position.
        header_map: Dict[str, int] = {
            label: idx for idx, label in enumerate(self._header_labels(table))
        }
        idx_insider = self._find_column(header_map, ['insider name', 'insider', 'name'])
        idx_type = self._find_column(header_map, ['trans type', 'transaction', 'type'])
        idx_qty = self._find_column(header_map, ['qty', 'quantity', 'shares'])
        idx_price = self._find_column(header_map, ['price'])
        idx_ticker = self._find_column(header_map, ['ticker'])
        idx_trade_date = self._find_column(header_map, ['trade date', 'date'])
        idx_filing_date = self._find_column(header_map, ['filing date', 'filed'])

        # Header rows contain only <th>; keep rows that have at least one <td>.
        data_rows = [r for r in table.find_all('tr') if r.find('td')]

        items: List[Dict] = []
        for row in data_rows[:self._MAX_ROWS]:
            cells = row.find_all('td')
            raw_insider = self._cell_text(cells, idx_insider)
            raw_type = self._cell_text(cells, idx_type)
            raw_qty = self._cell_text(cells, idx_qty)
            raw_price = self._cell_text(cells, idx_price)
            ticker = self._cell_text(cells, idx_ticker).upper()
            trade_date = self._cell_text(cells, idx_trade_date)
            filing_date = self._cell_text(cells, idx_filing_date)

            # A row with no transaction field at all (layout/spacer row, or a
            # table whose columns could not be resolved) carries no data;
            # emitting it would only create identical placeholder items.
            if not any((raw_insider, raw_type, raw_qty, raw_price,
                        trade_date, filing_date)):
                continue
            if ticker and self.symbol not in ticker:
                # Keep results aligned to symbol query
                continue

            insider = raw_insider or 'Unknown Insider'
            trans_type = raw_type or 'N/A'
            qty = raw_qty or 'N/A'
            price = raw_price or 'N/A'

            title = f"{self.symbol} {trans_type} - {insider} qty {qty} @ {price} on {trade_date}"
            if filing_date:
                title += f" (filed {filing_date})"
            # The hash is built from the row's field values so the same
            # transaction yields the same identifier on every crawl.
            hash_src = f"{self.symbol}|{insider}|{trans_type}|{qty}|{price}|{trade_date}|{filing_date}"
            items.append({
                'title': title,
                'link': self.url,
                'scraped_at': datetime.now().isoformat(),
                'hash': hashlib.md5(hash_src.encode('utf-8')).hexdigest()[:12],
            })

        self.logger.info(f"OpenInsider解析完成擷取 {len(items)} 筆交易")
        return items

    def _send_notifications(self, items: List[Dict]) -> None:
        """Announce *items* on every configured notification channel.

        Each channel is attempted independently: a failure is logged and the
        remaining channels are still tried.  When at least one channel
        succeeds, ``self.stats['last_notification']`` is set to the current
        time.

        Args:
            items: Item dicts as produced by :meth:`parse_items`; only the
                first ``_MAX_NOTIFY_LINES`` titles are listed in the message.
        """
        subject = f"OpenInsider 內部人交易異動 - {self.symbol} ({len(items)}筆)"
        lines = [str(it['title']) for it in items[:self._MAX_NOTIFY_LINES]]
        body = (
            f"發現 {len(items)} 筆新的內部人交易異動OpenInsider\n\n"
            + "\n".join(lines)
            + "\n\n"
            + f"抓取時間:{datetime.now().isoformat()}\n來源:{self.url}"
        )

        sent = False
        if self.config.email:
            try:
                notif.send_custom_email(subject, body, self.config.email)
                sent = True
            except Exception as e:  # best-effort: keep trying other channels
                self.logger.error(f"電子郵件通知失敗: {e}")
        if self.config.webhook_url:
            try:
                notif.send_text_webhook(subject + "\n\n" + body, self.config.webhook_url)
                sent = True
            except Exception as e:  # best-effort: keep trying other channels
                self.logger.error(f"Webhook 通知失敗: {e}")
        if self.config.discord_webhook:
            try:
                notif.send_text_discord(
                    title=subject,
                    description=f"{self.symbol} 內部人交易更新OpenInsider",
                    lines=lines,
                    webhook=self.config.discord_webhook,
                )
                sent = True
            except Exception as e:  # best-effort: nothing else to try
                self.logger.error(f"Discord 通知失敗: {e}")

        if sent:
            self.stats['last_notification'] = datetime.now().isoformat()