# File: stock-info-crawler/app/crawlers/barrons.py
# (Python; 78 lines, 3.1 KiB as shown by the repository viewer)

from __future__ import annotations

import hashlib
from datetime import datetime
from typing import List, Dict, Optional
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from app.crawlers.base import BaseCrawler
from app.services import notifications as notif
class BarronsCrawler(BaseCrawler):
    """Crawler for the Barron's stock-picks listing page.

    Fetches the page, extracts headline entries (title + link) and builds the
    notification e-mail for them.

    NOTE(review): ``self.logger`` and ``self.stats`` are used below but never
    assigned in this class; presumably ``BaseCrawler.__init__`` provides
    them -- confirm against the base class.
    """

    # CSS selectors tried in order; the first one that matches anything wins.
    _SELECTORS = (
        'article[data-module="ArticleItem"]',
        '.WSJTheme--headline',
        '.MarketDataModule-headline',
        'h3 a, h4 a',
        '[data-module] a[href*="articles"]',
    )
    # Only the first few matched elements are considered per crawl.
    _MAX_ITEMS = 10
    # Titles of this length or shorter are discarded.
    _MIN_TITLE_LENGTH = 10

    def __init__(self, config, logger):
        """Initialise the crawler with its display name, data file and target URL.

        Args:
            config: Crawler configuration, passed through to ``BaseCrawler``.
            logger: Logger, passed through to ``BaseCrawler``.
        """
        # Name used in generic notifications; include emoji to match previous subject
        super().__init__(
            name="📈 Barron's 新股票推薦",
            config=config,
            logger=logger,
            data_filename='barrons_data.json',
        )
        self.url = "https://www.barrons.com/market-data/stocks/stock-picks?mod=BOL_TOPNAV"
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def fetch_page(self) -> Optional[str]:
        """Download the stock-picks page.

        Returns:
            The response body as text, or ``None`` if the request raised a
            ``requests.RequestException`` (including an HTTP error status
            reported by ``raise_for_status``). On failure the error is logged
            and ``self.stats['errors']`` is incremented.
        """
        try:
            resp = requests.get(self.url, headers=self.headers, timeout=30)
            resp.raise_for_status()
        except requests.RequestException as e:
            self.logger.error(f"獲取網頁失敗: {e}")
            self.stats['errors'] += 1
            return None
        return resp.text

    def parse_items(self, html_content: str) -> List[Dict]:
        """Extract stock-pick entries from the page HTML.

        The selectors in ``_SELECTORS`` are tried in order and the elements of
        the first selector that matches are used (at most ``_MAX_ITEMS``).
        Entries whose title is ``_MIN_TITLE_LENGTH`` characters or shorter are
        skipped.

        Args:
            html_content: Raw HTML of the stock-picks page.

        Returns:
            A list of dicts with the keys ``title``, ``link`` (absolute URL,
            or ``None`` when no href was found), ``scraped_at`` (ISO-8601
            local timestamp) and ``hash`` (first 8 hex digits of the MD5 of
            the title). If anything raises while parsing, the error is
            logged, ``self.stats['errors']`` is incremented and an empty list
            is returned.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        stock_picks: List[Dict] = []
        try:
            elements = []
            for selector in self._SELECTORS:
                elements = soup.select(selector)
                if elements:
                    self.logger.info(f"使用選擇器找到內容: {selector}")
                    break

            for element in elements[:self._MAX_ITEMS]:
                title = element.get_text(strip=True)
                if len(title) <= self._MIN_TITLE_LENGTH:
                    continue
                stock_picks.append({
                    'title': title,
                    'link': self._resolve_link(self._extract_href(element)),
                    'scraped_at': datetime.now().isoformat(),
                    # Short fingerprint of the title; kept as MD5[:8] so values
                    # stay comparable with previously stored ones.
                    'hash': hashlib.md5(title.encode()).hexdigest()[:8],
                })
            return stock_picks
        except Exception as e:
            # Parsing boundary: one broken page must not crash the crawler.
            self.logger.error(f"解析網頁內容失敗: {e}")
            self.stats['errors'] += 1
            return []

    @staticmethod
    def _extract_href(element):
        """Return the href of *element* itself (if it is an ``<a>``) or of its first descendant link."""
        if element.name == 'a':
            return element.get('href')
        anchor = element.find('a', href=True)
        return anchor.get('href') if anchor is not None else None

    def _resolve_link(self, href):
        """Resolve *href* against the page URL; non-string or empty values pass through unchanged."""
        if not isinstance(href, str) or not href:
            return href
        try:
            # urljoin handles root-relative ('/x'), protocol-relative ('//host/x')
            # and path-relative hrefs, and leaves absolute URLs untouched.
            return urljoin(self.url, href)
        except ValueError:
            # Malformed href: keep the raw value instead of losing the item.
            return href

    # Keep Barron's specific email formatting (subject + body)
    def _build_email(self, items: List[Dict]):
        """Build the notification e-mail for *items*.

        Args:
            items: Entries as produced by ``parse_items``.

        Returns:
            A ``(subject, body)`` tuple. The subject carries the item count;
            the body is whatever ``notif.format_email_body(items)`` returns.
        """
        subject = f"📈 Barron's 新股票推薦 ({len(items)}條)"
        body = notif.format_email_body(items)
        return subject, body