from __future__ import annotations import hashlib from datetime import datetime from typing import List, Dict, Optional import requests from bs4 import BeautifulSoup from app.crawlers.base import BaseCrawler from app.services import notifications as notif class BarronsCrawler(BaseCrawler): def __init__(self, config, logger): # Name used in generic notifications; include emoji to match previous subject super().__init__(name="📈 Barron's 新股票推薦", config=config, logger=logger, data_filename='barrons_data.json') self.url = "https://www.barrons.com/market-data/stocks/stock-picks?mod=BOL_TOPNAV" self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } def fetch_page(self) -> Optional[str]: try: resp = requests.get(self.url, headers=self.headers, timeout=30) resp.raise_for_status() return resp.text except requests.RequestException as e: self.logger.error(f"獲取網頁失敗: {e}") self.stats['errors'] += 1 return None def parse_items(self, html_content: str) -> List[Dict]: soup = BeautifulSoup(html_content, 'html.parser') stock_picks: List[Dict] = [] try: selectors = [ 'article[data-module="ArticleItem"]', '.WSJTheme--headline', '.MarketDataModule-headline', 'h3 a, h4 a', '[data-module] a[href*="articles"]', ] elements = [] for selector in selectors: elements = soup.select(selector) if elements: self.logger.info(f"使用選擇器找到內容: {selector}") break for element in elements[:10]: title = element.get_text(strip=True) if element.name != 'a' else element.get_text(strip=True) link = element.get('href') if element.name == 'a' else element.find('a', href=True) if isinstance(link, dict): link = link.get('href') elif hasattr(link, 'get'): link = link.get('href') if link and isinstance(link, str) and link.startswith('/'): link = "https://www.barrons.com" + link if title and len(title) > 10: stock_picks.append({ 'title': title, 'link': link, 'scraped_at': datetime.now().isoformat(), 'hash': hashlib.md5(title.encode()).hexdigest()[:8], }) return stock_picks except Exception as e: self.logger.error(f"解析網頁內容失敗: {e}") self.stats['errors'] += 1 return [] # Keep Barron's specific email formatting (subject + body) def _build_email(self, items: List[Dict]): subject = f"📈 Barron's 新股票推薦 ({len(items)}條)" body = notif.format_email_body(items) return subject, body