diff --git a/app/crawlers/openinsider_top.py b/app/crawlers/openinsider_top.py index 9a1d1ee..51a5608 100644 --- a/app/crawlers/openinsider_top.py +++ b/app/crawlers/openinsider_top.py @@ -64,26 +64,39 @@ class OpenInsiderTopCrawler(BaseCrawler): @staticmethod def _parse_money(val: str) -> Optional[int]: + """Parse money text into absolute integer dollars. + + Handles formats like: + - "$1,234,567" + - "($1,234,567)" (treat as negative but return magnitude) + - "-$1,234,567" (treat as negative but return magnitude) + - "1,234,567" + Returns None if no digits found. + """ if not val: return None s = val.strip() - # Remove $ and commas and any parentheses - for ch in ['$', ',', '(', ')', '+']: + # Detect negative indicators before stripping + is_negative = s.startswith('-') or '(' in s + # Normalize: remove currency symbols, commas, parentheses, plus/minus, spaces + for ch in ['$', ',', '(', ')', '+', '-', ' ']: s = s.replace(ch, '') - # Some cells may include text like "$1,234,567 (incl. options)" - # Keep only leading numeric part + # Keep only leading digits num = '' for c in s: if c.isdigit(): num += c - elif c in ' .': + elif c == '.': + # ignore decimal points; values appear to be whole dollars continue else: break if not num: return None try: - return int(num) + value = int(num) + # We return absolute magnitude; sign is not needed for threshold + return abs(value) except ValueError: return None @@ -227,4 +240,3 @@ class OpenInsiderTopCrawler(BaseCrawler): f"抓取時間:{datetime.now().isoformat()}\n來源:\n- " + "\n- ".join(self.urls) ) return subject, body -