fix(openinsider_top): treat negative Value on sales as absolute for threshold parsing

Handle strings like '-$1,234,567' by normalizing the text and parsing only its magnitude; this ensures sales rows are included when their value exceeds INSIDER_MIN_AMOUNT.
This commit is contained in:
2025-09-09 21:26:52 +08:00
parent f708f3bf1d
commit e015eef61e

View File

@@ -64,26 +64,39 @@ class OpenInsiderTopCrawler(BaseCrawler):
@staticmethod @staticmethod
def _parse_money(val: str) -> Optional[int]: def _parse_money(val: str) -> Optional[int]:
"""Parse money text into absolute integer dollars.
Handles formats like:
- "$1,234,567"
- "($1,234,567)" (treat as negative but return magnitude)
- "-$1,234,567" (treat as negative but return magnitude)
- "1,234,567"
Returns None if no digits found.
"""
if not val: if not val:
return None return None
s = val.strip() s = val.strip()
# Remove $ and commas and any parentheses # Detect negative indicators before stripping
for ch in ['$', ',', '(', ')', '+']: is_negative = s.startswith('-') or '(' in s
# Normalize: remove currency symbols, commas, parentheses, plus/minus, spaces
for ch in ['$', ',', '(', ')', '+', '-', ' ']:
s = s.replace(ch, '') s = s.replace(ch, '')
# Some cells may include text like "$1,234,567 (incl. options)" # Keep only leading digits
# Keep only leading numeric part
num = '' num = ''
for c in s: for c in s:
if c.isdigit(): if c.isdigit():
num += c num += c
elif c in ' .': elif c == '.':
# ignore decimal points; values appear to be whole dollars
continue continue
else: else:
break break
if not num: if not num:
return None return None
try: try:
return int(num) value = int(num)
# We return absolute magnitude; sign is not needed for threshold
return abs(value)
except ValueError: except ValueError:
return None return None
@@ -227,4 +240,3 @@ class OpenInsiderTopCrawler(BaseCrawler):
f"抓取時間:{datetime.now().isoformat()}\n來源:\n- " + "\n- ".join(self.urls) f"抓取時間:{datetime.now().isoformat()}\n來源:\n- " + "\n- ".join(self.urls)
) )
return subject, body return subject, body