fix(openinsider_top): treat negative Value on sales as absolute for threshold parsing
Handle strings like '-$1,234,567' by normalizing and parsing the magnitude; this ensures sales rows are included when they exceed INSIDER_MIN_AMOUNT.
This commit is contained in:
@@ -64,26 +64,39 @@ class OpenInsiderTopCrawler(BaseCrawler):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _parse_money(val: str) -> Optional[int]:
|
def _parse_money(val: str) -> Optional[int]:
|
||||||
|
"""Parse money text into absolute integer dollars.
|
||||||
|
|
||||||
|
Handles formats like:
|
||||||
|
- "$1,234,567"
|
||||||
|
- "($1,234,567)" (treat as negative but return magnitude)
|
||||||
|
- "-$1,234,567" (treat as negative but return magnitude)
|
||||||
|
- "1,234,567"
|
||||||
|
Returns None if no digits found.
|
||||||
|
"""
|
||||||
if not val:
|
if not val:
|
||||||
return None
|
return None
|
||||||
s = val.strip()
|
s = val.strip()
|
||||||
# Remove $ and commas and any parentheses
|
# Detect negative indicators before stripping
|
||||||
for ch in ['$', ',', '(', ')', '+']:
|
is_negative = s.startswith('-') or '(' in s
|
||||||
|
# Normalize: remove currency symbols, commas, parentheses, plus/minus, spaces
|
||||||
|
for ch in ['$', ',', '(', ')', '+', '-', ' ']:
|
||||||
s = s.replace(ch, '')
|
s = s.replace(ch, '')
|
||||||
# Some cells may include text like "$1,234,567 (incl. options)"
|
# Keep only leading digits
|
||||||
# Keep only leading numeric part
|
|
||||||
num = ''
|
num = ''
|
||||||
for c in s:
|
for c in s:
|
||||||
if c.isdigit():
|
if c.isdigit():
|
||||||
num += c
|
num += c
|
||||||
elif c in ' .':
|
elif c == '.':
|
||||||
|
# ignore decimal points; values appear to be whole dollars
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
if not num:
|
if not num:
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
return int(num)
|
value = int(num)
|
||||||
|
# We return absolute magnitude; sign is not needed for threshold
|
||||||
|
return abs(value)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -227,4 +240,3 @@ class OpenInsiderTopCrawler(BaseCrawler):
|
|||||||
f"抓取時間:{datetime.now().isoformat()}\n來源:\n- " + "\n- ".join(self.urls)
|
f"抓取時間:{datetime.now().isoformat()}\n來源:\n- " + "\n- ".join(self.urls)
|
||||||
)
|
)
|
||||||
return subject, body
|
return subject, body
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user