feat(email): add SMTP security modes (starttls/ssl/none) with sensible default ports; add /notify_test endpoint; support ALWAYS_NOTIFY_ON_STARTUP to force first-run notification

chore(docker): run enhanced_crawler.py as entrypoint

ops(compose): load env via env_file and remove hardcoded secrets

docs: update README and .env.template for SMTP and startup notification
This commit is contained in:
2025-09-03 21:32:50 +08:00
parent 852f206d2e
commit 099f156e6f
5 changed files with 107 additions and 37 deletions

View File

@@ -1,10 +1,13 @@
# 基本設定
CHECK_INTERVAL=300
LOG_LEVEL=INFO
ALWAYS_NOTIFY_ON_STARTUP=false
# 電子郵件通知設定(Gmail 範例)
EMAIL_SMTP_SERVER=smtp.gmail.com
EMAIL_SMTP_PORT=587
# 可選: starttls | ssl | none
EMAIL_SMTP_SECURITY=starttls
EMAIL_FROM=your_email@gmail.com
EMAIL_TO=notification@gmail.com
EMAIL_USERNAME=your_email@gmail.com
@@ -14,4 +17,4 @@ EMAIL_PASSWORD=your_app_specific_password
WEBHOOK_URL=https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK
# Discord Webhook(可選)
DISCORD_WEBHOOK=https://discord.com/api/webhooks/YOUR/DISCORD/WEBHOOK
DISCORD_WEBHOOK=https://discord.com/api/webhooks/YOUR/DISCORD/WEBHOOK

View File

@@ -32,5 +32,5 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
# 暴露端口(用於健康檢查 API)
EXPOSE 8080
# 執行爬蟲
CMD ["python", "crawler.py"]
# 執行爬蟲主程式
CMD ["python", "enhanced_crawler.py"]

View File

@@ -28,13 +28,17 @@ barrons-crawler/
# 基本設定
CHECK_INTERVAL=300
LOG_LEVEL=INFO
ALWAYS_NOTIFY_ON_STARTUP=false # 啟動後第一次必定寄當前清單
# 電子郵件通知設定(Gmail 範例)
EMAIL_SMTP_SERVER=smtp.gmail.com
EMAIL_SMTP_PORT=587
EMAIL_FROM=your_email@gmail.com
# 電子郵件通知設定(SMTP)
# 對 Gmail:建議使用應用程式密碼
# 對學校/企業信箱:請依管理者提供之 SMTP 主機與加密方式設定
EMAIL_SMTP_SERVER=smtp.gmail.com # 例:mail.ntust.edu.tw
EMAIL_SMTP_PORT=587 # starttls 常用 587,ssl 常用 465
EMAIL_SMTP_SECURITY=starttls # starttls | ssl | none
EMAIL_FROM=your_email@gmail.com # 例:m10605505@mail.ntust.edu.tw
EMAIL_TO=notification@gmail.com
EMAIL_USERNAME=your_email@gmail.com
EMAIL_USERNAME=your_email@gmail.com # 有些伺服器需填完整信箱
EMAIL_PASSWORD=your_app_specific_password
# Slack Webhook(可選)
@@ -55,10 +59,19 @@ docker-compose logs -f barrons-crawler
## 📋 詳細設定選項
### 電子郵件設定(Gmail)
1. 開啟 Gmail 的兩步驟驗證
2. 生成應用程式密碼:https://myaccount.google.com/apppasswords
3. 在 `.env` 中使用應用程式密碼,不是一般密碼
### 電子郵件設定(SMTP)
1. 若使用 Gmail:
- 開啟兩步驟驗證
- 生成應用程式密碼:https://myaccount.google.com/apppasswords
- 在 `.env` 使用應用程式密碼,而非一般密碼
2. 若使用學校/企業郵件(如 NTUST):
- 向管理者確認 SMTP 主機、連接埠與加密方式(starttls 或 ssl)
- `EMAIL_USERNAME` 可能需要填完整信箱(例如 `m10605505@mail.ntust.edu.tw`)
### 啟動後首次通知行為
- 環境變數 `ALWAYS_NOTIFY_ON_STARTUP`
- `true/1/yes`:服務啟動完成後,第一次檢查即使沒有新內容也會寄出目前清單;之後只在有更新時寄出
- `false`(預設):只有在偵測到新內容時才寄出
### Slack 通知設定
1. 建立 Slack App: https://api.slack.com/apps
@@ -265,4 +278,4 @@ services:
3. **定期備份資料**
4. **監控資源使用情況**
5. **設定適當的通知渠道**
6. **遵守網站使用條款**
6. **遵守網站使用條款**

View File

@@ -6,24 +6,10 @@ services:
build: .
container_name: barrons-crawler
restart: unless-stopped
environment:
# 爬蟲設定
- CHECK_INTERVAL=300 # 5分鐘
- LOG_LEVEL=INFO
# 電子郵件設定(可選)
- EMAIL_SMTP_SERVER=smtp.gmail.com
- EMAIL_SMTP_PORT=587
- EMAIL_FROM=your_email@gmail.com
- EMAIL_TO=notification@gmail.com
- EMAIL_USERNAME=your_email@gmail.com
- EMAIL_PASSWORD=your_app_password
# Webhook 設定(可選)
- WEBHOOK_URL=https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK
# Discord 設定(可選)
- DISCORD_WEBHOOK=https://discord.com/api/webhooks/YOUR/DISCORD/WEBHOOK
# 將同目錄的 .env 檔注入容器環境變數
env_file:
- .env
# 所有環境變數由 .env 提供
volumes:
- ./data:/app/data # 資料持久化
@@ -47,4 +33,4 @@ services:
command: redis-server --appendonly yes
volumes:
redis_data:
redis_data:

View File

@@ -10,7 +10,7 @@ from email.mime.multipart import MIMEMultipart
import logging
import os
import schedule
from flask import Flask, jsonify
from flask import Flask, jsonify, request
import threading
import signal
import sys
@@ -29,6 +29,9 @@ class EnhancedBarronsCrawler:
self.email_config = self.load_email_config()
self.webhook_url = os.getenv('WEBHOOK_URL')
self.discord_webhook = os.getenv('DISCORD_WEBHOOK')
# 啟動時是否強制寄出一次目前內容
self.always_notify_on_startup = os.getenv('ALWAYS_NOTIFY_ON_STARTUP', 'false').lower() in ('1', 'true', 'yes')
self._first_check_done = False
# 設定日誌
log_level = os.getenv('LOG_LEVEL', 'INFO')
@@ -55,9 +58,14 @@ class EnhancedBarronsCrawler:
def load_email_config(self):
"""從環境變數載入電子郵件設定"""
if all(os.getenv(key) for key in ['EMAIL_SMTP_SERVER', 'EMAIL_FROM', 'EMAIL_TO', 'EMAIL_USERNAME', 'EMAIL_PASSWORD']):
security = os.getenv('EMAIL_SMTP_SECURITY', 'starttls').lower()
# 根據安全機制推導預設連接埠
default_port = 465 if security == 'ssl' else 587 if security == 'starttls' else 25
smtp_port = int(os.getenv('EMAIL_SMTP_PORT', default_port))
return {
'smtp_server': os.getenv('EMAIL_SMTP_SERVER'),
'smtp_port': int(os.getenv('EMAIL_SMTP_PORT', 587)),
'smtp_port': smtp_port,
'smtp_security': security, # 'ssl' | 'starttls' | 'none'
'from_email': os.getenv('EMAIL_FROM'),
'to_email': os.getenv('EMAIL_TO'),
'username': os.getenv('EMAIL_USERNAME'),
@@ -190,8 +198,19 @@ class EnhancedBarronsCrawler:
msg.attach(MIMEText(body, 'plain', 'utf-8'))
server = smtplib.SMTP(self.email_config['smtp_server'], self.email_config['smtp_port'])
server.starttls()
smtp_server = self.email_config['smtp_server']
smtp_port = self.email_config['smtp_port']
security = self.email_config.get('smtp_security', 'starttls')
if security == 'ssl':
server = smtplib.SMTP_SSL(smtp_server, smtp_port)
else:
server = smtplib.SMTP(smtp_server, smtp_port)
server.ehlo()
if security == 'starttls':
server.starttls()
server.ehlo()
server.login(self.email_config['username'], self.email_config['password'])
server.send_message(msg)
server.quit()
@@ -272,6 +291,20 @@ class EnhancedBarronsCrawler:
return new_picks
else:
# 啟動後第一次且啟用 ALWAYS_NOTIFY_ON_STARTUP則寄出目前內容
if (not self._first_check_done) and self.always_notify_on_startup and current_picks:
self.logger.info("🟢 啟動首次檢查:沒有新內容,但已依設定寄出目前清單")
# 發送通知(使用全部目前項目)
self.send_notifications(current_picks)
# 儲存資料(仍以目前清單為準)
new_data = {
'last_update': datetime.now().isoformat(),
'stock_picks': current_picks,
'stats': self.stats
}
self.save_data(new_data)
return current_picks
self.logger.info("✅ 沒有發現新內容")
return []
@@ -298,6 +331,7 @@ class EnhancedBarronsCrawler:
# 立即執行一次檢查
self.run_check()
self._first_check_done = True
while self.running:
schedule.run_pending()
@@ -331,6 +365,40 @@ def manual_check():
return jsonify({"error": "Crawler not initialized"})
@app.route('/notify_test')
def notify_test():
    """Send a test notification through a single channel.

    The channel comes from the ``channel`` query parameter (``email``,
    ``webhook`` or ``discord``) and falls back to ``email`` when absent
    or empty.

    Returns:
        A JSON body plus status: 500 when no crawler instance exists or the
        send path raises, 400 when the channel is unsupported or its
        configuration is unset, and the default success response otherwise.
    """
    if not crawler_instance:
        return jsonify({"error": "Crawler not initialized"}), 500

    requested = request.args.get('channel')
    channel = (requested if requested else 'email').lower()

    # Synthetic pick: timestamped title plus a short hash of the current time.
    title = f"[測試] Barron's 通知發送 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    scraped_at = datetime.now().isoformat()
    digest = hashlib.md5(str(datetime.now().timestamp()).encode()).hexdigest()
    test_pick = [{
        'title': title,
        'link': 'https://example.com/test',
        'scraped_at': scraped_at,
        'hash': digest[:8],
    }]

    # channel -> (config attribute, sender method, error when config is unset)
    channels = {
        'email': ('email_config', 'send_email_notification', "Email config not set"),
        'webhook': ('webhook_url', 'send_webhook_notification', "Webhook URL not set"),
        'discord': ('discord_webhook', 'send_discord_notification', "Discord webhook not set"),
    }

    try:
        spec = channels.get(channel)
        if spec is None:
            return jsonify({"error": f"Unsupported channel: {channel}"}), 400

        config_attr, sender_name, missing_msg = spec
        # Attributes are looked up lazily so only the selected channel is touched.
        if not getattr(crawler_instance, config_attr):
            return jsonify({"error": missing_msg}), 400

        getattr(crawler_instance, sender_name)(test_pick)
        return jsonify({"result": f"Test notification sent via {channel}"})
    except Exception as e:
        crawler_instance.logger.error(f"測試通知發送失敗: {e}")
        return jsonify({"error": str(e)}), 500
def run_flask_app():
"""運行 Flask 應用"""
app.run(host='0.0.0.0', port=8080, debug=False)
@@ -345,4 +413,4 @@ if __name__ == "__main__":
flask_thread.start()
# 運行主爬蟲
crawler_instance.run()
crawler_instance.run()