From 099f156e6f7f4c7e812b3dca5a933100035b9149 Mon Sep 17 00:00:00 2001 From: MH Hung Date: Wed, 3 Sep 2025 21:32:50 +0800 Subject: [PATCH] feat(email): add SMTP security modes (starttls/ssl/none) with sensible default ports; add /notify_test endpoint; support ALWAYS_NOTIFY_ON_STARTUP to force first-run notification chore(docker): run enhanced_crawler.py as entrypoint ops(compose): load env via env_file and remove hardcoded secrets docs: update README and .env.template for SMTP and startup notification --- .env.template | 5 ++- Dockerfile | 4 +-- README.md | 33 +++++++++++++------ docker-compose.yml | 24 +++----------- enhanced_crawler.py | 78 ++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 107 insertions(+), 37 deletions(-) diff --git a/.env.template b/.env.template index 0ee00e3..02da85b 100644 --- a/.env.template +++ b/.env.template @@ -1,10 +1,13 @@ # 基本設定 CHECK_INTERVAL=300 LOG_LEVEL=INFO +ALWAYS_NOTIFY_ON_STARTUP=false # 電子郵件通知設定(Gmail 範例) EMAIL_SMTP_SERVER=smtp.gmail.com EMAIL_SMTP_PORT=587 +# 可選: starttls | ssl | none +EMAIL_SMTP_SECURITY=starttls EMAIL_FROM=your_email@gmail.com EMAIL_TO=notification@gmail.com EMAIL_USERNAME=your_email@gmail.com @@ -14,4 +17,4 @@ EMAIL_PASSWORD=your_app_specific_password WEBHOOK_URL=https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK # Discord Webhook(可選) -DISCORD_WEBHOOK=https://discord.com/api/webhooks/YOUR/DISCORD/WEBHOOK \ No newline at end of file +DISCORD_WEBHOOK=https://discord.com/api/webhooks/YOUR/DISCORD/WEBHOOK diff --git a/Dockerfile b/Dockerfile index a0d0169..6ebca2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,5 +32,5 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ # 暴露端口(用於健康檢查 API) EXPOSE 8080 -# 執行爬蟲 -CMD ["python", "crawler.py"] \ No newline at end of file +# 執行爬蟲主程式 +CMD ["python", "enhanced_crawler.py"] diff --git a/README.md b/README.md index e7ba03e..797e8fa 100644 --- a/README.md +++ b/README.md @@ -28,13 +28,17 @@ barrons-crawler/ # 基本設定 CHECK_INTERVAL=300 LOG_LEVEL=INFO +ALWAYS_NOTIFY_ON_STARTUP=false # 啟動後第一次必定寄當前清單 -# 電子郵件通知設定(Gmail 範例) -EMAIL_SMTP_SERVER=smtp.gmail.com -EMAIL_SMTP_PORT=587 -EMAIL_FROM=your_email@gmail.com +# 電子郵件通知設定(SMTP) +# 對 Gmail:建議使用應用程式密碼 +# 對學校/企業信箱:請依管理者提供之 SMTP 主機與加密方式設定 +EMAIL_SMTP_SERVER=smtp.gmail.com # 例:mail.ntust.edu.tw +EMAIL_SMTP_PORT=587 # starttls 常用 587;ssl 常用 465 +EMAIL_SMTP_SECURITY=starttls # starttls | ssl | none +EMAIL_FROM=your_email@gmail.com # 例:m10605505@mail.ntust.edu.tw EMAIL_TO=notification@gmail.com -EMAIL_USERNAME=your_email@gmail.com +EMAIL_USERNAME=your_email@gmail.com # 有些伺服器需填完整信箱 EMAIL_PASSWORD=your_app_specific_password # Slack Webhook(可選) @@ -55,10 +59,19 @@ docker-compose logs -f barrons-crawler ## 📋 詳細設定選項 -### 電子郵件設定(Gmail) -1. 開啟 Gmail 的兩步驟驗證 -2. 生成應用程式密碼:https://myaccount.google.com/apppasswords -3. 在 `.env` 中使用應用程式密碼,不是一般密碼 +### 電子郵件設定(SMTP) +1. 若使用 Gmail: + - 開啟兩步驟驗證 + - 生成應用程式密碼:https://myaccount.google.com/apppasswords + - 在 `.env` 使用應用程式密碼,而非一般密碼 +2. 若使用學校/企業郵件(如 NTUST): + - 向管理者確認 SMTP 主機、連接埠與加密方式(starttls 或 ssl) + - `EMAIL_USERNAME` 可能需要填完整信箱(例如 `m10605505@mail.ntust.edu.tw`) + +### 啟動後首次通知行為 +- 環境變數 `ALWAYS_NOTIFY_ON_STARTUP` + - `true/1/yes`:服務啟動完成後,第一次檢查即使沒有新內容也會寄出目前清單;之後只在有更新時寄出 + - `false`(預設):只有在偵測到新內容時才寄出 ### Slack 通知設定 1. 建立 Slack App: https://api.slack.com/apps @@ -265,4 +278,4 @@ services: 3. **定期備份資料** 4. **監控資源使用情況** 5. **設定適當的通知渠道** -6. **遵守網站使用條款** \ No newline at end of file +6. **遵守網站使用條款** diff --git a/docker-compose.yml b/docker-compose.yml index 34476af..5138616 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,24 +6,10 @@ services: build: . container_name: barrons-crawler restart: unless-stopped - environment: - # 爬蟲設定 - - CHECK_INTERVAL=300 # 5分鐘 - - LOG_LEVEL=INFO - - # 電子郵件設定(可選) - - EMAIL_SMTP_SERVER=smtp.gmail.com - - EMAIL_SMTP_PORT=587 - - EMAIL_FROM=your_email@gmail.com - - EMAIL_TO=notification@gmail.com - - EMAIL_USERNAME=your_email@gmail.com - - EMAIL_PASSWORD=your_app_password - - # Webhook 設定(可選) - - WEBHOOK_URL=https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK - - # Discord 設定(可選) - - DISCORD_WEBHOOK=https://discord.com/api/webhooks/YOUR/DISCORD/WEBHOOK + # 將同目錄的 .env 檔注入容器環境變數 + env_file: + - .env + # 所有環境變數由 .env 提供 volumes: - ./data:/app/data # 資料持久化 @@ -47,4 +33,4 @@ services: command: redis-server --appendonly yes volumes: - redis_data: \ No newline at end of file + redis_data: diff --git a/enhanced_crawler.py b/enhanced_crawler.py index 9bc554a..2f61c3b 100644 --- a/enhanced_crawler.py +++ b/enhanced_crawler.py @@ -10,7 +10,7 @@ from email.mime.multipart import MIMEMultipart import logging import os import schedule -from flask import Flask, jsonify +from flask import Flask, jsonify, request import threading import signal import sys @@ -29,6 +29,9 @@ class EnhancedBarronsCrawler: self.email_config = self.load_email_config() self.webhook_url = os.getenv('WEBHOOK_URL') self.discord_webhook = os.getenv('DISCORD_WEBHOOK') + # 啟動時是否強制寄出一次目前內容 + self.always_notify_on_startup = os.getenv('ALWAYS_NOTIFY_ON_STARTUP', 'false').lower() in ('1', 'true', 'yes') + self._first_check_done = False # 設定日誌 log_level = os.getenv('LOG_LEVEL', 'INFO') @@ -55,9 +58,14 @@ class EnhancedBarronsCrawler: def load_email_config(self): """從環境變數載入電子郵件設定""" if all(os.getenv(key) for key in ['EMAIL_SMTP_SERVER', 'EMAIL_FROM', 'EMAIL_TO', 'EMAIL_USERNAME', 'EMAIL_PASSWORD']): + security = os.getenv('EMAIL_SMTP_SECURITY', 'starttls').lower() + # 根據安全機制推導預設連接埠 + default_port = 465 if security == 'ssl' else 587 if security == 'starttls' else 25 + smtp_port = int(os.getenv('EMAIL_SMTP_PORT', default_port)) return { 'smtp_server': os.getenv('EMAIL_SMTP_SERVER'), - 'smtp_port': int(os.getenv('EMAIL_SMTP_PORT', 587)), + 'smtp_port': smtp_port, + 'smtp_security': security, # 'ssl' | 'starttls' | 'none' 'from_email': os.getenv('EMAIL_FROM'), 'to_email': os.getenv('EMAIL_TO'), 'username': os.getenv('EMAIL_USERNAME'), @@ -190,8 +198,19 @@ class EnhancedBarronsCrawler: msg.attach(MIMEText(body, 'plain', 'utf-8')) - server = smtplib.SMTP(self.email_config['smtp_server'], self.email_config['smtp_port']) - server.starttls() + smtp_server = self.email_config['smtp_server'] + smtp_port = self.email_config['smtp_port'] + security = self.email_config.get('smtp_security', 'starttls') + + if security == 'ssl': + server = smtplib.SMTP_SSL(smtp_server, smtp_port) + else: + server = smtplib.SMTP(smtp_server, smtp_port) + server.ehlo() + if security == 'starttls': + server.starttls() + server.ehlo() + server.login(self.email_config['username'], self.email_config['password']) server.send_message(msg) server.quit() @@ -272,6 +291,20 @@ class EnhancedBarronsCrawler: return new_picks else: + # 啟動後第一次且啟用 ALWAYS_NOTIFY_ON_STARTUP,則寄出目前內容 + if (not self._first_check_done) and self.always_notify_on_startup and current_picks: + self.logger.info("🟢 啟動首次檢查:沒有新內容,但已依設定寄出目前清單") + # 發送通知(使用全部目前項目) + self.send_notifications(current_picks) + # 儲存資料(仍以目前清單為準) + new_data = { + 'last_update': datetime.now().isoformat(), + 'stock_picks': current_picks, + 'stats': self.stats + } + self.save_data(new_data) + return current_picks + self.logger.info("✅ 沒有發現新內容") return [] @@ -298,6 +331,7 @@ class EnhancedBarronsCrawler: # 立即執行一次檢查 self.run_check() + self._first_check_done = True while self.running: schedule.run_pending() @@ -331,6 +365,40 @@ def manual_check(): return jsonify({"error": "Crawler not initialized"}) +@app.route('/notify_test') +def notify_test(): + """手動測試通知(預設只寄 Email)。可加參數 ?channel=email|webhook|discord""" + if not crawler_instance: + return jsonify({"error": "Crawler not initialized"}), 500 + + channel = (request.args.get('channel') or 'email').lower() + test_pick = [{ + 'title': f"[測試] Barron's 通知發送 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", + 'link': 'https://example.com/test', + 'scraped_at': datetime.now().isoformat(), + 'hash': hashlib.md5(str(datetime.now().timestamp()).encode()).hexdigest()[:8] + }] + + try: + if channel == 'email': + if not crawler_instance.email_config: + return jsonify({"error": "Email config not set"}), 400 + crawler_instance.send_email_notification(test_pick) + elif channel == 'webhook': + if not crawler_instance.webhook_url: + return jsonify({"error": "Webhook URL not set"}), 400 + crawler_instance.send_webhook_notification(test_pick) + elif channel == 'discord': + if not crawler_instance.discord_webhook: + return jsonify({"error": "Discord webhook not set"}), 400 + crawler_instance.send_discord_notification(test_pick) + else: + return jsonify({"error": f"Unsupported channel: {channel}"}), 400 + return jsonify({"result": f"Test notification sent via {channel}"}) + except Exception as e: + crawler_instance.logger.error(f"測試通知發送失敗: {e}") + return jsonify({"error": str(e)}), 500 + def run_flask_app(): """運行 Flask 應用""" app.run(host='0.0.0.0', port=8080, debug=False) @@ -345,4 +413,4 @@ if __name__ == "__main__": flask_thread.start() # 運行主爬蟲 - crawler_instance.run() \ No newline at end of file + crawler_instance.run()