From f25a95440a20b0cc937a3c7396f82489382294d4 Mon Sep 17 00:00:00 2001
From: JOUNGWOOK KWON <elikwon@JOUNGWOOKui-MacBookAir.local>
Date: Mon, 30 Mar 2026 15:36:01 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20/idea=20=ED=83=80=EC=9E=84=EC=95=84?=
 =?UTF-8?q?=EC=9B=83=20=E2=80=94=20=EB=A6=AC=EB=8B=A4=EC=9D=B4=EB=A0=89?=
 =?UTF-8?q?=ED=8A=B8/=ED=81=AC=EB=A1=A4=EB=A7=81=20=EC=A0=9C=EA=B1=B0?=
 =?UTF-8?q?=ED=95=98=EA=B3=A0=20RSS=EB=A7=8C=20=ED=8C=8C=EC=8B=B1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NAS에서 Google 뉴스 리다이렉트를 추적하는 과정이 매우 느려 /idea 명령에서 Telegram 타임아웃 발생.
첫 번째 기사에 대한 리다이렉트 추적과 og:description/og:image 크롤링을 제거하고,
RSS 피드 파싱만으로 제목/설명을 수집. URL 변환은 글 작성 시점에 처리.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bots/scheduler.py | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/bots/scheduler.py b/bots/scheduler.py
index c4ede9a..3b385eb 100644
--- a/bots/scheduler.py
+++ b/bots/scheduler.py
@@ -783,12 +783,11 @@ async def cmd_idea(update: Update, context: ContextTypes.DEFAULT_TYPE):
 
 
 def _search_and_build_topic(keyword: str, corner: str = '') -> dict:
-    """키워드로 Google 뉴스 검색 → 관련 기사 수집 → topic_data 생성"""
-    import requests
+    """키워드로 Google 뉴스 검색 → 관련 기사 수집 → topic_data 생성 (빠른 버전)"""
     import feedparser
     from urllib.parse import quote
 
-    # Google 뉴스 RSS로 검색
+    # Google 뉴스 RSS로 검색 (리다이렉트 추적 없이 빠르게)
     search_url = f"https://news.google.com/rss/search?q={quote(keyword)}&hl=ko&gl=KR&ceid=KR:ko"
     sources = []
     best_description = ''
@@ -800,35 +799,18 @@ def _search_and_build_topic(keyword: str, corner: str = '') -> dict:
             title = entry.get('title', '')
             link = entry.get('link', '')
             pub_date = entry.get('published', '')
+            # RSS description에서 설명 추출
+            desc = entry.get('summary', '') or entry.get('description', '')
+            if desc and not best_description:
+                # HTML 태그 제거
+                import re as _re
+                best_description = _re.sub(r'<[^>]+>', '', desc).strip()[:300]
 
-            # Google 뉴스 RSS 제목에서 "- 매체명" 분리
             sources.append({
                 'url': link,
                 'title': title,
                 'date': pub_date,
             })
-
-        # 첫 번째 기사만 리다이렉트 추적 + 크롤링 (속도 최적화)
-        if sources and 'news.google.com' in sources[0]['url']:
-            try:
-                resp = requests.head(sources[0]['url'], timeout=5, allow_redirects=True,
-                                     headers={'User-Agent': 'Mozilla/5.0'})
-                if resp.url and 'news.google.com' not in resp.url:
-                    sources[0]['url'] = resp.url
-                    # og:description, og:image 크롤링
-                    from bs4 import BeautifulSoup
-                    page = requests.get(resp.url, timeout=5,
-                                        headers={'User-Agent': 'Mozilla/5.0'})
-                    if page.status_code == 200:
-                        soup = BeautifulSoup(page.text, 'lxml')
-                        og_desc = soup.find('meta', property='og:description')
-                        if og_desc and og_desc.get('content'):
-                            best_description = og_desc['content'].strip()[:300]
-                        og_img = soup.find('meta', property='og:image')
-                        if og_img and og_img.get('content', '').startswith('http'):
-                            best_image = og_img['content']
-            except Exception:
-                pass
     except Exception:
         pass