fix: feedparser direct URL fetch → requests fetch + feedparser text parsing

feedparser.parse(URL) was taking 15+ seconds on the NAS and triggering timeouts.
Fetching with requests.get() takes about 1 second; parsing the result with feedparser.parse(text) brings the total to roughly 1.3 seconds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Author: JOUNGWOOK KWON
Date: 2026-03-30 17:16:02 +09:00
Parent: 0e72e6de88
Commit: 45a352c343
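
Why this works: feedparser.parse() accepts either a URL or an already-downloaded string. Given a URL it performs its own blocking download (parse() has no timeout parameter of its own), which is the slow path on the NAS; given a string it only parses. A minimal sketch of the fetch-then-parse pattern the diff below applies; the 8-second timeout and User-Agent header come from the diff, while the fetch_feed name and the raise_for_status() call are illustrative additions:

import feedparser
import requests

def fetch_feed(url: str, timeout: float = 8.0):
    """Fetch the feed bytes with requests, then parse the text offline."""
    resp = requests.get(
        url,
        timeout=timeout,                        # bound the network step
        headers={'User-Agent': 'Mozilla/5.0'},  # match the diff's UA header
    )
    resp.raise_for_status()                     # surface HTTP errors explicitly
    return feedparser.parse(resp.text)          # pure parsing, no network I/O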

@@ -265,7 +265,8 @@ def _fetch_sources_content(topic_data: dict) -> dict:
     try:
         search_url = f"https://news.google.com/rss/search?q={quote(topic)}&hl=ko&gl=KR&ceid=KR:ko"
         logger.info(f"[fetch_sources] RSS 검색: {topic[:40]}")
-        feed = feedparser.parse(search_url)
+        rss_resp = requests.get(search_url, timeout=8, headers={'User-Agent': 'Mozilla/5.0'})
+        feed = feedparser.parse(rss_resp.text)
         logger.info(f"[fetch_sources] RSS 결과: {len(feed.entries)}")
         existing_sources = [{'url': e.get('link', ''), 'title': e.get('title', ''), 'date': e.get('published', '')}
                             for e in feed.entries[:5]]
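
One behavioral difference to keep in mind: when feedparser downloads a URL itself, network failures generally do not raise; they surface as feed.bozo / feed.bozo_exception and an empty entries list. requests.get() raises instead, so the enclosing try: block now also catches timeouts and connection errors. A hedged sketch of the new failure modes, with a hypothetical fetch_entries() wrapper standing in for the surrounding function:

import feedparser
import requests

def fetch_entries(search_url: str) -> list:
    """Return up to 5 entries; [] on any network failure (hypothetical helper)."""
    try:
        resp = requests.get(search_url, timeout=8,
                            headers={'User-Agent': 'Mozilla/5.0'})
        feed = feedparser.parse(resp.text)
        return feed.entries[:5]
    except requests.exceptions.Timeout:
        # With feedparser fetching the URL itself, a stalled server meant an
        # unbounded wait; with requests this now fails fast after 8 seconds.
        return []
    except requests.exceptions.RequestException:
        # DNS errors, connection resets, other non-HTTP failures.
        return []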
@@ -911,18 +912,21 @@ async def cmd_idea(update: Update, context: ContextTypes.DEFAULT_TYPE):
 def _search_and_build_topic(keyword: str, corner: str = '') -> dict:
-    """키워드로 Google 뉴스 검색 → 관련 기사 수집 → topic_data 생성 (빠른 버전)"""
+    """키워드로 Google 뉴스 검색 → 관련 기사 수집 → topic_data 생성"""
+    import requests
     import feedparser
     from urllib.parse import quote
-    # Google 뉴스 RSS로 검색 (리다이렉트 추적 없이 빠르게)
+    # Google 뉴스 RSS로 검색 (requests로 빠르게 가져온 후 feedparser 파싱)
     search_url = f"https://news.google.com/rss/search?q={quote(keyword)}&hl=ko&gl=KR&ceid=KR:ko"
     sources = []
     best_description = ''
     best_image = ''
     try:
-        feed = feedparser.parse(search_url)
+        resp = requests.get(search_url, timeout=8, headers={'User-Agent': 'Mozilla/5.0'})
+        feed = feedparser.parse(resp.text)
+        logger.info(f"[_search] RSS 결과: {len(feed.entries)}개 ({keyword[:30]})")
         for entry in feed.entries[:5]:
             title = entry.get('title', '')
             link = entry.get('link', '')
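
The 15 s vs. 1.3 s figures in the commit message are specific to the NAS environment. To reproduce the comparison elsewhere, a small timing harness along these lines should work; the query string is a placeholder and the timings will vary by host and network:

import time

import feedparser
import requests

URL = "https://news.google.com/rss/search?q=python&hl=ko&gl=KR&ceid=KR:ko"

t0 = time.perf_counter()
feedparser.parse(URL)            # old path: feedparser downloads the URL itself
t_old = time.perf_counter() - t0

t0 = time.perf_counter()
resp = requests.get(URL, timeout=8, headers={'User-Agent': 'Mozilla/5.0'})
feedparser.parse(resp.text)      # new path: fetch first, then parse the text
t_new = time.perf_counter() - t0

print(f"parse(url): {t_old:.2f}s  requests+parse(text): {t_new:.2f}s")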