def _url_host(url: str) -> str:
    """Return the lower-cased hostname of *url*, or '' if it has none or cannot be parsed."""
    # Function-scope import so this block does not rely on a file-level import.
    from urllib.parse import urlsplit

    try:
        return urlsplit(url).hostname or ''
    except ValueError:
        # urlsplit rejects some malformed inputs (e.g. an unbalanced IPv6 bracket).
        return ''


def _host_in_domain(host: str, domain: str) -> bool:
    """Return True when *host* is *domain* itself or one of its subdomains."""
    return host == domain or host.endswith('.' + domain)


def _resolve_google_news_url(url: str) -> str:
    """Resolve a Google News RSS redirect URL to the real article URL.

    The check is done on the parsed hostname rather than on the raw string, so
    a publisher URL that merely mentions ``news.google.com`` in its path or
    query string (for example a ``utm_source`` parameter) is not mistaken for
    a Google News link.

    Args:
        url: Link taken from an RSS entry. May be empty.

    Returns:
        The final URL after following redirects when that URL is hosted
        outside Google News; otherwise *url* unchanged. Resolution is
        best-effort: any request failure also returns *url* unchanged.
    """
    if not url or not _host_in_domain(_url_host(url), 'news.google.com'):
        return url

    try:
        resp = requests.head(url, timeout=10, allow_redirects=True,
                             headers={'User-Agent': 'Mozilla/5.0'})
    except (requests.RequestException, ValueError) as exc:
        # Keep the best-effort contract, but leave a trace instead of
        # swallowing the failure silently.
        import logging
        logging.getLogger(__name__).warning(
            'Could not resolve Google News URL %s: %s', url, exc)
        return url

    final_host = _url_host(resp.url or '')
    # Hosts that mean "we never left Google": the news site itself, or a
    # consent landing page whose query string carries the original link.
    still_google = any(
        _host_in_domain(final_host, domain)
        for domain in ('news.google.com', 'consent.google.com')
    )
    if final_host and not still_google:
        return resp.url
    return url