From 2fcb2d353d0bb15075cd16bd51d456b006586079 Mon Sep 17 00:00:00 2001 From: JOUNGWOOK KWON Date: Mon, 30 Mar 2026 13:46:14 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20Google=20=EB=89=B4=EC=8A=A4=20RSS=20URL?= =?UTF-8?q?=EC=9D=84=20=EC=8B=A4=EC=A0=9C=20=EA=B8=B0=EC=82=AC=20URL?= =?UTF-8?q?=EB=A1=9C=20=EB=B3=80=ED=99=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 수집 시 news.google.com/rss/articles/CBMi... 형태의 인코딩 URL을 리다이렉트 따라가서 실제 기사 URL로 저장. 출처 링크 클릭 시 원본 기사로 이동 가능. Co-Authored-By: Claude Opus 4.6 --- bots/collector_bot.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/bots/collector_bot.py b/bots/collector_bot.py index 586a295..88d077f 100644 --- a/bots/collector_bot.py +++ b/bots/collector_bot.py @@ -402,6 +402,20 @@ def _extract_rss_image(entry) -> str: return '' +def _resolve_google_news_url(url: str) -> str: + """Google 뉴스 RSS 인코딩 URL을 실제 기사 URL로 변환""" + if not url or 'news.google.com' not in url: + return url + try: + resp = requests.head(url, timeout=10, allow_redirects=True, + headers={'User-Agent': 'Mozilla/5.0'}) + if resp.url and 'news.google.com' not in resp.url: + return resp.url + except Exception: + pass + return url + + def collect_rss_feeds(sources_cfg: dict) -> list[dict]: """설정된 RSS 피드 수집""" items = [] @@ -428,7 +442,7 @@ def collect_rss_feeds(sources_cfg: dict) -> list[dict]: 'description': desc_text, 'source': 'rss', 'source_name': feed_cfg.get('name', ''), - 'source_url': entry.get('link', ''), + 'source_url': _resolve_google_news_url(entry.get('link', '')), 'published_at': pub_at, 'search_demand_score': 8, 'topic_type': 'trending',