From ee91d83d37e2272895276e161b8152506afe2c84 Mon Sep 17 00:00:00 2001
From: JOUNGWOOK KWON
Date: Mon, 30 Mar 2026 17:33:24 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20lh3.googleusercontent.com=20=ED=95=84?=
 =?UTF-8?q?=ED=84=B0=20=EC=B6=94=EA=B0=80,=20Google=20News=20=EB=A6=AC?=
 =?UTF-8?q?=EB=8B=A4=EC=9D=B4=EB=A0=89=ED=8A=B8=20head=E2=86=92get=20?=
 =?UTF-8?q?=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- _is_platform_logo(): lh3.googleusercontent.com (Google News CDN 썸네일) 스킵 패턴 추가
- _fetch_og_image(): requests.head() → requests.get() (head는 리다이렉트 미작동)

Co-Authored-By: Claude Sonnet 4.6
---
 bots/publisher_bot.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bots/publisher_bot.py b/bots/publisher_bot.py
index f909a06..edfaf47 100644
--- a/bots/publisher_bot.py
+++ b/bots/publisher_bot.py
@@ -211,6 +211,7 @@ def _is_platform_logo(image_url: str) -> bool:
     skip_patterns = [
         'logo', 'icon', 'avatar', 'banner', '/ad/',
         'google.com/images/branding', 'googlenews', 'google-news',
+        'lh3.googleusercontent.com',  # Google News CDN 썸네일
         'facebook.com', 'twitter.com', 'naver.com/favicon',
         'default_image', 'placeholder', 'noimage', 'no-image',
         'og-default', 'share-default', 'sns_', 'common/',
@@ -223,11 +224,11 @@ def _fetch_og_image(url: str) -> str:
     """원본 기사 URL에서 og:image 메타태그 크롤링"""
     if not url or not url.startswith('http'):
         return ''
-    # Google 뉴스 리다이렉트인 경우 실제 기사 URL 추출 시도
+    # Google 뉴스 리다이렉트인 경우 실제 기사 URL 추출 시도 (head는 리다이렉트 안됨 → get 사용)
     if 'news.google.com' in url:
         try:
-            resp = requests.head(url, timeout=10, allow_redirects=True,
-                                 headers={'User-Agent': 'Mozilla/5.0'})
+            resp = requests.get(url, timeout=15, allow_redirects=True,
+                                headers={'User-Agent': 'Mozilla/5.0 (compatible; BlogBot/1.0)'})
             if resp.url and 'news.google.com' not in resp.url:
                 url = resp.url
         except Exception: