From f3526bbcdd6b3ed83a3c764fbd71dc8e85a897db Mon Sep 17 00:00:00 2001
From: JOUNGWOOK KWON <elikwon@JOUNGWOOKui-MacBookAir.local>
Date: Mon, 30 Mar 2026 17:46:45 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20source=5Fimage=EC=97=90=EB=8F=84=20?=
 =?UTF-8?q?=ED=94=8C=EB=9E=AB=ED=8F=BC=20=EB=A1=9C=EA=B3=A0=20=ED=95=84?=
 =?UTF-8?q?=ED=84=B0=20=EC=A0=81=EC=9A=A9=20(=EA=B7=BC=EB=B3=B8=20?=
 =?UTF-8?q?=EC=9B=90=EC=9D=B8=20=EC=88=98=EC=A0=95)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NAS IP에서 요청하면 Google News RSS URL이 200으로 응답하고, og:image로
lh3.googleusercontent.com 썸네일을 반환하는 문제가 있음. 이 썸네일이 source_image로
사용되지 않도록 아래 두 곳 모두에서 차단:
- fetch_featured_image(): source_image에 _is_platform_logo() 체크 추가
- _fetch_sources_content(): og:image 저장 전 플랫폼 로고 패턴 필터 추가

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 bots/publisher_bot.py |  4 ++--
 bots/scheduler.py     | 10 +++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/bots/publisher_bot.py b/bots/publisher_bot.py
index edfaf47..36dbfb7 100644
--- a/bots/publisher_bot.py
+++ b/bots/publisher_bot.py
@@ -262,9 +262,9 @@ def _fetch_og_image(url: str) -> str:
 
 def fetch_featured_image(article: dict) -> str:
     """대표 이미지: RSS 이미지 → og:image 크롤링 → Pexels 순으로 시도"""
-    # 1) RSS 수집 시 가져온 소스 이미지
+    # 1) RSS 수집 시 가져온 소스 이미지 (플랫폼 로고 제외)
     source_image = article.get('source_image', '')
-    if source_image and source_image.startswith('http'):
+    if source_image and source_image.startswith('http') and not _is_platform_logo(source_image):
         try:
             resp = requests.head(source_image, timeout=5, allow_redirects=True)
             if resp.status_code == 200:
diff --git a/bots/scheduler.py b/bots/scheduler.py
index 777b2c9..fe10a3a 100644
--- a/bots/scheduler.py
+++ b/bots/scheduler.py
@@ -302,11 +302,15 @@ def _fetch_sources_content(topic_data: dict) -> dict:
                 og_title = soup.find('meta', property='og:title')
                 if og_title and og_title.get('content'):
                     title = og_title['content'].strip()
-                # og:image
+                # og:image (플랫폼 로고/Google News 썸네일 제외)
                 if not topic_data.get('source_image'):
                     og_img = soup.find('meta', property='og:image')
-                    if og_img and og_img.get('content', '').startswith('http'):
-                        topic_data['source_image'] = og_img['content']
+                    img_url = og_img.get('content', '') if og_img else ''
+                    skip_patterns = ['lh3.googleusercontent', 'google.com/images', 'logo', 'icon',
+                                     'googlenews', 'google-news', 'placeholder', 'noimage']
+                    is_platform = any(p in img_url.lower() for p in skip_patterns)
+                    if img_url.startswith('http') and not is_platform:
+                        topic_data['source_image'] = img_url
         except Exception:
             pass