feat: 원본 RSS 소스 이미지를 대표 이미지로 우선 사용

- RSS 수집 시 media:thumbnail, media:content, enclosure, <img> 태그에서 이미지 추출
- source_image를 topic → article → publisher로 전달
- 발행 시 우선순위: 원본 소스 이미지 → Pexels → Unsplash

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
JOUNGWOOK KWON
2026-03-30 12:22:48 +09:00
parent 02484679e2
commit 9280be7e52
3 changed files with 45 additions and 2 deletions
+32
View File
@@ -373,6 +373,35 @@ def collect_product_hunt(sources_cfg: dict) -> list[dict]:
return items
def _extract_rss_image(entry) -> str:
"""RSS entry에서 대표 이미지 URL 추출"""
# 1) media:thumbnail
if hasattr(entry, 'media_thumbnail') and entry.media_thumbnail:
return entry.media_thumbnail[0].get('url', '')
# 2) media:content (type이 image인 것)
if hasattr(entry, 'media_content') and entry.media_content:
for mc in entry.media_content:
if 'image' in mc.get('type', '') or mc.get('medium') == 'image':
return mc.get('url', '')
# type 없어도 url이 이미지 확장자면
url = entry.media_content[0].get('url', '')
if any(ext in url.lower() for ext in ['.jpg', '.jpeg', '.png', '.webp']):
return url
# 3) enclosures
if hasattr(entry, 'enclosures') and entry.enclosures:
for enc in entry.enclosures:
if 'image' in enc.get('type', ''):
return enc.get('href', '') or enc.get('url', '')
# 4) summary/description 안의 <img> 태그
desc = entry.get('summary', '') or entry.get('description', '')
if '<img' in desc:
import re
match = re.search(r'<img[^>]+src=["\']([^"\']+)["\']', desc)
if match:
return match.group(1)
return ''
def collect_rss_feeds(sources_cfg: dict) -> list[dict]:
"""설정된 RSS 피드 수집"""
items = []
@@ -392,6 +421,8 @@ def collect_rss_feeds(sources_cfg: dict) -> list[dict]:
combined = title_text + desc_text
kr_chars = sum(1 for c in combined if '\uac00' <= c <= '\ud7a3')
is_english = kr_chars / max(len(combined), 1) < 0.05
# 원본 기사 대표 이미지 추출
image_url = _extract_rss_image(entry)
items.append({
'topic': title_text,
'description': desc_text,
@@ -404,6 +435,7 @@ def collect_rss_feeds(sources_cfg: dict) -> list[dict]:
'_trust_override': trust,
'_rss_category': feed_cfg.get('category', ''),
'is_english': is_english,
'source_image': image_url,
})
except Exception as e:
logger.warning(f"RSS 수집 실패 ({url}): {e}")