fix: 목차 완전 제거, Wikipedia 이미지 fallback 추가

1. publisher_bot.py + blog_converter.py: 목차(TOC) 완전 비활성화
2. fetch_featured_image(): Wikipedia REST API로 무료 이미지 fallback
   - 제목과 태그 중 앞에서부터 최대 4개 키워드로 한국어 Wikipedia 검색 → 썸네일 추출
   - 키워드별로 한국어판에서 실패 시 영문 Wikipedia 시도
   - 200px/320px 썸네일 URL → 800px 고해상도로 교체

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
JOUNGWOOK KWON
2026-03-30 17:54:07 +09:00
parent f3526bbcdd
commit b98d694b65
2 changed files with 38 additions and 6 deletions

View File

@@ -96,8 +96,7 @@ def build_full_html(article: dict, body_html: str, toc_html: str,
json_ld = build_json_ld(article, post_url)
disclaimer = article.get('disclaimer', '')
parts = [json_ld]
if toc_html:
parts.append(f'<div class="toc-wrapper">{toc_html}</div>')
# 목차 비활성화 — 독자 경험 개선
parts.append(body_html)
if disclaimer:
parts.append(f'<hr/><p class="disclaimer"><small>{disclaimer}</small></p>')

View File

@@ -299,6 +299,42 @@ def fetch_featured_image(article: dict) -> str:
except Exception as e:
logger.warning(f"Pexels 이미지 검색 실패: {e}")
# 4) Wikipedia 썸네일 (무료, API 키 불필요)
title = article.get('title', '')
tags = article.get('tags', [])
if isinstance(tags, str):
tags = [t.strip() for t in tags.split(',')]
for kw in ([title] + tags)[:4]:
if not kw:
continue
try:
from urllib.parse import quote
wiki_url = f'https://ko.wikipedia.org/api/rest_v1/page/summary/{quote(kw)}'
resp = requests.get(wiki_url, timeout=6,
headers={'User-Agent': 'Mozilla/5.0 (compatible; BlogBot/1.0)'})
if resp.status_code == 200:
data = resp.json()
thumb = data.get('thumbnail', {}).get('source', '')
if thumb and thumb.startswith('http') and not _is_platform_logo(thumb):
# 더 큰 해상도로 변환 (200px → 800px)
thumb = thumb.replace('/200px-', '/800px-').replace('/320px-', '/800px-')
return thumb
except Exception:
pass
# 영문 Wikipedia fallback
try:
wiki_url = f'https://en.wikipedia.org/api/rest_v1/page/summary/{quote(kw)}'
resp = requests.get(wiki_url, timeout=6,
headers={'User-Agent': 'Mozilla/5.0 (compatible; BlogBot/1.0)'})
if resp.status_code == 200:
data = resp.json()
thumb = data.get('thumbnail', {}).get('source', '')
if thumb and thumb.startswith('http') and not _is_platform_logo(thumb):
thumb = thumb.replace('/200px-', '/800px-').replace('/320px-', '/800px-')
return thumb
except Exception:
pass
return ''
@@ -323,10 +359,7 @@ def build_full_html(article: dict, body_html: str, toc_html: str) -> str:
)
html_parts.append(json_ld)
# 목차는 h2가 3개 이상일 때만 표시 (짧은 글에선 불필요)
h2_count = body_html.lower().count('<h2')
if toc_html and h2_count >= 3:
html_parts.append(f'<div class="toc-wrapper">{toc_html}</div>')
# 목차 비활성화 — 독자 경험 개선 (사진 아래 목차 제거)
html_parts.append(body_html)
# 원문 출처 링크