diff --git a/bots/converters/blog_converter.py b/bots/converters/blog_converter.py index a6a4e5a..4c4cdd2 100644 --- a/bots/converters/blog_converter.py +++ b/bots/converters/blog_converter.py @@ -96,7 +96,10 @@ def build_full_html(article: dict, body_html: str, toc_html: str, json_ld = build_json_ld(article, post_url) disclaimer = article.get('disclaimer', '') parts = [json_ld] - # 목차 비활성화 — 독자 경험 개선 + # 목차: h2가 3개 이상인 긴 글에서만 표시 + h2_count = body_html.lower().count('
{disclaimer}
') diff --git a/bots/publisher_bot.py b/bots/publisher_bot.py index 52604af..a5eb87b 100644 --- a/bots/publisher_bot.py +++ b/bots/publisher_bot.py @@ -299,38 +299,39 @@ def fetch_featured_image(article: dict) -> str: except Exception as e: logger.warning(f"Pexels 이미지 검색 실패: {e}") - # 4) Wikipedia 썸네일 (무료, API 키 불필요) - title = article.get('title', '') + # 4) Wikipedia 썸네일 (무료, API 키 불필요) — 태그 전체 시도 tags = article.get('tags', []) if isinstance(tags, str): tags = [t.strip() for t in tags.split(',')] - for kw in ([title] + tags)[:4]: - if not kw: - continue + # 태그만 사용 (제목은 너무 길어 Wikipedia에서 매칭 안됨) + search_keywords = [t for t in tags if t and len(t) <= 15][:8] + from urllib.parse import quote as _quote + for kw in search_keywords: + # 한국어 Wikipedia try: - from urllib.parse import quote - wiki_url = f'https://ko.wikipedia.org/api/rest_v1/page/summary/{quote(kw)}' + wiki_url = f'https://ko.wikipedia.org/api/rest_v1/page/summary/{_quote(kw)}' resp = requests.get(wiki_url, timeout=6, headers={'User-Agent': 'Mozilla/5.0 (compatible; BlogBot/1.0)'}) if resp.status_code == 200: data = resp.json() thumb = data.get('thumbnail', {}).get('source', '') if thumb and thumb.startswith('http') and not _is_platform_logo(thumb): - # 더 큰 해상도로 변환 (200px → 800px) - thumb = thumb.replace('/200px-', '/800px-').replace('/320px-', '/800px-') + thumb = re.sub(r'/\d+px-', '/800px-', thumb) + logger.info(f"Wikipedia 이미지 사용: {kw} → {thumb[:60]}") return thumb except Exception: pass - # 영문 Wikipedia fallback + # 영문 Wikipedia try: - wiki_url = f'https://en.wikipedia.org/api/rest_v1/page/summary/{quote(kw)}' + wiki_url = f'https://en.wikipedia.org/api/rest_v1/page/summary/{_quote(kw)}' resp = requests.get(wiki_url, timeout=6, headers={'User-Agent': 'Mozilla/5.0 (compatible; BlogBot/1.0)'}) if resp.status_code == 200: data = resp.json() thumb = data.get('thumbnail', {}).get('source', '') if thumb and thumb.startswith('http') and not _is_platform_logo(thumb): - thumb = thumb.replace('/200px-', '/800px-').replace('/320px-', '/800px-') + thumb = re.sub(r'/\d+px-', '/800px-', thumb) + logger.info(f"Wikipedia(EN) 이미지 사용: {kw} → {thumb[:60]}") return thumb except Exception: pass @@ -359,7 +360,10 @@ def build_full_html(article: dict, body_html: str, toc_html: str) -> str: ) html_parts.append(json_ld) - # 목차 비활성화 — 독자 경험 개선 (사진 아래 목차 제거) + # 목차: h2가 3개 이상인 긴 글에서만 표시 + h2_count = body_html.lower().count('