diff --git a/bots/scheduler.py b/bots/scheduler.py
index 519b7a5..6383fdc 100644
--- a/bots/scheduler.py
+++ b/bots/scheduler.py
@@ -245,7 +245,9 @@ def _build_openclaw_prompt(topic_data: dict) -> tuple[str, str]:
 
 def _fetch_sources_content(topic_data: dict) -> dict:
     """idea/manual 소스의 경우 글 작성 전 실제 기사 내용 크롤링"""
+    logger.info(f"[fetch_sources] source={topic_data.get('source')}, sources={len(topic_data.get('sources', []))}")
     if topic_data.get('source') not in ('idea', 'manual'):
+        logger.info(f"[fetch_sources] 스킵 (source={topic_data.get('source')})")
         return topic_data
 
     import requests
@@ -258,14 +260,17 @@ def _fetch_sources_content(topic_data: dict) -> dict:
 
     # 소스가 없거나 Google 뉴스 URL만 있는 경우 → 키워드로 재검색
     need_search = not existing_sources or all('news.google.com' in s.get('url', '') for s in existing_sources)
+    logger.info(f"[fetch_sources] need_search={need_search}, existing={len(existing_sources)}")
 
     if need_search:
         try:
             search_url = f"https://news.google.com/rss/search?q={quote(topic)}&hl=ko&gl=KR&ceid=KR:ko"
+            logger.info(f"[fetch_sources] RSS 검색: {topic[:40]}")
             feed = feedparser.parse(search_url)
+            logger.info(f"[fetch_sources] RSS 결과: {len(feed.entries)}개")
             existing_sources = [{'url': e.get('link', ''), 'title': e.get('title', ''), 'date': e.get('published', '')} for e in feed.entries[:5]]
-        except Exception:
-            pass
+        except Exception as e:
+            logger.warning(f"[fetch_sources] RSS 검색 실패: {e}")
 
     # 각 소스 URL 변환 + 내용 크롤링 (최대 3개, 각 5초 타임아웃)
     enriched_sources = []