async def cmd_topic(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Register a URL as a writing topic: /topic <url> [category].

    Crawls the article in a worker thread (blocking requests I/O must not
    run on the event loop), saves the resulting topic JSON under
    DATA_DIR/topics, and replies with the topic's 1-based index so the
    user can immediately run /write <idx>.
    """
    args = context.args
    if not args:
        await update.message.reply_text(
            "사용법: /topic URL [카테고리]\n"
            "예: /topic https://example.com/article\n"
            "예: /topic https://example.com/article AI인사이트"
        )
        return

    url = args[0]
    # Require a real scheme: bare 'http…' prefixes such as 'httpfoo' are rejected.
    if not url.startswith(('http://', 'https://')):
        await update.message.reply_text("❌ 유효한 URL을 입력하세요. (http로 시작)")
        return

    VALID_CORNERS = {"AI인사이트", "여행맛집", "스타트업", "TV로보는세상", "제품리뷰",
                     "생활꿀팁", "건강정보", "재테크", "팩트체크"}
    corner = ''
    if len(args) > 1 and args[1] in VALID_CORNERS:
        corner = args[1]

    await update.message.reply_text(f"🔍 기사 분석 중...\n{url[:80]}")

    # get_running_loop() is the non-deprecated form inside a coroutine;
    # get_event_loop() warns (and may raise) here on Python 3.10+.
    loop = asyncio.get_running_loop()
    try:
        topic_data = await loop.run_in_executor(None, _crawl_url_to_topic, url, corner)
    except Exception as e:
        await update.message.reply_text(f"❌ 크롤링 실패: {e}")
        return

    # Persist as DATA_DIR/topics/YYYYMMDD_HHMMSS_manual.json. A single
    # datetime.now() call keeps date and timestamp consistent across midnight.
    topics_dir = DATA_DIR / 'topics'
    topics_dir.mkdir(parents=True, exist_ok=True)
    now = datetime.now()
    today = now.strftime('%Y%m%d')
    filename = f"{today}_{now.strftime('%H%M%S')}_manual.json"
    topic_path = topics_dir / filename
    topic_path.write_text(json.dumps(topic_data, ensure_ascii=False, indent=2),
                          encoding='utf-8')

    # Position of the new topic within today's list (1-based), matching the
    # numbering that /write <idx> uses.
    all_files = sorted(topics_dir.glob(f'{today}_*.json'))
    idx = next((i for i, f in enumerate(all_files, 1) if f.name == filename),
               len(all_files))

    corner_display = topic_data.get('corner', '미지정')
    await update.message.reply_text(
        f"✅ 글감 등록 완료! (#{idx})\n\n"
        f"제목: {topic_data.get('topic', '')[:60]}\n"
        f"카테고리: {corner_display}\n"
        f"출처: {topic_data.get('source_name', '')}\n\n"
        f"👉 /write {idx} 로 바로 글 작성 가능\n"
        f"👉 /write {idx} AI인사이트 로 카테고리 변경 발행 가능"
    )


def _meta_content(soup, prop: str) -> str:
    """Return the stripped content of a `<meta property=...>` tag, or ''."""
    tag = soup.find('meta', property=prop)
    if tag and tag.get('content'):
        return tag['content'].strip()
    return ''


def _crawl_url_to_topic(url: str, corner: str = '') -> dict:
    """Crawl *url* and convert it into a topic_data dict.

    Runs in an executor thread (blocking I/O). Raises requests exceptions
    on network/HTTP failure so the caller can report them to the user.

    Args:
        url: Article URL (http/https).
        corner: Pre-selected category; when empty, one is guessed from
            the title + description keywords.
    """
    import requests
    from bs4 import BeautifulSoup

    # Google News links are redirect wrappers; resolve to the real article URL.
    if 'news.google.com' in url:
        try:
            resp = requests.head(url, timeout=10, allow_redirects=True,
                                 headers={'User-Agent': 'Mozilla/5.0'})
            if resp.url and 'news.google.com' not in resp.url:
                url = resp.url
        except Exception:
            pass  # best effort — fall back to the original URL

    resp = requests.get(url, timeout=15, headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
    })
    resp.raise_for_status()
    # requests defaults to ISO-8859-1 when the server omits a charset,
    # which mangles Korean pages — trust the content-sniffed encoding instead.
    if not resp.encoding or resp.encoding.lower() == 'iso-8859-1':
        resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, 'lxml')

    # Title: og:title, then <title>, then the URL itself as a last resort.
    title = _meta_content(soup, 'og:title')
    if not title:
        title_tag = soup.find('title')
        title = title_tag.text.strip() if title_tag else url

    # Description: og:description, then <meta name="description">.
    description = _meta_content(soup, 'og:description')
    if not description:
        meta_desc = soup.find('meta', attrs={'name': 'description'})
        description = (meta_desc['content'].strip()
                       if meta_desc and meta_desc.get('content') else '')

    # Representative image — absolute URLs only.
    image_url = ''
    og_img = soup.find('meta', property='og:image')
    if og_img and og_img.get('content', '').startswith('http'):
        image_url = og_img['content']

    # Site name: og:site_name, falling back to the URL's host.
    site_name = _meta_content(soup, 'og:site_name')
    if not site_name:
        from urllib.parse import urlparse
        site_name = urlparse(url).netloc

    # Auto-classify only when the user did not pick a corner.
    if not corner:
        corner = _guess_corner(title, description)

    today_str = datetime.now().strftime('%Y-%m-%d')
    return {
        'topic': title,
        'description': description[:300],
        'source': 'manual',
        'source_name': site_name,
        'source_url': url,
        'published_at': today_str,
        'corner': corner,
        'quality_score': 90,
        'search_demand_score': 8,
        'topic_type': 'trending',
        'source_image': image_url,
        # True when the title contains no Hangul syllables (U+AC00–U+D7A3).
        'is_english': not any('\uAC00' <= c <= '\uD7A3' for c in title),
        'sources': [{'url': url, 'title': title, 'date': today_str}],
    }


def _guess_corner(title: str, description: str) -> str:
    """Guess the blog corner (category) from title + description keywords.

    Counts keyword hits per corner over the lowercased text; the corner with
    the strictly highest score wins (first corner wins ties), and zero
    matches fall back to 'AI인사이트'.
    """
    text = (title + ' ' + description).lower()
    corner_keywords = {
        'AI인사이트': ['ai', '인공지능', 'chatgpt', 'claude', 'gemini', '딥러닝', '머신러닝', 'llm', 'gpt'],
        '스타트업': ['스타트업', '투자', '유치', 'vc', '창업', '엑셀러레이터', 'series'],
        '여행맛집': ['여행', '맛집', '관광', '호텔', '항공', '카페', '맛있'],
        'TV로보는세상': ['드라마', '예능', '방송', '시청률', '넷플릭스', '출연', '배우'],
        '제품리뷰': ['리뷰', '출시', '스펙', '성능', '가격', '아이폰', '갤럭시', '제품'],
        '생활꿀팁': ['꿀팁', '절약', '생활', '정리', '청소', '인테리어'],
        '건강정보': ['건강', '의료', '병원', '치료', '질환', '운동', '다이어트', '영양'],
        '재테크': ['주식', '부동산', '금리', '투자', '적금', '연금', '재테크', '경제'],
        '팩트체크': ['팩트체크', '가짜뉴스', '확인', '사실', '검증'],
    }
    best_corner = 'AI인사이트'
    best_score = 0
    for c, keywords in corner_keywords.items():
        score = sum(1 for k in keywords if k in text)
        if score > best_score:
            best_score = score
            best_corner = c
    return best_corner