"""Analytics bot (analytics_bot.py).

Role: collect blog performance data and generate reports.

Five key metrics:
  1. Index rate (Search Console)
  2. Search CTR (Search Console)
  3. Performance 14 days after publication
  4. Affiliate click-through rate (manual input)
  5. Dwell time (Blogger statistics)
"""
import html
import json
import logging
import os
import re
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional

import requests
from dotenv import load_dotenv
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build

load_dotenv()

BASE_DIR = Path(__file__).parent.parent
DATA_DIR = BASE_DIR / 'data'
LOG_DIR = BASE_DIR / 'logs'
TOKEN_PATH = BASE_DIR / 'token.json'

LOG_DIR.mkdir(exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler(LOG_DIR / 'analytics.log', encoding='utf-8'),
        logging.StreamHandler(),
    ]
)
logger = logging.getLogger(__name__)

TELEGRAM_BOT_TOKEN = os.getenv('TELEGRAM_BOT_TOKEN', '')
TELEGRAM_CHAT_ID = os.getenv('TELEGRAM_CHAT_ID', '')
BLOG_MAIN_ID = os.getenv('BLOG_MAIN_ID', '')

SCOPES = [
    'https://www.googleapis.com/auth/blogger.readonly',
    'https://www.googleapis.com/auth/webmasters.readonly',
]


def get_google_credentials() -> Optional[Credentials]:
    """Load the stored Google OAuth credentials, refreshing them if expired.

    Returns:
        The credentials read from ``TOKEN_PATH``, or ``None`` when the token
        file does not exist. The returned object may still be invalid (for
        example when it has no refresh token), so callers must check
        ``creds.valid`` before using it.
    """
    creds = None
    if TOKEN_PATH.exists():
        creds = Credentials.from_authorized_user_file(str(TOKEN_PATH), SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
            # Persist the refreshed token so the next run can reuse it.
            with open(TOKEN_PATH, 'w', encoding='utf-8') as f:
                f.write(creds.to_json())
    return creds


def load_published_records() -> list[dict]:
    """Load the full publication history.

    Reads every ``*.json`` file in ``DATA_DIR / 'published'``. Files that
    cannot be read or parsed, or whose top-level value is not a JSON object,
    are skipped with a warning instead of aborting the whole report.

    Returns:
        The records sorted by ``published_at``, newest first.
    """
    records = []
    published_dir = DATA_DIR / 'published'
    for path in published_dir.glob('*.json'):
        try:
            record = json.loads(path.read_text(encoding='utf-8'))
        except (OSError, ValueError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            logger.warning("발행 이력 로드 실패 (%s): %s", path.name, e)
            continue
        if not isinstance(record, dict):
            logger.warning("발행 이력 형식 오류 (%s)", path.name)
            continue
        records.append(record)
    # A missing or null timestamp sorts as '' so mixed records never raise.
    return sorted(
        records,
        key=lambda x: str(x.get('published_at') or ''),
        reverse=True,
    )


def send_telegram(text: str):
    """Send *text* to the configured Telegram chat.

    When the bot token or chat id is not configured, the text is printed to
    stdout instead. Delivery is best-effort: network errors and non-2xx
    responses are logged, never raised.

    The message is sent with ``parse_mode='HTML'``, so any literal ``<``,
    ``>`` or ``&`` in *text* must already be HTML-escaped by the caller.
    """
    if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID:
        logger.warning("Telegram 설정 없음")
        print(text)
        return
    url = f'https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage'
    try:
        resp = requests.post(url, json={
            'chat_id': TELEGRAM_CHAT_ID,
            'text': text,
            'parse_mode': 'HTML',
        }, timeout=10)
    except requests.RequestException as e:
        logger.error("Telegram 전송 실패: %s", e)
        return
    # requests does not raise on 4xx/5xx by itself; surface API rejections.
    if not resp.ok:
        logger.error(
            "Telegram 전송 실패: HTTP %s %s", resp.status_code, resp.text
        )


# ─── Search Console data ──────────────────────────────

def get_search_console_data(site_url: str, start_date: str, end_date: str,
                            creds: Credentials) -> dict:
    """Query search performance through the Search Console API.

    Args:
        site_url: Site property passed as ``siteUrl``.
        start_date: Start of the range, ``YYYY-MM-DD``.
        end_date: End of the range, ``YYYY-MM-DD``.
        creds: Google credentials used to build the API client.

    Returns:
        The raw API response (per-page rows, at most 1000), or ``{}`` when
        the request fails.
    """
    try:
        service = build('searchconsole', 'v1', credentials=creds)
        request_body = {
            'startDate': start_date,
            'endDate': end_date,
            'dimensions': ['page'],
            'rowLimit': 1000,
        }
        resp = service.searchanalytics().query(
            siteUrl=site_url, body=request_body
        ).execute()
        return resp
    except Exception as e:
        logger.warning("Search Console API 오류: %s", e)
        return {}


def _row_url(row: dict) -> str:
    """Return the page URL of a Search Console row, or '' if it has none."""
    keys = row.get('keys') or ['']
    return keys[0]


def calc_index_rate(published_records: list[dict], sc_data: dict) -> float:
    """Compute the index rate.

    The index rate is the percentage of published posts whose URL appears in
    the Search Console rows. Records without a URL never count as indexed.

    Returns:
        Percentage rounded to one decimal; ``0.0`` when there are no records.
    """
    if not published_records:
        return 0.0
    sc_urls = set()
    for row in sc_data.get('rows', []):
        url = _row_url(row)
        if url:  # rows without a key must not match URL-less records
            sc_urls.add(url)
    indexed = sum(
        1 for r in published_records
        if r.get('url') and r['url'] in sc_urls
    )
    return round(indexed / len(published_records) * 100, 1)


def calc_average_ctr(sc_data: dict) -> float:
    """Compute the overall CTR (total clicks / total impressions) in percent.

    Returns:
        Percentage rounded to two decimals; ``0.0`` when there are no rows
        or no impressions.
    """
    rows = sc_data.get('rows', [])
    if not rows:
        return 0.0
    total_clicks = sum(r.get('clicks', 0) for r in rows)
    total_impressions = sum(r.get('impressions', 0) for r in rows)
    if total_impressions == 0:
        return 0.0
    return round(total_clicks / total_impressions * 100, 2)


def _parse_published_at(value) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp into an aware datetime, or return None.

    A trailing ``Z`` is accepted (``fromisoformat`` rejects it before
    Python 3.11). Naive timestamps are treated as UTC.
    """
    try:
        if value.endswith('Z'):
            value = value[:-1] + '+00:00'
        parsed = datetime.fromisoformat(value)
    except (AttributeError, TypeError, ValueError):
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def get_14day_performance(published_records: list[dict],
                          sc_data: dict) -> list[dict]:
    """Collect the performance of posts published at least 14 days ago.

    Records whose ``published_at`` cannot be parsed are skipped. Clicks and
    impressions come from the Search Console row matching the record's URL
    and default to 0 when there is no such row.

    Returns:
        One dict per qualifying record with the keys ``title``, ``corner``,
        ``published_at``, ``clicks_14d``, ``impressions_14d`` and ``url``.
    """
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(days=14)

    sc_rows_by_url = {}
    for row in sc_data.get('rows', []):
        sc_rows_by_url[_row_url(row)] = row

    results = []
    for record in published_records:
        pub_str = record.get('published_at', '')
        pub_dt = _parse_published_at(pub_str)
        if pub_dt is None:
            continue
        if pub_dt > cutoff:
            continue  # fewer than 14 days since publication
        url = record.get('url', '')
        # Never match a URL-less record against a key-less row.
        sc_row = sc_rows_by_url.get(url, {}) if url else {}
        results.append({
            'title': record.get('title', ''),
            'corner': record.get('corner', ''),
            'published_at': pub_str,
            'clicks_14d': sc_row.get('clicks', 0),
            'impressions_14d': sc_row.get('impressions', 0),
            'url': url,
        })
    return results


# ─── Report generation ────────────────────────────────

def _esc(value) -> str:
    """HTML-escape a dynamic value for a Telegram ``parse_mode='HTML'`` message."""
    return html.escape(str(value), quote=False)


def format_daily_report(
    today_published: list[dict],
    index_rate: float,
    avg_ctr: float,
    total_published: int,
) -> str:
    """Build the daily report text.

    Titles and corner names are HTML-escaped because the report is sent with
    Telegram's HTML parse mode, where a stray ``<`` or ``&`` makes the API
    reject the whole message.
    """
    today_str = datetime.now().strftime('%Y-%m-%d')
    today_count = len(today_published)
    today_titles = '\n'.join(
        f" • [{_esc(r.get('corner', ''))}] {_esc(r.get('title', ''))}"
        for r in today_published
    )
    return (
        f"📊 일일 리포트 — {today_str}\n\n"
        f"📝 오늘 발행: {today_count}개\n"
        f"{today_titles}\n\n"
        f"📈 누적 발행: {total_published}개\n"
        f"🔍 색인률: {index_rate}%\n"
        f"🖱 평균 CTR: {avg_ctr}%\n\n"
        f"Phase 1 목표: 색인률 80%+, CTR 3%+"
    )


def format_weekly_report(
    index_rate: float,
    avg_ctr: float,
    by_corner: dict,
    low_performers: list[dict],
) -> str:
    """Build the weekly report text.

    Lists the publish count per corner and up to five low performers.
    Titles and corner names are HTML-escaped for Telegram's HTML parse mode.
    """
    today_str = datetime.now().strftime('%Y-%m-%d')
    corner_lines = '\n'.join(
        f" • {_esc(corner)}: {count}개"
        for corner, count in by_corner.items()
    )
    low_lines = '\n'.join(
        f" ⚠ {_esc(r['title'])} (클릭 {r['clicks_14d']}회)"
        for r in low_performers[:5]
    ) or ' 없음'
    return (
        f"📊 주간 리포트 — {today_str}\n\n"
        f"🔍 색인률: {index_rate}%\n"
        f"🖱 평균 CTR: {avg_ctr}%\n\n"
        f"📁 코너별 발행 수:\n{corner_lines}\n\n"
        f"⚠ 14일 성과 부진 글 (클릭 0):\n{low_lines}\n\n"
        f"💡 피드백 루프 적용 완료 → 다음 주 글감 조정"
    )


def save_analytics(data: dict, filename: str):
    """Write *data* as pretty-printed UTF-8 JSON to ``DATA_DIR/analytics/filename``."""
    analytics_dir = DATA_DIR / 'analytics'
    # parents=True: the data directory may not exist yet on a first run.
    analytics_dir.mkdir(parents=True, exist_ok=True)
    with open(analytics_dir / filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)


def generate_feedback_json(index_rate: float, avg_ctr: float,
                           low_performers: list[dict],
                           by_corner: dict) -> dict:
    """Build the feedback payload handed to the collector bot.

    Args:
        index_rate: Index rate in percent.
        avg_ctr: Average CTR in percent.
        low_performers: Entries shaped like ``get_14day_performance`` output.
        by_corner: Mapping of corner name to number of published posts.

    Returns:
        A dict with ``generated_at``, ``metrics`` and a list of
        ``adjustments`` (warning / title_meta / corner_boost / corner_reduce).
    """
    feedback = {
        'generated_at': datetime.now().isoformat(),
        'metrics': {
            'index_rate': index_rate,
            'avg_ctr': avg_ctr,
        },
        'adjustments': [],
    }
    adjustments = feedback['adjustments']

    if index_rate < 50:
        adjustments.append({
            'type': 'warning',
            'message': '색인률 50% 미만 — 글 구조/Schema 점검 필요',
        })
    if avg_ctr < 1:
        adjustments.append({
            'type': 'title_meta',
            'message': 'CTR 1% 미만 — 제목/메타 설명 스타일 변경 권고',
        })

    # Boost the leading corner.
    # NOTE(review): by_corner holds publish counts, so this selects the most
    # published corner rather than the best-performing one — confirm intent.
    max_corner = max(by_corner, key=by_corner.get) if by_corner else None
    if max_corner:
        adjustments.append({
            'type': 'corner_boost',
            'corner': max_corner,
            'message': f'{max_corner} 코너 성과 우수 — 비율 확대 권고',
        })

    # Reduce topic types whose posts got zero clicks after 14 days.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # generated file is stable from run to run (a set is not).
    bad_corners = dict.fromkeys(
        r.get('corner', '') for r in low_performers
        if r.get('clicks_14d') == 0
    )
    for corner in bad_corners:
        adjustments.append({
            'type': 'corner_reduce',
            'corner': corner,
            'message': f'{corner} 코너 14일 성과 부진 — 주제 유형 축소 권고',
        })
    return feedback


# ─── Main entry points ────────────────────────────────

def _fetch_search_console(days: int) -> dict:
    """Fetch Search Console data for the last *days* days; ``{}`` if unavailable.

    The site URL is read from the ``BLOG_SITE_URL`` environment variable
    (e.g. https://techinsider-kr.blogspot.com/).
    """
    try:
        creds = get_google_credentials()
        if not (creds and creds.valid):
            logger.warning("Google 인증 정보 없음 — Search Console 조회 생략")
            return {}
        site_url = os.getenv('BLOG_SITE_URL', '')
        if not site_url:
            logger.warning("BLOG_SITE_URL 미설정 — Search Console 조회 생략")
            return {}
        today = datetime.now()
        end_date = today.strftime('%Y-%m-%d')
        start_date = (today - timedelta(days=days)).strftime('%Y-%m-%d')
        return get_search_console_data(site_url, start_date, end_date, creds)
    except Exception as e:
        # Best-effort boundary: a report is still produced without SC data.
        logger.warning("Search Console 조회 실패: %s", e)
        return {}


def daily_report():
    """Generate the daily report, send it to Telegram and save it."""
    logger.info("=== 분석봇 일일 리포트 시작 ===")

    published_records = load_published_records()

    # Posts published today.
    today_str = datetime.now().strftime('%Y-%m-%d')
    today_published = [
        r for r in published_records
        if str(r.get('published_at') or '').startswith(today_str)
    ]

    # Search Console data (last 7 days).
    sc_data = _fetch_search_console(days=7)

    index_rate = calc_index_rate(published_records, sc_data)
    avg_ctr = calc_average_ctr(sc_data)

    report_text = format_daily_report(
        today_published, index_rate, avg_ctr, len(published_records)
    )
    send_telegram(report_text)

    # Persist the daily metrics.
    save_analytics({
        'date': today_str,
        'today_published': len(today_published),
        'total_published': len(published_records),
        'index_rate': index_rate,
        'avg_ctr': avg_ctr,
    }, f'{today_str}_daily.json')

    logger.info("=== 분석봇 일일 리포트 완료 ===")


def weekly_report():
    """Generate the weekly report, send it to Telegram and save the feedback.

    Returns:
        The feedback dict produced by ``generate_feedback_json``.
    """
    logger.info("=== 분석봇 주간 리포트 시작 ===")

    published_records = load_published_records()

    # Search Console data (last 28 days).
    sc_data = _fetch_search_console(days=28)

    index_rate = calc_index_rate(published_records, sc_data)
    avg_ctr = calc_average_ctr(sc_data)
    perf_14d = get_14day_performance(published_records, sc_data)

    # Number of published posts per corner.
    by_corner: dict[str, int] = {}
    for r in published_records:
        corner = r.get('corner', '기타')
        by_corner[corner] = by_corner.get(corner, 0) + 1

    # Posts with no clicks 14 days after publication.
    low_performers = [r for r in perf_14d if r['clicks_14d'] == 0]

    report_text = format_weekly_report(
        index_rate, avg_ctr, by_corner, low_performers
    )
    send_telegram(report_text)

    # Generate the feedback JSON.
    feedback = generate_feedback_json(
        index_rate, avg_ctr, low_performers, by_corner
    )
    save_analytics(
        feedback, f"{datetime.now().strftime('%Y%m%d')}_feedback.json"
    )

    logger.info("=== 분석봇 주간 리포트 완료 ===")
    return feedback


if __name__ == '__main__':
    if len(sys.argv) > 1 and sys.argv[1] == 'weekly':
        weekly_report()
    else:
        daily_report()