"""
Скрипт для парсинга постов из Instagram и сохранения их в базу данных
Использует ту же логику, что и scheduler, но запускается напрямую
"""

import asyncio
import sys
import os
import logging
from datetime import datetime
from dotenv import load_dotenv

# Fix encoding for Windows
if sys.platform == 'win32':
    os.environ['PYTHONIOENCODING'] = 'utf-8'
    # Guard each stream separately: one may be redirected to an object
    # without reconfigure() while the other is a regular console wrapper.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, 'reconfigure'):
            stream.reconfigure(encoding='utf-8')

load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('seabot.log', encoding='utf-8'),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

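# Project imports come after load_dotenv() and the logging setup so that
# module-level code in src.* sees the environment variables and the
# configured root logger.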
from src.database.db import init_db, session_scope
from src.database.models import Source, Post, PostMetrics
from src.parsers.instagram_parser import InstagramParser
from src.filters.topic_filter import TopicFilter
from src.metrics.calculator import MetricsCalculator
from src.config_loader import ConfigLoader, SourceConfig
from src.constants import DEFAULT_PARSE_DAYS, MIN_KEYWORDS_COUNT


async def parse_and_save_source(source_config: SourceConfig):
    """
    Parse posts from a single source and save them to the database.

    Args:
        source_config: Source configuration.

    Returns:
        Tuple of (parsed_count, filtered_count, saved_count).
    """
    platform = source_config.platform
    username = source_config.username
    subscriber_count = source_config.subscriber_count
    
    # Skip X.com
    if platform == 'x':
        logger.info(f"Skipping X.com @{username} - X.com parsing disabled")
        return 0, 0, 0
    
    # Parse Instagram only
    if platform != 'instagram':
        return 0, 0, 0
    
    logger.info(f"Parsing {platform}/@{username}...")
    
    try:
        # Create parser
        parser = InstagramParser(username, subscriber_count)
        
        # Parse posts
        parsed_posts = await parser.parse_posts(days=DEFAULT_PARSE_DAYS)
        parsed_count = len(parsed_posts)
        
        # Try to get real subscriber count
        try:
            real_subscriber_count = await parser.get_subscriber_count()
            if real_subscriber_count and real_subscriber_count != subscriber_count:
                logger.info(f"  📊 Real subscriber count: {real_subscriber_count:,} (was {subscriber_count:,})")
                subscriber_count = real_subscriber_count
                source_config.subscriber_count = real_subscriber_count
        except Exception as e:
            logger.debug(f"  ⚠ Could not get real subscriber count: {e}")
        
        if parsed_count > 0:
            logger.info(f"  Sample post metrics: views={parsed_posts[0].views}, likes={parsed_posts[0].likes}, "
                      f"comments={parsed_posts[0].comments}, shares={parsed_posts[0].shares}")
        else:
            logger.warning(f"  ⚠ No posts parsed for {platform}/@{username}")
            return 0, 0, 0
        
        # Filter by topic
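        # TopicFilter keeps only posts whose text matches at least
        # MIN_KEYWORDS_COUNT topic keywords (its min_keyword_count argument).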
        topic_filter = TopicFilter(min_keyword_count=MIN_KEYWORDS_COUNT)
        filtered_posts = topic_filter.filter_posts(parsed_posts)
        filtered_count = len(filtered_posts)
        
        # Log why posts were filtered out
        if parsed_count > 0 and filtered_count == 0:
            logger.warning(f"  ⚠ All {parsed_count} posts were filtered out by topic filter!")
            sample = parsed_posts[0]
            logger.warning(f"  Sample post content: {sample.content[:100] if sample.content else 'No content'}...")
            matching_keywords = topic_filter.get_matching_keywords(sample.content or '')
            logger.warning(f"  Matching keywords: {matching_keywords}")
        
        logger.info(f"  Parsed: {parsed_count}, After filter: {filtered_count}")
        
        # Save to database
        saved_count = await save_posts(source_config, filtered_posts)
        
        if saved_count > 0:
            logger.info(f"  ✓ Saved {saved_count} posts to database")
        
        return parsed_count, filtered_count, saved_count
        
    except Exception as e:
        logger.error(f"  ✗ Parsing failed for {platform}/@{username}: {e}", exc_info=True)
        return 0, 0, 0


async def save_posts(source_config: SourceConfig, posts: list) -> int:
    """
    Save posts to the database and calculate metrics.
    Uses the same logic as the scheduler.
    """
    if not posts:
        return 0
    
    saved_count = 0
    
    with session_scope() as session:
        # Get or create source
        source = session.query(Source).filter_by(
            platform=source_config.platform,
            identifier=source_config.username
        ).first()
        
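        # Fall back to matching on username in case the row was stored
        # without identifier set.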
        if not source:
            source = session.query(Source).filter_by(
                platform=source_config.platform,
                username=source_config.username
            ).first()
        
        if not source:
            source = Source(
                platform=source_config.platform,
                username=source_config.username,
                identifier=source_config.username,
                subscriber_count=source_config.subscriber_count,
                followers_count=source_config.subscriber_count,
                is_active=1
            )
            session.add(source)
            session.flush()
            logger.info(f"  ✓ Created source: {source_config.platform}/@{source_config.username}")
        else:
            # Update subscriber count
            new_subscriber_count = source_config.subscriber_count
            if new_subscriber_count != source.subscriber_count:
                logger.info(f"  📊 Updating subscriber count: {source.subscriber_count:,} -> {new_subscriber_count:,}")
                source.subscriber_count = new_subscriber_count
                source.followers_count = new_subscriber_count
            source.last_updated = datetime.utcnow()
        
        # Process each post
        for parsed_post in posts:
            # Check if post already exists
            existing_post = session.query(Post).filter_by(
                source_id=source.id,
                external_id=parsed_post.external_id
            ).first()
            
            if existing_post:
                # Update existing post metrics
                updated = False
                has_any_metrics = (parsed_post.views > 0 or parsed_post.likes > 0 or 
                                  parsed_post.comments > 0 or parsed_post.shares > 0)
                existing_has_no_metrics = (existing_post.views == 0 and existing_post.likes == 0 and 
                                           existing_post.comments == 0 and existing_post.shares == 0)
                
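                # If the stored post has no engagement data at all, backfill it
                # wholesale; otherwise treat each counter as monotonic and only
                # ever raise it, so a partial parse cannot clobber good values.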
                if existing_has_no_metrics and has_any_metrics:
                    existing_post.views = parsed_post.views
                    existing_post.likes = parsed_post.likes
                    existing_post.comments = parsed_post.comments
                    existing_post.shares = parsed_post.shares
                    updated = True
                else:
                    if parsed_post.views > existing_post.views:
                        existing_post.views = parsed_post.views
                        updated = True
                    if parsed_post.likes > existing_post.likes:
                        existing_post.likes = parsed_post.likes
                        updated = True
                    if parsed_post.comments > existing_post.comments:
                        existing_post.comments = parsed_post.comments
                        updated = True
                    if parsed_post.shares > existing_post.shares:
                        existing_post.shares = parsed_post.shares
                        updated = True
                
                if parsed_post.content and parsed_post.content != existing_post.content:
                    existing_post.content = parsed_post.content
                if parsed_post.url and parsed_post.url != existing_post.url:
                    existing_post.url = parsed_post.url
                
                existing_post.parsed_at = datetime.utcnow()
                post = existing_post
                
                if updated:
                    logger.info(f"  ↻ Updated post {post.external_id}: views={post.views}, likes={post.likes}, comments={post.comments}")
            else:
                # Create new post
                post = Post(
                    source_id=source.id,
                    external_id=parsed_post.external_id,
                    content=parsed_post.content,
                    url=parsed_post.url,
                    views=parsed_post.views,
                    reactions=parsed_post.reactions,
                    likes=parsed_post.likes,
                    comments=parsed_post.comments,
                    shares=parsed_post.shares,
                    created_at=parsed_post.created_at,
                    parsed_at=datetime.utcnow()
                )
                session.add(post)
                session.flush()
                saved_count += 1
                logger.info(f"  ✓ Saved new post {post.external_id}: views={post.views}, likes={post.likes}, comments={post.comments}")
            
            # Calculate and save metrics
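            # If the post already has a metrics row (one-to-one via
            # post.metrics), update it in place instead of adding a duplicate.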
            try:
                metrics = MetricsCalculator.calculate_post_metrics(post, source)
                
                if post.metrics:
                    post.metrics.engagement_rate = metrics.engagement_rate
                    post.metrics.normalized_score = metrics.normalized_score
                    post.metrics.subscriber_ratio = metrics.subscriber_ratio
                    post.metrics.total_engagement = metrics.total_engagement
                    post.metrics.calculated_at = datetime.utcnow()
                else:
                    metrics.post_id = post.id
                    session.add(metrics)
            except Exception as e:
                logger.error(f"  ✗ Error calculating metrics for post {post.external_id}: {e}")
    
    logger.info(f"  Saved: {saved_count} new posts")
    return saved_count


async def main():
    """Главная функция"""
    print("="*60)
    print("ПАРСИНГ INSTAGRAM И СОХРАНЕНИЕ В БАЗУ ДАННЫХ")
    print("="*60)
    print(f"Время запуска: {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}")
    print()
    
    # Initialize database
    if not init_db():
        print("❌ Ошибка инициализации базы данных")
        return
    
    # Load sources from config
    try:
        config_loader = ConfigLoader('config/sources.yaml')
        sources = config_loader.load()
        stats = config_loader.get_stats()
        
        print(f"📋 Загружено источников:")
        print(f"   - Instagram: {stats['instagram']}")
        print(f"   - Всего: {stats['total']}")
        print()
        
    except Exception as e:
        logger.error(f"❌ Ошибка загрузки конфигурации: {e}")
        return
    
    if not sources:
        print("⚠️ Нет источников для парсинга")
        return
    
    # Parse only Instagram sources (first 10 for testing)
    instagram_sources = [s for s in sources if s.platform == 'instagram'][:10]
    
    print(f"📷 Парсинг {len(instagram_sources)} Instagram источников...")
    print()
    
    total_parsed = 0
    total_filtered = 0
    total_saved = 0
    
    for i, source_config in enumerate(instagram_sources, 1):
        print(f"[{i}/{len(instagram_sources)}] Обработка @{source_config.username}...")
        
        parsed, filtered, saved = await parse_and_save_source(source_config)
        total_parsed += parsed
        total_filtered += filtered
        total_saved += saved
        
        # Delay between sources to avoid rate limiting
        if i < len(instagram_sources):
            await asyncio.sleep(3)
    
    print()
    print("="*60)
    print("РЕЗУЛЬТАТЫ ПАРСИНГА")
    print("="*60)
    print(f"📊 Всего спарсено: {total_parsed}")
    print(f"✅ Прошло фильтр: {total_filtered}")
    print(f"💾 Сохранено новых: {total_saved}")
    print()
    print("Теперь запустите /top в боте, чтобы увидеть посты!")


if __name__ == '__main__':
    asyncio.run(main())
