#!/usr/bin/env python3
"""
АГРЕССИВНЫЙ ПОИСК СОВМЕСТИМОСТИ
"""

import os
import re
import sys
import time

import mysql.connector
import requests
from bs4 import BeautifulSoup

# Connection parameters passed verbatim to ``mysql.connector.connect``.
# Each value can be overridden through an environment variable so that
# credentials do not have to live in source control; the defaults keep the
# previous hard-coded behavior when the variables are not set.
DB_CONFIG = {
    'host': os.environ.get('CATALOG_DB_HOST', 'localhost'),
    'user': os.environ.get('CATALOG_DB_USER', 'catalog_user'),
    'password': os.environ.get('CATALOG_DB_PASSWORD', 'Catalog2026'),
    'database': os.environ.get('CATALOG_DB_NAME', 'cartridge_catalog'),
}

# Default HTTP headers applied to every request made by the scraper session.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
}

# Regexes tried in order against the product title; group 1 is the printer text.
_TITLE_PATTERNS = (
    r'для ([^,\.]*(?:принтер|printer|laserjet|deskjet|pixma|officejet)[^,\.]*)',
    r'для ([^,\.]*(?:HP|Canon|Brother|Xerox|Epson|Samsung)[^,\.]*)',
    r'\(для ([^)]+)\)',
    r'\/\s*([^\/]*принтер[^\/]*)',
)

# Regexes tried in order against the full page text; group 1 is the value
# following a "compatibility"-like label, up to the first newline or period.
_TEXT_PATTERNS = (
    r'Совместимость[:\s]+([^\n\.]+)',
    r'Подходит для[:\s]+([^\n\.]+)',
    r'Для принтеров[:\s]+([^\n\.]+)',
    r'Для моделей[:\s]+([^\n\.]+)',
    r'Используется в[:\s]+([^\n\.]+)',
    r'Работает с[:\s]+([^\n\.]+)',
    r'Применение[:\s]+([^\n\.]+)',
)

# Substrings that mark a table row label as compatibility-related.
_TABLE_LABEL_KEYWORDS = (
    'совместимость', 'compatibility', 'подходит', 'для', 'принтер', 'модель',
)

# Regexes used to pull individual model names out of free text.
_PRINTER_PATTERNS = (
    r'\b(?:HP|Hewlett[\s-]?Packard)[\s-]([A-Za-z]+\s*\d+\w*)\b',
    r'\b(Canon)[\s-]([A-Za-z]+\s*\d+\w*)\b',
    r'\b(Brother)[\s-]([A-Za-z]+\s*\d+\w*)\b',
    r'\b(Xerox)[\s-]([A-Za-z]+\s*\d+\w*)\b',
    r'\b(Epson)[\s-]([A-Za-z]+\s*\d+\w*)\b',
    r'\b(Samsung)[\s-]([A-Za-z]+\s*\d+\w*)\b',
    r'\b(LaserJet\s*\d+\w*)\b',
    r'\b(Deskjet\s*\d+\w*)\b',
    r'\b(Pixma\s*\d+\w*)\b',
    r'\b(OfficeJet\s*\d+\w*)\b',
)

# Last-resort mapping from a cartridge code found in the title to a canned
# compatibility string.
# NOTE(review): these printer lists were not verified against vendor data --
# confirm them (e.g. CF280A is mapped to Canon models here).
_CARTRIDGE_FALLBACKS = {
    'CE285A': 'Для HP LaserJet Pro MFP M426-M429, M428-M431',
    '410X': 'Для HP LaserJet Pro M402-M404, M425-M428',
    'Q2612A': 'Для HP LaserJet 1018, 1020, 1022, P1005, P1006',
    'TN-2310': 'Для Brother HL-1112, HL-1110, HL-1202',
    'CF280A': 'Для Canon i-SENSYS LBP-1120, 1130, 2900',
    'CLI-526': 'Для Canon Pixma MG2440, MG2540, MG2940',
    'PG-540': 'Для Canon Pixma iP2840, iP2845, MG2440',
    'LC-223': 'Для Brother DCP-T300, T500, T700',
}


def _compat_from_title(title):
    """Return the first non-empty printer description matched in the title, or ''."""
    for pattern in _TITLE_PATTERNS:
        match = re.search(pattern, title, re.IGNORECASE)
        if match and match.group(1).strip():
            return match.group(1).strip()
    return ""


def _compat_from_text(all_text):
    """Return the joined matches of the first text pattern that hits, or ''."""
    for pattern in _TEXT_PATTERNS:
        stripped = (m.strip() for m in re.findall(pattern, all_text, re.IGNORECASE))
        # Whitespace-only captures carry no information; dropping them lets
        # the later patterns and fallbacks still run.
        found = [m for m in stripped if m]
        if found:
            return '. '.join(found)[:300]
    return ""


def _compat_from_tables(soup):
    """Return the first non-empty value of a compatibility-labelled table row, or ''."""
    for table in soup.find_all('table'):
        for row in table.find_all('tr'):
            cells = row.find_all(['td', 'th'])
            if len(cells) < 2:
                continue
            label = cells[0].get_text().strip().lower()
            value = cells[1].get_text().strip()
            # Skip rows with an empty value instead of stopping the scan.
            if value and any(keyword in label for keyword in _TABLE_LABEL_KEYWORDS):
                return value
    return ""


def _find_printer_models(text):
    """Return unique printer model names found in *text*, in order of discovery."""
    models = []
    seen = set()
    for pattern in _PRINTER_PATTERNS:
        for match in re.findall(pattern, text, re.IGNORECASE):
            parts = match if isinstance(match, tuple) else (match,)
            # Collapse internal whitespace so "LaserJet  1018" and
            # "LaserJet 1018" count as the same model.
            model = ' '.join(' '.join(parts).split())
            key = model.casefold()
            if len(model) > 3 and key not in seen:
                seen.add(key)
                models.append(model)
    return models


def _compat_from_cartridge_code(title):
    """Return the canned compatibility text for a cartridge code in the title, or ''."""
    lowered = title.lower()
    for code, compat_text in _CARTRIDGE_FALLBACKS.items():
        if code.lower() in lowered:
            return compat_text
    return ""


def extract_compatibility_aggressive(soup, title):
    """Extract printer-compatibility information for one product page.

    The compatibility text is taken from the first source that yields a
    non-empty result: the title, labelled phrases in the page text, spec
    tables, and finally a built-in cartridge-code table (used only when the
    title does not already mention "совместим").

    Args:
        soup: Parsed page; must provide ``get_text()`` and ``find_all()``
            (the caller in this file passes a BeautifulSoup object).
        title: Product title; ``None`` is treated as an empty string.

    Returns:
        A ``(compatibility, compatible_with)`` tuple of strings, truncated to
        500 and 300 characters respectively. ``compatible_with`` is a
        comma-separated list of up to 15 model names found in the title,
        compatibility text and page text, in their original letter case.
        Either element may be empty.
    """
    title = title or ""
    all_text = soup.get_text()

    compatibility = (
        _compat_from_title(title)
        or _compat_from_text(all_text)
        or _compat_from_tables(soup)
    )

    # The patterns are applied case-insensitively, so the text is searched
    # as-is and the stored model names keep their original spelling.
    search_text = f"{title} {compatibility} {all_text}"
    compatible_with = ', '.join(_find_printer_models(search_text)[:15])

    if not compatibility and 'совместим' not in title.lower():
        compatibility = _compat_from_cartridge_code(title)

    return compatibility[:500], compatible_with[:300]

def update_all_products():
    """Re-scrape every product page and refresh its compatibility columns.

    Reads all rows from ``cartridges``, fetches each ``source_url`` (one
    request per second), runs ``extract_compatibility_aggressive`` on the
    page, and updates ``compatibility`` / ``compatible_with``. Rows for which
    nothing is found get a placeholder text. Per-product failures are printed
    and skipped. All updates are committed once after the loop, then a summary
    row is written to ``parser_logs``.

    The cursor, connection and HTTP session are released even when an
    unexpected error aborts the run.
    """
    print("🔍 АГРЕССИВНЫЙ ПОИСК СОВМЕСТИМОСТИ")
    print("=" * 60)

    conn = mysql.connector.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            # Process every product, not only those missing compatibility.
            cursor.execute("SELECT id, source_url, title FROM cartridges ORDER BY id")
            products = cursor.fetchall()

            print(f"📦 Всего товаров: {len(products)}")

            updated = 0

            with requests.Session() as session:
                session.headers.update(HEADERS)

                for i, (product_id, url, title) in enumerate(products, 1):
                    # A NULL title must not abort the whole run.
                    title = title or ""
                    print(f"\n[{i}/{len(products)}] {title[:50]}...")

                    try:
                        # Throttle requests to one per second.
                        time.sleep(1)
                        resp = session.get(url, timeout=10)
                        if resp.status_code != 200:
                            print("   ⚠️ HTTP ошибка")
                            continue

                        soup = BeautifulSoup(resp.text, 'html.parser')

                        compatibility, compatible_with = extract_compatibility_aggressive(soup, title)

                        if compatibility:
                            cursor.execute("""
                    UPDATE cartridges 
                    SET compatibility = %s, 
                        compatible_with = %s,
                        updated_at = CURRENT_TIMESTAMP
                    WHERE id = %s
                """, (compatibility, compatible_with, product_id))

                            updated += 1
                            print(f"   ✅ Найдена: {compatibility[:80]}...")
                            if compatible_with:
                                print(f"      Модели: {compatible_with}")
                        else:
                            # Nothing found: store a placeholder text.
                            cursor.execute("""
                    UPDATE cartridges 
                    SET compatibility = 'Совместимость уточняйте у производителя',
                        updated_at = CURRENT_TIMESTAMP
                    WHERE id = %s
                """, (product_id,))
                            print("   ⚠️ Не найдено, установлена заглушка")

                    except Exception as e:
                        # Best-effort batch job: report the failure and move on.
                        print(f"   ❌ Ошибка: {e}")

            conn.commit()

            cursor.execute("""
        INSERT INTO parser_logs (parser_name, action, items_count, status, message)
        VALUES ('compatibility_aggressive', 'update', %s, 'success', 'Агрессивный поиск совместимости')
    """, (updated,))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

    print(f"\n{'='*60}")
    print(f"📊 Обновлено: {updated}/{len(products)} товаров")
    print("✅ Готово!")

# Run the full update only when executed as a script, not when imported.
if __name__ == "__main__":
    update_all_products()