<?php
/**
 * MisterTranslate Language Detection & Re-translation Class
 * 
 * Handles:
 * - Source language detection (minimal token usage)
 * - Tracking original content for re-translation
 * - Change detection
 * 
 * @package MisterTranslate
 * @since 4.2.0
 */

// Stop immediately when the file is requested directly rather than loaded
// through WordPress (which defines ABSPATH during bootstrap).
defined('ABSPATH') || exit;

/**
 * Local (no API call) source-language detector.
 *
 * Scores the content against a short list of very common words per language
 * and reports the best-scoring language together with a rough confidence.
 */
class MTP_Language_Detector {
    
    /** @var array Common words by language for local detection */
    private static $language_patterns = array(
        'en' => array('the', 'and', 'is', 'are', 'was', 'were', 'have', 'has', 'with', 'for', 'this', 'that', 'from', 'your', 'will'),
        'es' => array('el', 'la', 'los', 'las', 'de', 'del', 'en', 'que', 'por', 'para', 'con', 'una', 'uno', 'esto', 'esta', 'como'),
        'fr' => array('le', 'la', 'les', 'de', 'du', 'des', 'un', 'une', 'et', 'en', 'que', 'pour', 'avec', 'dans', 'est', 'sont'),
        'de' => array('der', 'die', 'das', 'und', 'ist', 'sind', 'ein', 'eine', 'von', 'mit', 'für', 'auf', 'dem', 'den', 'nicht'),
        'it' => array('il', 'la', 'lo', 'le', 'di', 'da', 'in', 'che', 'per', 'con', 'una', 'uno', 'del', 'della', 'sono', 'essere'),
        'pt' => array('o', 'a', 'os', 'as', 'de', 'do', 'da', 'em', 'que', 'para', 'com', 'uma', 'um', 'por', 'não', 'mais'),
        'ca' => array('el', 'la', 'els', 'les', 'de', 'del', 'en', 'que', 'per', 'amb', 'una', 'un', 'són', 'està', 'com'),
        'nl' => array('de', 'het', 'een', 'van', 'en', 'in', 'op', 'met', 'voor', 'dat', 'zijn', 'te', 'naar', 'ook', 'niet'),
        'ru' => array('и', 'в', 'не', 'на', 'с', 'что', 'как', 'это', 'для', 'по', 'из', 'от', 'то', 'за', 'все'),
        'zh' => array('的', '是', '在', '了', '和', '有', '我', '他', '这', '为', '不', '你', '就', '人', '都'),
        'ja' => array('の', 'に', 'は', 'を', 'た', 'が', 'で', 'て', 'と', 'し', 'れ', 'さ', 'ある', 'いる', 'も'),
        'ko' => array('이', '가', '은', '는', '을', '를', '에', '의', '로', '와', '과', '도', '하다', '있다', '되다'),
        'ar' => array('في', 'من', 'على', 'إلى', 'أن', 'هذا', 'التي', 'الذي', 'كان', 'لا', 'ما', 'هو', 'مع', 'عن'),
        'pl' => array('i', 'w', 'na', 'do', 'z', 'że', 'to', 'nie', 'się', 'jest', 'o', 'jak', 'ale', 'po', 'tak'),
        'sv' => array('och', 'i', 'att', 'en', 'det', 'som', 'på', 'är', 'av', 'för', 'med', 'till', 'den', 'har', 'de')
    );
    
    /**
     * @var array Languages whose patterns are counted as substrings of the
     *            content instead of as whole words. Their text is either not
     *            separated by spaces (zh, ja) or attaches particles directly
     *            to the preceding word (ko), so the letter-run tokenizer
     *            would never produce a token equal to one of their patterns.
     */
    private static $substring_languages = array('zh', 'ja', 'ko');
    
    /** @var array Language names */
    private static $language_names = array(
        'en' => 'English', 'es' => 'Spanish', 'fr' => 'French',
        'de' => 'German', 'it' => 'Italian', 'pt' => 'Portuguese',
        'ca' => 'Catalan', 'nl' => 'Dutch', 'ru' => 'Russian',
        'zh' => 'Chinese', 'ja' => 'Japanese', 'ko' => 'Korean',
        'ar' => 'Arabic', 'pl' => 'Polish', 'sv' => 'Swedish'
    );
    
    /**
     * Detect source language from content (LOCAL - no API call)
     * Uses word frequency analysis
     * 
     * HTML tags and [shortcodes] are stripped before analysis. When the
     * content contains no letters, cannot be parsed as UTF-8, or matches no
     * known pattern, English with confidence 0 is returned. When several
     * languages reach the same score, the one declared first in the pattern
     * table wins.
     * 
     * @param string $content Content to analyze
     * @return array ['code' => string, 'name' => string, 'confidence' => float]
     */
    public static function detect($content) {
        $fallback = array(
            'code' => 'en',
            'name' => 'English',
            'confidence' => 0
        );
        
        // Tolerate null/numeric input; anything non-scalar has no text to analyze.
        if (!is_string($content)) {
            $content = is_scalar($content) ? (string) $content : '';
        }
        
        // Clean content
        $content = strip_tags($content);
        $content = preg_replace('/\[[^\]]+\]/', '', $content); // Remove shortcodes
        if (!is_string($content)) {
            // preg_replace() returns null on a PCRE error.
            return $fallback;
        }
        $content = self::to_lowercase($content);
        
        // Get words. preg_match_all() returns false when the subject is not
        // valid UTF-8 and 0 when there are no letters at all.
        $matched = preg_match_all('/[\p{L}]+/u', $content, $matches);
        if (!$matched || empty($matches[0])) {
            return $fallback;
        }
        
        $words = $matches[0];
        $word_count = count($words);
        $word_freq = array_count_values($words);
        
        // Score each language and keep the first one reaching the highest
        // score. The strict comparison makes tie-breaking independent of the
        // sort implementation of the running PHP version.
        $best_lang = $fallback['code'];
        $best_score = 0;
        
        foreach (self::$language_patterns as $lang => $patterns) {
            if (in_array($lang, self::$substring_languages, true)) {
                $score = self::count_substring_hits($content, $patterns);
            } else {
                $score = self::count_word_hits($word_freq, $patterns);
            }
            
            if ($score > $best_score) {
                $best_score = $score;
                $best_lang = $lang;
            }
        }
        
        // Nothing matched any language: report the default instead of a guess.
        if ($best_score === 0) {
            return $fallback;
        }
        
        // Calculate confidence (0-1): saturates once the matched words make
        // up 10% of all words found.
        $confidence = min(1, $best_score / max(1, $word_count * 0.1));
        
        return array(
            'code' => $best_lang,
            'name' => self::get_language_name($best_lang),
            'confidence' => round($confidence, 2)
        );
    }
    
    /**
     * Get language name from code
     * 
     * @param string $code Language code
     * @return string Language name, or the capitalized code when unknown
     */
    public static function get_language_name($code) {
        return isset(self::$language_names[$code]) ? self::$language_names[$code] : ucfirst($code);
    }
    
    /**
     * Lowercase text so it can be compared with the lowercase pattern lists.
     * 
     * Uses the multibyte implementation when available because strtolower()
     * only handles single-byte characters and would leave e.g. "FÜR" or
     * Cyrillic capitals unmatched.
     * 
     * @param string $text Text to lowercase
     * @return string Lowercased text
     */
    private static function to_lowercase($text) {
        if (function_exists('mb_strtolower')) {
            return mb_strtolower($text, 'UTF-8');
        }
        
        return strtolower($text);
    }
    
    /**
     * Sum the occurrences of the given patterns among whole-word tokens.
     * 
     * @param array $word_freq Map of word => number of occurrences
     * @param array $patterns Words to look for
     * @return int Total number of matching words
     */
    private static function count_word_hits($word_freq, $patterns) {
        $hits = 0;
        foreach ($patterns as $pattern) {
            if (isset($word_freq[$pattern])) {
                $hits += $word_freq[$pattern];
            }
        }
        
        return $hits;
    }
    
    /**
     * Sum the occurrences of the given patterns anywhere in the text.
     * 
     * @param string $text Cleaned, lowercased content
     * @param array $patterns Character sequences to look for
     * @return int Total number of occurrences
     */
    private static function count_substring_hits($text, $patterns) {
        $hits = 0;
        foreach ($patterns as $pattern) {
            $hits += substr_count($text, $pattern);
        }
        
        return $hits;
    }
}

/**
 * Translation Tracker Class
 * Manages original content tracking for re-translation
 * 
 * A translated post is linked to its source through post meta. A hash of the
 * source content is kept on the translated post and compared against the
 * source's current content to decide whether a re-translation is needed.
 */
class MTP_Translation_Tracker {
    
    /** @var string Meta key for source content hash */
    const META_SOURCE_HASH = '_mtp_source_content_hash';
    
    /** @var string Meta key for source post ID */
    const META_SOURCE_POST = '_mtp_source_page';
    
    /** @var string Meta key for translation date */
    const META_TRANSLATED_DATE = '_mtp_translated_date';
    
    /** @var string Meta key for target language */
    const META_TARGET_LANG = '_mtp_target_lang';
    
    /**
     * Store source content hash when creating translation
     * 
     * Only the hash meta is written; the link to the source post
     * (META_SOURCE_POST) is not stored by this method.
     * 
     * @param int $translated_id Translated post ID
     * @param int $source_id Source post ID (currently unused by this method)
     * @param string $source_content Original content
     */
    public static function store_source_hash($translated_id, $source_id, $source_content) {
        $hash = self::generate_content_hash($source_content);
        update_post_meta($translated_id, self::META_SOURCE_HASH, $hash);
    }
    
    /**
     * Generate content hash
     * 
     * Whitespace runs are collapsed to a single space and the result is
     * trimmed, so formatting-only whitespace changes do not alter the hash.
     * Non-string values (for example page-builder meta returned as an array)
     * are converted to a string first instead of causing a type error.
     * 
     * @param string $content Content to hash
     * @return string MD5 hash
     */
    public static function generate_content_hash($content) {
        if (!is_string($content)) {
            $content = is_scalar($content) || $content === null
                ? (string) $content
                : serialize($content);
        }
        
        // Normalize content before hashing
        $content = preg_replace('/\s+/', ' ', $content);
        $content = trim((string) $content);
        return md5($content);
    }
    
    /**
     * Check if source content has changed since translation
     * 
     * 'changed' is false when no source is linked or the source post no
     * longer exists (see 'details' => 'error'), and true when no hash was
     * stored for the translation. 'source_id' is 0 when no source is linked,
     * otherwise the value read from post meta.
     * 
     * @param int $translated_id Translated post ID
     * @return array ['changed' => bool, 'source_id' => int|string, 'details' => array]
     */
    public static function check_for_changes($translated_id) {
        $source_id = get_post_meta($translated_id, self::META_SOURCE_POST, true);
        
        if (empty($source_id)) {
            return array(
                'changed' => false,
                'source_id' => 0,
                'details' => array('error' => 'No source post linked')
            );
        }
        
        $source_post = get_post($source_id);
        if (!$source_post) {
            return array(
                'changed' => false,
                'source_id' => $source_id,
                'details' => array('error' => 'Source post not found')
            );
        }
        
        // Get stored hash
        $stored_hash = get_post_meta($translated_id, self::META_SOURCE_HASH, true);
        
        if (empty($stored_hash)) {
            // No hash stored - assume changed for safety
            return array(
                'changed' => true,
                'source_id' => $source_id,
                'details' => array('reason' => 'No original hash stored')
            );
        }
        
        // Hash whatever currently represents the source content and compare.
        // NOTE(review): this only matches the stored hash if the caller of
        // store_source_hash() hashed the same representation (page-builder
        // data when present, post_content otherwise) - confirm at call sites.
        $current_hash = self::generate_content_hash(
            self::get_current_source_content($source_id, $source_post)
        );
        
        return array(
            'changed' => ($stored_hash !== $current_hash),
            'source_id' => $source_id,
            'details' => array(
                'stored_hash' => $stored_hash,
                'current_hash' => $current_hash,
                'source_title' => $source_post->post_title,
                'source_modified' => $source_post->post_modified,
                'translated_date' => get_post_meta($translated_id, self::META_TRANSLATED_DATE, true)
            )
        );
    }
    
    /**
     * Get all translations of a source post
     * 
     * Trashed translations are excluded. An invalid (non-positive) source ID
     * yields an empty list without querying the database; otherwise a numeric
     * comparison against 0 would match every non-numeric meta value.
     * 
     * @param int $source_id Source post ID
     * @return array Array of translated post IDs with language info
     */
    public static function get_translations($source_id) {
        $source_id = intval($source_id);
        if ($source_id <= 0) {
            return array();
        }
        
        global $wpdb;
        
        $results = $wpdb->get_results($wpdb->prepare(
            "SELECT DISTINCT pm.post_id
             FROM {$wpdb->postmeta} pm
             INNER JOIN {$wpdb->posts} p ON pm.post_id = p.ID
             WHERE pm.meta_key = %s
               AND pm.meta_value = %d
               AND p.post_status != 'trash'",
            self::META_SOURCE_POST,
            $source_id
        ), ARRAY_A);
        
        if (empty($results)) {
            return array();
        }
        
        $translations = array();
        foreach ($results as $row) {
            $translations[] = array(
                'id' => intval($row['post_id']),
                'lang' => get_post_meta($row['post_id'], self::META_TARGET_LANG, true),
                'date' => get_post_meta($row['post_id'], self::META_TRANSLATED_DATE, true)
            );
        }
        
        return $translations;
    }
    
    /**
     * Find translations that need updating (source changed)
     * 
     * Only the first ($limit * 3) linked, non-trashed translations (ordered
     * by post ID) are examined, so outdated translations beyond that window
     * are not reported by a single call.
     * 
     * @param int $limit Maximum results
     * @return array Posts needing re-translation
     */
    public static function find_outdated_translations($limit = 50) {
        $limit = intval($limit);
        if ($limit <= 0) {
            // Nothing requested; also avoids an invalid negative SQL LIMIT.
            return array();
        }
        
        global $wpdb;
        
        // Get candidate translations
        $translations = $wpdb->get_results($wpdb->prepare(
            "SELECT DISTINCT pm.post_id
             FROM {$wpdb->postmeta} pm
             INNER JOIN {$wpdb->posts} p ON pm.post_id = p.ID
             WHERE pm.meta_key = %s
               AND p.post_status != 'trash'
             ORDER BY pm.post_id ASC
             LIMIT %d",
            self::META_SOURCE_POST,
            $limit * 3 // Get more, will filter
        ), ARRAY_A);
        
        if (empty($translations)) {
            return array();
        }
        
        $outdated = array();
        
        foreach ($translations as $row) {
            $check = self::check_for_changes($row['post_id']);
            if ($check['changed']) {
                $post = get_post($row['post_id']);
                if ($post) {
                    // 'source_title' is absent when no hash was stored
                    $source_title = isset($check['details']['source_title']) 
                        ? $check['details']['source_title'] 
                        : 'Unknown source';
                    
                    $outdated[] = array(
                        'id' => $row['post_id'],
                        'title' => $post->post_title,
                        'source_id' => $check['source_id'],
                        'source_title' => $source_title,
                        'lang' => get_post_meta($row['post_id'], self::META_TARGET_LANG, true)
                    );
                }
            }
            
            if (count($outdated) >= $limit) {
                break;
            }
        }
        
        return $outdated;
    }
    
    /**
     * Return the content that currently represents the source post.
     * 
     * Page-builder data stored in post meta takes precedence over
     * post_content: '_elementor_data' first, then
     * '_aviaLayoutBuilderCleanData'.
     * 
     * @param int|string $source_id Source post ID
     * @param object $source_post Source post object (its post_content is read)
     * @return mixed Content to hash
     */
    private static function get_current_source_content($source_id, $source_post) {
        $elementor_data = get_post_meta($source_id, '_elementor_data', true);
        if (!empty($elementor_data)) {
            return $elementor_data;
        }
        
        $avia_data = get_post_meta($source_id, '_aviaLayoutBuilderCleanData', true);
        if (!empty($avia_data)) {
            return $avia_data;
        }
        
        return $source_post->post_content;
    }
}
