<?php
/**
 * Word Estimator Class
 * Token-based word estimation system
 * 
 * Compatible with PHP 5.6+
 */

// Direct-access guard: stop immediately unless the ABSPATH constant has been
// defined by whatever bootstrapped this file (by convention, WordPress itself).
if (defined('ABSPATH') === false) {
    exit;
}

class MTP_Word_Estimator {
    
    /**
     * Built-in ratios (tokens per word), keyed by lower-case language code.
     *
     * 'pred' is the expected (optimistic) ratio, 'max' the worst-case ratio
     * used for risk control, 'label' the display name of the language.
     * This class only reads the table; no override mechanism is implemented here.
     */
    private static $default_ratios = array(
        'en' => array('pred' => 3.9, 'max' => 4.9, 'label' => 'English'),
        'es' => array('pred' => 4.0, 'max' => 5.0, 'label' => 'Spanish'),
        'fr' => array('pred' => 4.1, 'max' => 5.1, 'label' => 'French'),
        'de' => array('pred' => 4.1, 'max' => 5.1, 'label' => 'Deutsch'),
        'it' => array('pred' => 4.0, 'max' => 5.0, 'label' => 'Italiano'),
        'pt' => array('pred' => 4.0, 'max' => 5.0, 'label' => 'Portuguese'),
        'ca' => array('pred' => 4.0, 'max' => 5.0, 'label' => 'Catalan'),
        'nl' => array('pred' => 4.2, 'max' => 5.2, 'label' => 'Nederlands'),
        'pl' => array('pred' => 4.3, 'max' => 5.4, 'label' => 'Polski'),
        'ru' => array('pred' => 4.5, 'max' => 5.7, 'label' => 'Russian'),
        'zh' => array('pred' => 5.5, 'max' => 7.0, 'label' => 'Chinese'),
        'ja' => array('pred' => 5.8, 'max' => 7.4, 'label' => 'Japanese'),
        'ko' => array('pred' => 5.5, 'max' => 7.0, 'label' => 'Korean'),
        'ar' => array('pred' => 4.8, 'max' => 6.0, 'label' => 'Arabic'),
        'sv' => array('pred' => 4.1, 'max' => 5.1, 'label' => 'Svenska'),
        'da' => array('pred' => 4.1, 'max' => 5.1, 'label' => 'Dansk'),
        'no' => array('pred' => 4.1, 'max' => 5.1, 'label' => 'Norsk'),
        'fi' => array('pred' => 4.3, 'max' => 5.4, 'label' => 'Suomi')
    );
    
    /**
     * Global ratios, used when the language is unknown or not listed.
     */
    const GLOBAL_RATIO_PRED = 4.0;
    const GLOBAL_RATIO_MAX = 5.5;
    
    /**
     * Purchase ratio (conservative): tokens reserved per purchased word.
     */
    const PURCHASE_RATIO = 5.0;
    
    /**
     * Get the full ratio table.
     *
     * @return array Map of language code => ['pred' => float, 'max' => float, 'label' => string]
     */
    public static function get_all_ratios() {
        return self::$default_ratios;
    }
    
    /**
     * Get ratios for a specific language.
     *
     * The code is matched exactly first. If that fails, it is lower-cased and
     * reduced to its primary subtag, so locale-style codes such as "en_US",
     * "pt-br" or "ZH-hans" resolve to the base language instead of silently
     * falling back to the (less accurate) global ratios.
     *
     * @param string $lang_code Language code
     * @return array ['pred' => float, 'max' => float, 'label' => string]
     */
    public static function get_language_ratios($lang_code) {
        $key = self::resolve_language_key($lang_code);
        if ($key !== null) {
            return self::$default_ratios[$key];
        }
        
        // Global fallback; the label is derived from the code as given.
        return array(
            'pred' => self::GLOBAL_RATIO_PRED,
            'max' => self::GLOBAL_RATIO_MAX,
            'label' => is_scalar($lang_code) ? ucfirst((string) $lang_code) : ''
        );
    }
    
    /**
     * Map a caller-supplied language code to a key of the ratio table.
     *
     * @param mixed $lang_code Language code as supplied by the caller
     * @return string|null Matching table key, or null when nothing matches
     */
    private static function resolve_language_key($lang_code) {
        if (!is_string($lang_code) || $lang_code === '') {
            return null;
        }
        
        // Exact match keeps behavior identical for already-supported codes.
        if (isset(self::$default_ratios[$lang_code])) {
            return $lang_code;
        }
        
        // "pt_BR" / "pt-br" / "PT" => "pt"
        $parts = preg_split('/[-_]/', strtolower(trim($lang_code)), 2);
        $primary = is_array($parts) ? $parts[0] : '';
        
        return isset(self::$default_ratios[$primary]) ? $primary : null;
    }
    
    /**
     * Get language label
     * 
     * @param string $lang_code Language code
     * @return string
     */
    public static function get_language_label($lang_code) {
        $ratios = self::get_language_ratios($lang_code);
        return $ratios['label'];
    }
    
    /**
     * Count translatable words in content.
     * Shortcodes (including bracketed page-builder tags) and HTML markup are
     * removed first; the remaining text is split on whitespace.
     * 
     * @param string $content Content to analyze
     * @return int Word count
     */
    public static function count_words($content) {
        // Do not use empty(): the string "0" is a valid one-word content.
        if (!is_scalar($content)) {
            return 0;
        }
        $content = (string) $content;
        if (trim($content) === '') {
            return 0;
        }
        
        // 1. Remove registered WordPress shortcodes
        $content = strip_shortcodes($content);
        
        // 2. Remove any remaining bracketed tags (page builder shortcodes, etc.)
        $without_brackets = preg_replace('/\[[^\]]+\]/', ' ', $content);
        if (is_string($without_brackets)) {
            $content = $without_brackets;
        }
        
        // 3. Put a space in front of block-level tags so that adjacent blocks
        //    ("<p>one</p><p>two</p>", "one<br>two") are not glued into one word
        //    once the tags are stripped. Inline tags are left alone on purpose.
        $spaced = preg_replace(
            '/<\/?(?:address|article|aside|blockquote|br|dd|div|dl|dt|figcaption|figure|footer|h[1-6]|header|hr|li|ol|p|pre|section|table|tbody|td|tfoot|th|thead|tr|ul)\b[^>]*>/i',
            ' $0',
            $content
        );
        if (is_string($spaced)) {
            $content = $spaced;
        }
        
        // 4. Remove HTML tags
        $content = wp_strip_all_tags($content);
        
        // 5. Decode HTML entities (e.g. &nbsp; becomes U+00A0)
        $content = html_entity_decode($content, ENT_QUOTES, 'UTF-8');
        
        // 6. Split on whitespace. The /u pattern also treats Unicode separators
        //    (non-breaking space, ideographic space, ...) as word boundaries.
        $words = preg_split('/[\s\p{Z}]+/u', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            // Content is not valid UTF-8: fall back to byte-wise splitting.
            $words = preg_split('/\s+/', $content, -1, PREG_SPLIT_NO_EMPTY);
        }
        
        return is_array($words) ? count($words) : 0;
    }
    
    /**
     * Calculate word range from tokens
     * 
     * @param int $tokens Available tokens
     * @param string|null $lang_code Language code (null = global)
     * @return array ['min' => int, 'max' => int, 'tokens' => int, 'ratio_pred' => float, 'ratio_max' => float]
     */
    public static function tokens_to_words_range($tokens, $lang_code = null) {
        if ($lang_code) {
            $ratios = self::get_language_ratios($lang_code);
        } else {
            $ratios = array(
                'pred' => self::GLOBAL_RATIO_PRED,
                'max' => self::GLOBAL_RATIO_MAX
            );
        }
        
        // words_min = tokens / ratio_MAX (worst case = fewer words)
        // words_max = tokens / ratio_PRED (best case = more words)
        return array(
            'min' => self::whole_words($tokens, $ratios['max']),
            'max' => self::whole_words($tokens, $ratios['pred']),
            'tokens' => intval($tokens),
            'ratio_pred' => $ratios['pred'],
            'ratio_max' => $ratios['max']
        );
    }
    
    /**
     * Number of whole words affordable with $tokens at $ratio tokens per word.
     *
     * The quotient is rounded to 6 decimals before flooring: ratios such as 4.9
     * are not exactly representable, so 490 / 4.9 evaluates to 99.99999999999999
     * and a plain floor() would lose a word. The result is never negative.
     *
     * @param int|float $tokens Token amount
     * @param float $ratio Tokens per word (must be > 0)
     * @return int
     */
    private static function whole_words($tokens, $ratio) {
        return max(0, intval(floor(round($tokens / $ratio, 6))));
    }
    
    /**
     * Number of whole tokens needed for $words at $ratio tokens per word.
     *
     * The product is rounded to 6 decimals before ceiling: 100 * 4.9 evaluates
     * to 490.00000000000006 and a plain ceil() would charge one token too many.
     *
     * @param int|float $words Word amount
     * @param float $ratio Tokens per word
     * @return int
     */
    private static function whole_tokens($words, $ratio) {
        return intval(ceil(round($words * $ratio, 6)));
    }
    
    /**
     * Estimate token cost for a translation
     * 
     * @param int $words Words to translate
     * @param string $target_lang Target language
     * @return array ['words_input' => int, 'tokens_pred' => int, 'tokens_max' => int,
     *                'ratio_pred' => float, 'ratio_max' => float,
     *                'target_lang' => string, 'lang_label' => string]
     */
    public static function estimate_cost($words, $target_lang) {
        $ratios = self::get_language_ratios($target_lang);
        
        return array(
            'words_input' => intval($words),
            // Predicted estimate (optimistic)
            'tokens_pred' => self::whole_tokens($words, $ratios['pred']),
            // Maximum estimate (conservative - for risk control)
            'tokens_max' => self::whole_tokens($words, $ratios['max']),
            'ratio_pred' => $ratios['pred'],
            'ratio_max' => $ratios['max'],
            'target_lang' => $target_lang,
            'lang_label' => $ratios['label']
        );
    }
    
    /**
     * Check if there is sufficient balance for a translation
     * ALWAYS uses worst case (MAX) for risk control
     * 
     * @param int $tokens_available Available tokens
     * @param array $estimate Result from estimate_cost()
     * @return array On success: ['allowed' => true, 'tokens_available', 'tokens_needed', 'margin'].
     *               On failure: ['allowed' => false, 'tokens_available', 'tokens_needed',
     *               'deficit', 'words_possible', 'message'].
     */
    public static function check_balance($tokens_available, $estimate) {
        $tokens_needed = $estimate['tokens_max'];
        
        if ($tokens_available >= $tokens_needed) {
            return array(
                'allowed' => true,
                'tokens_available' => $tokens_available,
                'tokens_needed' => $tokens_needed,
                'margin' => $tokens_available - $tokens_needed
            );
        }
        
        // Guard against a malformed estimate: never divide by zero.
        $ratio_max = isset($estimate['ratio_max']) ? (float) $estimate['ratio_max'] : 0.0;
        if ($ratio_max <= 0) {
            $ratio_max = self::GLOBAL_RATIO_MAX;
        }
        
        // How many words CAN be translated (0 when the balance is zero or negative)
        $words_possible = self::whole_words($tokens_available, $ratio_max);
        
        return array(
            'allowed' => false,
            'tokens_available' => $tokens_available,
            'tokens_needed' => $tokens_needed,
            'deficit' => $tokens_needed - $tokens_available,
            'words_possible' => $words_possible,
            'message' => sprintf(
                'Insufficient balance. You need ~%s tokens but have %s. You can translate up to %s words.',
                number_format($tokens_needed),
                number_format($tokens_available),
                number_format($words_possible)
            )
        );
    }
    
    /**
     * Convert word purchase to tokens
     * Used in checkout/WooCommerce
     * 
     * @param int $words Words to purchase
     * @return int Tokens to reserve
     */
    public static function words_to_tokens_purchase($words) {
        return self::whole_tokens($words, self::PURCHASE_RATIO);
    }
    
    /**
     * Convert purchased tokens to word range (for checkout display)
     * 
     * @param int $tokens Purchased tokens
     * @return array Same shape as tokens_to_words_range()
     */
    public static function tokens_to_words_purchase_range($tokens) {
        // Global range since we don't know target language
        return self::tokens_to_words_range($tokens, null);
    }
    
    /**
     * Generate explanatory text for range
     * 
     * @param array $range Result from tokens_to_words_range()
     * @param string|null $lang_code Language code
     * @return string
     */
    public static function get_range_explanation($range, $lang_code = null) {
        $lang_note = '';
        if ($lang_code) {
            $lang_note = ' for ' . self::get_language_label($lang_code);
        }
        
        return sprintf(
            'Between %s and %s words%s. Exact balance: %s tokens. ' .
            'Consumption varies by language and content (ratio: %s-%s tokens/word).',
            number_format($range['min']),
            number_format($range['max']),
            $lang_note,
            number_format($range['tokens']),
            $range['ratio_pred'],
            $range['ratio_max']
        );
    }
    
    /**
     * Get ratios as a plain array suitable for handing to JavaScript.
     * Only the 'pred', 'max' and 'label' keys of each entry are exposed;
     * encoding to JSON is left to the caller.
     * 
     * @return array Map of language code => ['pred' => float, 'max' => float, 'label' => string]
     */
    public static function get_ratios_for_js() {
        $result = array();
        foreach (self::$default_ratios as $code => $data) {
            $result[$code] = array(
                'pred' => $data['pred'],
                'max' => $data['max'],
                'label' => $data['label']
            );
        }
        return $result;
    }
}
