import os
import json
import re
import random
import time
import multiprocessing
from datetime import datetime, timedelta
from bs4 import BeautifulSoup

# --- Optional progress bar (tqdm); the script runs without it ---
try:
    from tqdm import tqdm
    USE_TQDM = True
except ImportError:
    USE_TQDM = False

# CONFIGURATION
# Directory (relative to the current working directory) that main() scans
# for job post HTML files.
JOBS_DIR = 'jobs'
# Number of days added to the jittered "now" to compute the JobPosting
# validThrough timestamp (see get_jittered_dates).
FUTURE_DAYS = 180 

# Default Fallbacks
# Months of experience returned by get_smart_experience() when the job title
# is empty or contains none of the seniority keywords.
DEFAULT_MONTHS_EXPERIENCE = 3   
# Salary range written by process_file() when a baseSalary entry contains no
# usable minValue / maxValue / value.
DEFAULT_SALARY_MIN = 19.00
DEFAULT_SALARY_MAX = 26.00            
# unitText written when the existing salary value object does not provide one.
DEFAULT_SALARY_UNIT = "HOUR"
# Category forced by fix_education() when the existing credentialCategory
# matches none of VALID_EDU_LEVELS.
DEFAULT_EDUCATION_CATEGORY = "High School" # Must be a valid Schema enum

# Valid Google Schema Education Levels (Lowercase for matching)
# fix_education() substring-matches these against the lowercased existing
# category and writes the match back in title case (e.g. "Bachelor Degree").
# NOTE(review): Google's JobPosting documentation appears to list
# "postgraduate degree" rather than "master degree" / "doctorate degree", and
# spells the values in lowercase - confirm against the current guidelines.
VALID_EDU_LEVELS = [
    "high school", 
    "associate degree", 
    "bachelor degree", 
    "master degree", 
    "doctorate degree", 
    "professional certificate"
]

# --- INTELLIGENT HELPER FUNCTIONS ---

def get_jittered_dates():
    """Return jittered "posted" and "valid through" timestamps.

    The current time is moved back by a random 0-240 minutes so that files
    processed in the same run do not all carry an identical timestamp.

    The time is computed in an explicit UTC-05:00 timezone. Previously the
    machine's naive local time was stamped with a hard-coded ``-05:00``
    suffix, which produced a wrong instant (possibly in the future) on any
    host whose clock is not set to UTC-5.

    Returns:
        tuple[str, str, str]: ``(current_iso, readable, future_iso)`` where
        the ISO values look like ``2024-01-31T09:15:00-05:00``, ``readable``
        looks like ``January 31, 2024`` and ``future_iso`` is ``FUTURE_DAYS``
        days after ``current_iso``.
    """
    # Local import: the file header only imports datetime and timedelta.
    from datetime import timezone

    fixed_offset = timezone(timedelta(hours=-5))
    jitter_minutes = random.randint(0, 240)
    now_jittered = datetime.now(fixed_offset) - timedelta(minutes=jitter_minutes)
    future_date = now_jittered + timedelta(days=FUTURE_DAYS)
    return (
        # timespec='seconds' drops microseconds and renders the offset as
        # "-05:00", i.e. the same textual format as before.
        now_jittered.isoformat(timespec='seconds'),
        now_jittered.strftime('%B %d, %Y'),
        future_date.isoformat(timespec='seconds'),
    )

def get_smart_experience(job_title):
    """Guess the required months of experience from keywords in a job title.

    Keyword groups are checked in priority order using case-insensitive
    substring matching; the first group with a hit decides the result.

    Args:
        job_title: The posting title. Falsy values (None, "") yield the
            default.

    Returns:
        int: 60 for senior/sr./lead titles, 48 for manager/director titles,
        0 for junior/entry/intern titles, otherwise
        ``DEFAULT_MONTHS_EXPERIENCE``.
    """
    if not job_title:
        return DEFAULT_MONTHS_EXPERIENCE

    lowered = job_title.lower()
    # Order matters: an earlier tier wins when several keywords are present.
    seniority_tiers = (
        (('senior', 'sr.', 'lead'), 60),
        (('manager', 'director'), 48),
        (('junior', 'entry', 'intern'), 0),
    )
    for keywords, months in seniority_tiers:
        if any(keyword in lowered for keyword in keywords):
            return months
    return DEFAULT_MONTHS_EXPERIENCE

def force_float(val):
    """Best-effort conversion of a scalar or messy string to a float.

    Args:
        val: A number, a string such as ``"$19.50/hr."`` or ``"1,250.75"``,
            or anything else whose ``str()`` may contain a number.

    Returns:
        float | None: For numeric input, its float value. For other input,
        the FIRST number found in its text (thousands separators removed).
        ``None`` when the input is None, a bool, contains no digits, or is
        not finite (NaN / infinity / overflow).

    Notes:
        Only the first number is used, so a range like ``"19-26"`` yields
        19.0 instead of fusing the digits into 1926.0. A leading minus sign
        in a string is ignored.
    """
    import math  # local import: math is not imported at file level

    # bool is a subclass of int; True/False are never meaningful amounts.
    if val is None or isinstance(val, bool):
        return None

    if isinstance(val, (int, float)):
        try:
            number = float(val)
        except OverflowError:
            # Integers too large to be represented as a float.
            return None
    else:
        text = str(val).replace(',', '')
        found = re.search(r'\d+(?:\.\d+)?|\.\d+', text)
        if found is None:
            return None
        number = float(found.group())

    # NaN / inf cannot be serialised as valid JSON numbers.
    return number if math.isfinite(number) else None

def fix_employment_type(data):
    """Replace 'HOURLY' employment types with 'PART_TIME' for Schema compliance.

    ``data['employmentType']`` may be a single string or a list. Any string
    that contains ``HOURLY`` (case-insensitive) is replaced wholesale by
    ``'PART_TIME'``. Everything else, including non-string list items such
    as ``None``, is passed through unchanged instead of raising.

    Args:
        data: The JobPosting dict; modified in place.

    Returns:
        dict: The same ``data`` object, for call chaining.
    """
    def _normalise(value):
        # Guard on str: JSON arrays may contain null or nested values.
        if isinstance(value, str) and 'HOURLY' in value.upper():
            return 'PART_TIME'
        return value

    etype = data.get('employmentType')
    if isinstance(etype, str):
        data['employmentType'] = _normalise(etype)
    elif isinstance(etype, list):
        data['employmentType'] = [_normalise(item) for item in etype]
    return data

def fix_education(data):
    """Ensure ``educationRequirements.credentialCategory`` is a valid enum.

    Only acts when ``data['educationRequirements']`` is a non-empty dict.
    The existing category is lowercased and loosely (substring) matched
    against ``VALID_EDU_LEVELS``; the first match is written back in title
    case. When nothing matches (e.g. 'Commercial Driving School'), or when
    the category is missing, null or not a string, the category is set to
    ``DEFAULT_EDUCATION_CATEGORY``.

    Args:
        data: The JobPosting dict; modified in place.

    Returns:
        dict: The same ``data`` object, for call chaining.
    """
    edu = data.get('educationRequirements')
    if not edu or not isinstance(edu, dict):
        return data

    raw_category = edu.get('credentialCategory')
    # A present-but-null (or otherwise non-string) value must not crash the
    # lookup; treat it as "no category" so the default is applied.
    current_cat = raw_category.lower() if isinstance(raw_category, str) else ''

    for valid_level in VALID_EDU_LEVELS:
        if valid_level in current_cat:
            # Normalize to the clean valid string.
            edu['credentialCategory'] = valid_level.title()
            break
    else:
        # No valid level found: force the default.
        edu['credentialCategory'] = DEFAULT_EDUCATION_CATEGORY

    return data

# --- WORKER PROCESS ---

def _fix_base_salary(data):
    """Rebuild ``data['baseSalary']['value']`` as a complete QuantitativeValue."""
    base_salary = data.get('baseSalary')
    if not base_salary or not isinstance(base_salary, dict):
        base_salary = {"@type": "MonetaryAmount", "currency": "USD", "value": {}}
        data['baseSalary'] = base_salary

    raw_value = base_salary.get('value')
    if isinstance(raw_value, dict):
        val_obj = raw_value
        scalar_amount = None
    else:
        val_obj = {}
        # A bare number/string amount is kept as the single known value
        # instead of being discarded in favour of the defaults.
        is_scalar = isinstance(raw_value, (int, float, str)) and not isinstance(raw_value, bool)
        scalar_amount = force_float(raw_value) if is_scalar else None

    # Extract & clean
    f_min = force_float(val_obj.get('minValue'))
    f_max = force_float(val_obj.get('maxValue'))
    f_val = force_float(val_obj.get('value'))
    if f_val is None:
        f_val = scalar_amount

    # Derive whichever bound is missing from the information available.
    if f_val is not None and f_min is None:
        f_min = f_val
    if f_max is not None and f_min is None:
        f_min = f_max
    if f_min is not None and f_max is None:
        f_max = round(f_min * 1.25, 2)

    # Defaults when no amount could be recovered at all.
    if f_min is None:
        f_min = DEFAULT_SALARY_MIN
    if f_max is None:
        f_max = DEFAULT_SALARY_MAX

    # Never emit an inverted range.
    if f_min > f_max:
        f_min, f_max = f_max, f_min

    base_salary['value'] = {
        "@type": "QuantitativeValue",
        # "or" (not a .get default) so that an explicit null is replaced too.
        "unitText": val_obj.get('unitText') or DEFAULT_SALARY_UNIT,
        "minValue": float(f_min),
        "maxValue": float(f_max),
    }


def _refresh_schema_node(node, current_iso, future_iso):
    """Update one JSON-LD dict in place; return True if it was a handled type."""
    node_type = node.get('@type')

    if node_type == 'JobPosting':
        node['datePosted'] = current_iso
        node['validThrough'] = future_iso

        # Both helpers mutate the dict in place.
        fix_employment_type(node)
        fix_education(node)

        # Fill in experience only when it is missing or non-positive.
        exp_req = node.get('experienceRequirements')
        if exp_req and isinstance(exp_req, dict):
            current_months = exp_req.get('monthsOfExperience')
            if current_months is None or (
                isinstance(current_months, (int, float)) and current_months <= 0
            ):
                title = node.get('title', '')
                exp_req['monthsOfExperience'] = get_smart_experience(
                    title if isinstance(title, str) else ''
                )

        # Salary is only repaired when the posting declares one.
        if 'baseSalary' in node:
            _fix_base_salary(node)
        return True

    if node_type in ('Article', 'BreadcrumbList'):
        node['dateModified'] = current_iso
        return True

    return False


def process_file(file_path):
    """Refresh dates and repair JSON-LD schema in one job post HTML file.

    Steps:
      1. Set the ``article:published_time`` / ``article:modified_time`` meta
         tags and the ``<p class="posted-date">`` text to a jittered "now".
      2. For every ``application/ld+json`` script: refresh JobPosting dates,
         employment type, education, experience and salary; refresh
         ``dateModified`` on Article / BreadcrumbList nodes. Empty scripts
         are skipped, and a top-level JSON array is processed item by item.
      3. Write the re-serialised document back to ``file_path``.

    Args:
        file_path: Path of the HTML file; it is overwritten in place.

    Returns:
        dict: ``{'status': 'FIXED'}`` on success, otherwise a dict with
        ``status`` (``ERROR_READING`` / ``ERROR_JSON`` / ``ERROR_SCHEMA`` /
        ``ERROR_WRITING``), ``message`` and ``file``. On any error the file
        is left unwritten.
    """
    # Broad excepts are intentional here: this runs inside a pool worker and
    # reports failures as result dicts rather than raising.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            html_content = f.read()
    except Exception as e:
        return {'status': 'ERROR_READING', 'message': str(e), 'file': file_path}

    soup = BeautifulSoup(html_content, 'lxml')
    current_iso, current_readable, future_iso = get_jittered_dates()

    # 1. UPDATE HTML META TAGS
    for meta_prop in ('article:published_time', 'article:modified_time'):
        meta_tag = soup.find('meta', property=meta_prop)
        if meta_tag:
            meta_tag['content'] = current_iso

    posted_date_tag = soup.find('p', class_='posted-date')
    if posted_date_tag:
        posted_date_tag.string = f"Posted on {current_readable}"

    # 2. FIX JSON-LD SCHEMA
    for script in soup.find_all('script', type='application/ld+json'):
        raw_json = script.string
        # .string is None for an empty tag; there is nothing to fix then.
        if raw_json is None or not raw_json.strip():
            continue

        try:
            data = json.loads(raw_json)
            # JSON-LD allows a top-level array of nodes as well as one object.
            nodes = data if isinstance(data, list) else [data]
            changed = False
            for node in nodes:
                if isinstance(node, dict) and _refresh_schema_node(node, current_iso, future_iso):
                    changed = True
            if changed:
                script.string = json.dumps(data, indent=2)
        except json.JSONDecodeError:
            return {'status': 'ERROR_JSON', 'message': 'Invalid JSON', 'file': file_path}
        except Exception as e:
            return {'status': 'ERROR_SCHEMA', 'message': str(e), 'file': file_path}

    # 3. WRITE BACK
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(str(soup))
        return {'status': 'FIXED'}
    except Exception as e:
        return {'status': 'ERROR_WRITING', 'message': str(e), 'file': file_path}


# --- MAIN EXECUTION ---

def main():
    """Scan ``JOBS_DIR`` and process every job post HTML file in parallel.

    Regular files ending in ``.html`` are selected, except ``index.html`` and
    pagination pages named ``page-<n>.html``. Each file is handed to
    ``process_file`` in a multiprocessing pool; a summary with counts, the
    elapsed time and the first few errors is printed at the end.
    """
    # Per the multiprocessing docs this should run before any other work so
    # that frozen executables can hand over to child processes cleanly.
    multiprocessing.freeze_support()
    print("⚙️ Starting UNIVERSAL Job Editor...")

    # isdir (not exists): a plain file with this name cannot be scanned.
    if not os.path.isdir(JOBS_DIR):
        print(f"❌ Error: Directory '{JOBS_DIR}' not found.")
        return

    print("🔍 Scanning directory...")
    pagination_page = re.compile(r'^page-\d+\.html$')
    target_files = []

    with os.scandir(JOBS_DIR) as entries:
        for entry in entries:
            # Skip sub-directories that merely happen to be named *.html.
            if not entry.is_file():
                continue
            if not entry.name.endswith('.html') or entry.name == 'index.html':
                continue
            if pagination_page.match(entry.name):
                continue
            target_files.append(entry.path)

    total_files = len(target_files)
    print(f"🎯 Found {total_files} job posts.")

    if total_files == 0:
        return

    # Never start more workers than there are files to process.
    num_cores = min(multiprocessing.cpu_count(), total_files)
    print(f"🚀 Processing with {num_cores} Cores.")

    # Cap the chunk size at 50 but shrink it for small batches so that the
    # work is still spread across all workers.
    chunk_size = max(1, min(50, total_files // (num_cores * 4)))

    start_time = time.time()
    results = {'FIXED': 0, 'ERRORS': 0}
    error_details = []

    with multiprocessing.Pool(processes=num_cores) as pool:
        iterator = pool.imap_unordered(process_file, target_files, chunksize=chunk_size)
        if USE_TQDM:
            iterator = tqdm(iterator, total=total_files)

        for result in iterator:
            if result['status'] == 'FIXED':
                results['FIXED'] += 1
            else:
                results['ERRORS'] += 1
                error_details.append(f"{result.get('file')} -> {result.get('message')}")

    elapsed = time.time() - start_time

    print("\n" + "="*50)
    print("✨ Complete.")
    print(f"Files Updated: {results['FIXED']}")
    print(f"Errors: {results['ERRORS']}")
    print(f"Time Taken: {elapsed:.2f}s")

    if error_details:
        max_shown = 5
        print("\n⚠️ Error Report:")
        for err in error_details[:max_shown]:
            print(err)
        hidden = len(error_details) - max_shown
        if hidden > 0:
            # Make the truncation explicit instead of silently dropping errors.
            print(f"... and {hidden} more")
    print("="*50)

# Entry-point guard: run main() only when this file is executed as a script,
# not when it is imported. NOTE(review): with multiprocessing start methods
# that re-import the main module in worker processes, this guard is what
# keeps the workers from re-running main() - keep it in place.
if __name__ == "__main__":
    main()