#!/usr/bin/env python3
"""
PDF Compression Script for WordPress Plugin
Compresses PDFs to target 4.8MB using progressive compression strategy:
1. Try normal compression on all pages first
2. If > 4.8MB, progressively compress last pages more heavily
3. Preserves quality of early pages (most important content)
"""

import sys
import os
import subprocess
import tempfile
import shutil
from pathlib import Path

# Size ceiling for the compressed output, expressed in MB (1 MB = 1024 * 1024 bytes here).
TARGET_SIZE_MB = 4.8
# The same ceiling in bytes; this is a float because TARGET_SIZE_MB is fractional.
TARGET_SIZE_BYTES = TARGET_SIZE_MB * 1024 * 1024

def get_file_size(filepath):
    """Return the size, in bytes, of the file at *filepath*.

    Raises OSError if the file does not exist or cannot be accessed.
    """
    size_in_bytes = os.stat(filepath).st_size
    return size_in_bytes

def get_pdf_page_count(filepath):
    """Return the number of pages in the PDF at *filepath*, or 0 if unknown.

    Ghostscript is asked first; if that fails for any reason (gs missing,
    non-zero exit, unparsable output) the ``pdfinfo`` tool is tried as a
    fallback. The function never raises for tool failures: callers treat a
    return value of 0 as "page count could not be determined".
    """
    # Errors that mean "this tool could not give us an answer":
    #   OSError          - executable missing / not runnable
    #   SubprocessError  - non-zero exit status (check=True)
    #   ValueError       - output was not an integer or could not be decoded
    tool_errors = (OSError, subprocess.SubprocessError, ValueError)

    # The path is embedded in a PostScript string literal "( ... )", so
    # backslashes and parentheses must be escaped or a file name such as
    # "report (1).pdf" could terminate or corrupt the literal.
    ps_path = (
        str(filepath)
        .replace('\\', '\\\\')
        .replace('(', '\\(')
        .replace(')', '\\)')
    )

    try:
        result = subprocess.run(
            ['gs', '-q', '-dNODISPLAY', '-dNOSAFER',
             '-c', f'({ps_path}) (r) file runpdfbegin pdfpagecount = quit'],
            capture_output=True, text=True, check=True
        )
        return int(result.stdout.strip())
    except tool_errors:
        pass  # fall through to the pdfinfo fallback

    # Fallback method using pdfinfo if available
    try:
        result = subprocess.run(
            ['pdfinfo', filepath],
            capture_output=True, text=True, check=True
        )
    except tool_errors:
        return 0

    for line in result.stdout.splitlines():
        key, sep, value = line.partition(':')
        # Match the "Pages" field exactly rather than any line that merely
        # contains the text "Pages:".
        if sep and key.strip() == 'Pages':
            try:
                return int(value.strip())
            except ValueError:
                return 0
    return 0

def compress_pdf_uniform(input_file, output_file, quality='medium'):
    """Compress the entire PDF with a single Ghostscript quality profile.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path the compressed PDF is written to.
        quality: One of 'maximum', 'high', 'medium', 'low', 'minimum'
            (named after the amount of compression, so 'maximum' gives the
            smallest file). Unknown values fall back to the 'medium' profile.

    Returns:
        True if Ghostscript ran and exited with status 0, otherwise False
        (including when the ``gs`` executable cannot be started).
    """
    # quality -> (Ghostscript PDFSETTINGS preset, image resolution in DPI)
    profiles = {
        'maximum': ('/screen', 72),      # Maximum compression, lowest quality
        'high': ('/ebook', 100),         # High compression, medium quality
        'medium': ('/default', 150),     # Medium compression, good quality
        'low': ('/printer', 200),        # Low compression, high quality
        'minimum': ('/prepress', 300),   # Minimum compression, highest quality
    }
    pdfsettings, resolution = profiles.get(quality, profiles['medium'])

    cmd = [
        'gs',
        '-sDEVICE=pdfwrite',
        '-dCompatibilityLevel=1.4',
        f'-dPDFSETTINGS={pdfsettings}',
        '-dNOPAUSE',
        '-dQUIET',
        '-dBATCH',
        '-dDetectDuplicateImages',
        '-dCompressFonts=true',
        '-dEmbedAllFonts=false',
        '-dSubsetFonts=true',
        # Some PDFSETTINGS presets (notably /default) leave image
        # downsampling switched off, in which case the *ImageResolution
        # values below are ignored. Enable it explicitly so the per-quality
        # resolution is actually applied.
        '-dDownsampleColorImages=true',
        '-dDownsampleGrayImages=true',
        '-dDownsampleMonoImages=true',
        '-dColorImageDownsampleType=/Bicubic',
        f'-dColorImageResolution={resolution}',
        '-dGrayImageDownsampleType=/Bicubic',
        f'-dGrayImageResolution={resolution}',
        '-dMonoImageDownsampleType=/Bicubic',
        f'-dMonoImageResolution={resolution}',
        f'-sOutputFile={output_file}',
        input_file
    ]

    try:
        # Output is captured only to keep it off the console; it is left as
        # bytes because it is never inspected and decoding could fail.
        result = subprocess.run(cmd, capture_output=True)
    except OSError:
        # gs is missing or not executable: report failure instead of crashing.
        return False
    return result.returncode == 0

def _run_ghostscript(args):
    """Run ``gs`` with *args*; return True only if it started and exited with status 0."""
    try:
        completed = subprocess.run(['gs', *args], capture_output=True)
    except OSError:
        # gs is missing or not executable.
        return False
    return completed.returncode == 0


def compress_pdf_progressive(input_file, output_file, heavy_start_page, total_pages, base_quality='medium'):
    """Compress a PDF using lighter settings for early pages and heavy settings for the rest.

    Pages ``1 .. heavy_start_page - 1`` are rendered with the /ebook preset at
    the resolution associated with *base_quality*; pages
    ``heavy_start_page .. total_pages`` are rendered with the /screen preset
    at 72 DPI and converted to grayscale. The two parts are then merged into
    *output_file*.

    Args:
        input_file: Path of the PDF to read.
        output_file: Path the combined PDF is written to.
        heavy_start_page: 1-based page number where heavy compression starts.
            Values below 1 are treated as 1 (everything is heavily compressed).
        total_pages: Number of pages in *input_file*.
        base_quality: Quality name used to pick the resolution for the early
            pages; unknown names use 150 DPI.

    Returns:
        True only if every Ghostscript step succeeded and *output_file*
        exists. Any failed step returns False rather than producing a
        document with missing pages.
    """
    heavy_start_page = max(1, heavy_start_page)
    if heavy_start_page > total_pages:
        # Empty heavy range: nothing sensible to produce.
        return False

    # Get base resolution based on quality
    resolution_map = {
        'maximum': 72,
        'high': 100,
        'medium': 150,
        'low': 200,
        'minimum': 300
    }
    base_resolution = resolution_map.get(base_quality, 150)

    common_args = ['-sDEVICE=pdfwrite', '-dNOPAUSE', '-dBATCH', '-dQUIET']

    with tempfile.TemporaryDirectory() as tmpdir:
        parts = []

        # Process normal quality pages if they exist
        if heavy_start_page > 1:
            normal_file = os.path.join(tmpdir, 'normal.pdf')
            rendered = _run_ghostscript(common_args + [
                '-dFirstPage=1',
                f'-dLastPage={heavy_start_page - 1}',
                '-dPDFSETTINGS=/ebook',  # Medium quality for first pages
                f'-dColorImageResolution={base_resolution}',
                f'-dGrayImageResolution={base_resolution}',
                f'-sOutputFile={normal_file}',
                input_file,
            ])
            # A failure here must abort: continuing would silently drop the
            # early pages from the result.
            if not rendered or not os.path.exists(normal_file):
                return False
            parts.append(normal_file)

        # Process heavily compressed pages
        heavy_file = os.path.join(tmpdir, 'heavy.pdf')
        rendered = _run_ghostscript(common_args + [
            f'-dFirstPage={heavy_start_page}',
            f'-dLastPage={total_pages}',
            '-dPDFSETTINGS=/screen',  # Heavy compression for last pages
            '-dColorImageResolution=72',
            '-dGrayImageResolution=72',
            '-dColorConversionStrategy=/Gray',  # Convert to grayscale for more compression
            '-dProcessColorModel=/DeviceGray',
            f'-sOutputFile={heavy_file}',
            input_file,
        ])
        if not rendered or not os.path.exists(heavy_file):
            return False
        parts.append(heavy_file)

        # Merge the parts (a single part is simply copied).
        if len(parts) == 1:
            shutil.copy(parts[0], output_file)
            return True

        merged = _run_ghostscript(common_args + [f'-sOutputFile={output_file}'] + parts)
        if merged and os.path.exists(output_file):
            return True

        # Do not leave a partially written merge result behind, so callers
        # that only check for the file's existence cannot mistake it for
        # a valid PDF.
        try:
            os.remove(output_file)
        except OSError:
            pass
        return False

def _progressive_start_pages(page_count):
    """Return the pages at which to try starting heavy compression, latest start first."""
    if page_count >= 20:
        candidates = [20, 19, 18, 17, 15, 12, 10, 8, 5, 3, 2]
    elif page_count >= 10:
        candidates = [max(10, page_count - 5), max(8, page_count - 7),
                      max(5, page_count - 10), 3, 2]
    elif page_count >= 5:
        candidates = [max(3, page_count - 2), 2]
    else:
        candidates = [2]
    # Filter out pages beyond document length
    return [page for page in candidates if page <= page_count]


def compress_pdf(input_file, output_file, quality='medium'):
    """Compress *input_file* into *output_file*, aiming for TARGET_SIZE_BYTES.

    Strategy, stopping at the first step that reaches the target size:
      1. If the input is already small enough, copy it unchanged.
      2. Uniform compression at the requested *quality*.
      3. Uniform compression at 'maximum' (skipped if already requested).
      4. Progressive compression, heavily compressing from a given page to
         the end and moving that start page earlier on each attempt.

    If no step reaches the target, the smallest successfully produced file
    is written to *output_file* and a warning is printed.

    Args:
        input_file: Path of the PDF to compress.
        output_file: Destination path for the result.
        quality: Quality name passed to the compression helpers.

    Returns:
        True if *output_file* was written (whether or not the target size was
        reached); False if the input is missing or every attempt failed.
    """
    # Verify input file exists
    if not os.path.exists(input_file):
        print(f"Error: Input file {input_file} not found", file=sys.stderr)
        return False

    # Get original size
    original_size = get_file_size(input_file)
    print(f"Original size: {original_size / 1024 / 1024:.2f} MB")

    # If already under target, just copy
    if original_size <= TARGET_SIZE_BYTES:
        shutil.copy(input_file, output_file)
        print(f"File already under {TARGET_SIZE_MB}MB, no compression needed")
        return True

    # Get page count
    page_count = get_pdf_page_count(input_file)
    if page_count == 0:
        print("Warning: Could not determine page count, using uniform compression")
        page_count = 1

    print(f"PDF has {page_count} pages")
    print(f"Using quality setting: {quality}")

    # All intermediate files live in a private directory that is removed on
    # every exit path, so nothing is leaked and no placeholder file can be
    # mistaken for a compression result.
    with tempfile.TemporaryDirectory() as workdir:
        attempt_file = os.path.join(workdir, 'attempt.pdf')
        best_file = os.path.join(workdir, 'best.pdf')
        best_size = None

        def try_attempt(run, label):
            """Run one compression attempt; return True if it met the target size.

            Only attempts that report success and leave a non-empty file are
            considered. The smallest such file so far is kept in best_file.
            """
            nonlocal best_size
            if os.path.exists(attempt_file):
                os.remove(attempt_file)
            if not run() or not os.path.exists(attempt_file):
                return False
            size = get_file_size(attempt_file)
            if size == 0:
                return False
            print(f"{label}: {size / 1024 / 1024:.2f} MB")
            if best_size is None or size < best_size:
                os.replace(attempt_file, best_file)
                best_size = size
            return size <= TARGET_SIZE_BYTES

        # Try uniform compression first with selected quality
        print(f"Trying uniform compression with {quality} quality...")
        reached = try_attempt(
            lambda: compress_pdf_uniform(input_file, attempt_file, quality),
            "Uniform compression result",
        )

        # If not enough, try more aggressive compression
        if not reached and quality != 'maximum':
            print("Trying more aggressive uniform compression...")
            reached = try_attempt(
                lambda: compress_pdf_uniform(input_file, attempt_file, 'maximum'),
                "Aggressive compression result",
            )

        # Progressive compression - heavy compression from a late page first,
        # then from progressively earlier pages.
        heavy_from = None
        if not reached:
            print("Starting progressive compression...")
            for heavy_start in _progressive_start_pages(page_count):
                print(f"Trying heavy compression from page {heavy_start} onwards...")
                if try_attempt(
                    lambda start=heavy_start: compress_pdf_progressive(
                        input_file, attempt_file, start, page_count, quality),
                    f"Progressive compression (from page {heavy_start})",
                ):
                    reached = True
                    heavy_from = heavy_start
                    break

        if best_size is None:
            print("Error: Compression failed completely", file=sys.stderr)
            return False

        # When the target was reached, the attempt that reached it is
        # necessarily the smallest one seen, so best_file is that attempt.
        shutil.move(best_file, output_file)

    if reached:
        print(f"Success! Compressed to {best_size / 1024 / 1024:.2f} MB")
        if heavy_from is not None:
            print(f"Heavy compression applied from page {heavy_from} to {page_count}")
    else:
        # If we still couldn't compress enough, use the best attempt
        print(f"Warning: Could not achieve target size. Final: {best_size / 1024 / 1024:.2f} MB")
    return True

def main():
    """Command-line entry point: parse arguments, check for gs, run the compression.

    Expects ``input.pdf output.pdf [quality]`` in ``sys.argv``. Always ends
    by calling ``sys.exit``: status 0 when compress_pdf reports success,
    status 1 on bad usage, missing Ghostscript, or compression failure.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        print("Usage: python3 compress_pdf.py input.pdf output.pdf [quality]", file=sys.stderr)
        print("Quality options: minimum, low, medium, high, maximum", file=sys.stderr)
        sys.exit(1)

    input_file, output_file = cli_args[0], cli_args[1]
    quality = cli_args[2] if len(cli_args) > 2 else 'medium'

    # An unrecognised quality is not fatal; warn and fall back to 'medium'.
    if quality not in ('minimum', 'low', 'medium', 'high', 'maximum'):
        print(f"Invalid quality: {quality}. Using 'medium'", file=sys.stderr)
        quality = 'medium'

    # Bail out early when Ghostscript cannot be run at all.
    try:
        subprocess.run(['gs', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: Ghostscript (gs) is not installed or not in PATH", file=sys.stderr)
        sys.exit(1)

    succeeded = compress_pdf(input_file, output_file, quality)
    if not succeeded:
        sys.exit(1)

    # Emit the final size on its own line so the WordPress plugin can parse it.
    if os.path.exists(output_file):
        final_size = get_file_size(output_file)
        print(f"FINAL_SIZE:{final_size}")
    sys.exit(0)

# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
