#!/usr/bin/env python3
"""
Document conversion and compression script
Converts supported document formats to PDF and then tries to compress the result to 4.8 MB or less
Supported formats: PDF, DOC, DOCX, XLS, XLSX, PPT, PPTX, TXT, RTF, ODT, ODS, ODP
"""

import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

def get_file_size_mb(filepath):
    """Return the size of the file at ``filepath`` in megabytes.

    One megabyte is counted as 1024 * 1024 bytes.  The result is a float.
    Any ``OSError`` raised while inspecting the file (for example when it
    does not exist) propagates to the caller.
    """
    bytes_per_mb = 1024 * 1024
    size_in_bytes = os.stat(filepath).st_size
    return size_in_bytes / bytes_per_mb

def convert_to_pdf(input_file, output_pdf):
    """Produce a PDF at ``output_pdf`` from the document at ``input_file``.

    The converter is chosen from the (case-insensitive) file extension:

    * ``.pdf`` files are copied to ``output_pdf`` unchanged,
    * ``.txt`` files go through ``convert_text_to_pdf``,
    * office formats go through ``convert_office_to_pdf``.

    Args:
        input_file: Path of the source document.
        output_pdf: Path the resulting PDF is written to.

    Returns:
        True once the copy or conversion has completed.

    Raises:
        ValueError: If the extension is not one of the supported formats.
        OSError: If copying a PDF input fails.
        Exception: Whatever the selected converter raises on failure.
    """
    office_extensions = {
        '.docx', '.doc',
        '.xlsx', '.xls',
        '.pptx', '.ppt',
        '.rtf',
        '.odt', '.ods', '.odp',
    }
    file_ext = Path(input_file).suffix.lower()

    if file_ext == '.pdf':
        # Copy in-process instead of spawning `cp`: this works on platforms
        # without a `cp` binary and avoids a needless child process.
        shutil.copy(input_file, output_pdf)
    elif file_ext == '.txt':
        convert_text_to_pdf(input_file, output_pdf)
    elif file_ext in office_extensions:
        convert_office_to_pdf(input_file, output_pdf)
    else:
        raise ValueError(f"Unsupported file format: {file_ext}")

    return True

def convert_text_to_pdf(input_file, output_pdf):
    """Convert a plain text file to PDF.

    Plain text gets no special handling: the call is forwarded unchanged to
    ``convert_office_to_pdf``, so the same arguments and the same exceptions
    apply.  Nothing is returned.
    """
    convert_office_to_pdf(input_file, output_pdf)

def convert_office_to_pdf(input_file, output_pdf):
    """Convert an office document to PDF by running LibreOffice headlessly.

    The conversion writes into a private temporary directory and the
    resulting file is then moved to ``output_pdf``.  Two launcher names are
    tried in order, ``libreoffice`` and then ``soffice``; the second one is
    used when the first is not installed or ran without producing a PDF.

    Args:
        input_file: Path of the document to convert.
        output_pdf: Path the finished PDF is moved to.

    Returns:
        None.

    Raises:
        FileNotFoundError: If neither launcher is installed, or if the
            conversion ran but no PDF was produced.
        Exception: If a launcher exceeds the 60 second timeout or exits
            with a non-zero status.
    """
    launchers = (
        ['libreoffice', '--headless', '--invisible', '--nodefault',
         '--nolockcheck', '--nologo', '--norestore'],
        ['soffice', '--headless'],
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        # The converter is expected to name its output "<input stem>.pdf"
        # inside the --outdir directory.
        temp_pdf = os.path.join(temp_dir, f"{Path(input_file).stem}.pdf")
        convert_args = [
            '--convert-to', 'pdf',
            '--outdir', temp_dir,
            input_file,
        ]
        launched = False

        for launcher in launchers:
            try:
                # Output is captured for every launcher so converter chatter
                # never reaches this script's own stdout.
                subprocess.run(
                    launcher + convert_args,
                    check=True, capture_output=True, timeout=60,
                )
            except FileNotFoundError:
                # This launcher is not installed; a missing executable must
                # not prevent the next candidate from being tried.
                continue
            except subprocess.TimeoutExpired as exc:
                raise Exception("Document conversion timed out") from exc
            except subprocess.CalledProcessError as exc:
                raise Exception(
                    f"Document conversion failed: {str(exc)}"
                ) from exc

            launched = True
            if os.path.exists(temp_pdf):
                # Move to output location
                shutil.move(temp_pdf, output_pdf)
                return

        if not launched:
            raise FileNotFoundError(
                "Conversion failed - LibreOffice executable not found "
                "(tried: libreoffice, soffice)"
            )
        raise FileNotFoundError("Conversion failed - PDF not created")

def compress_pdf_progressive(input_pdf, output_pdf, target_mb=4.8):
    """Write ``input_pdf`` to ``output_pdf``, shrinking it with Ghostscript if needed.

    Steps, stopping at the first one that applies:

    1. If the input is already at most ``target_mb`` megabytes it is copied
       unchanged.
    2. The ``gs`` presets ``/default``, ``/ebook`` and ``/screen`` are tried
       in that order; the first result that fits the target is kept.
    3. A final ``/screen`` pass with explicit 72 dpi downsampling of colour,
       grey and mono images is written straight to ``output_pdf``.  Its size
       is NOT checked against ``target_mb``.
    4. If that final pass fails, the original file is copied instead.

    Args:
        input_pdf: Path of the PDF to compress.
        output_pdf: Destination path.  It must be a ``str``, because the
            scratch file name is built by appending ``'.temp'`` to it.
        target_mb: Size limit in megabytes (1 MB = 1024 * 1024 bytes).

    Returns:
        True when the output was produced by step 1, 2 or 3; False when
        step 4 fell back to an uncompressed copy.  True therefore does not
        guarantee that the output is within ``target_mb``.

    Raises:
        OSError: If a file cannot be inspected, copied or moved, or if the
            ``gs`` executable cannot be started.
    """
    # First, check if compression is needed
    if get_file_size_mb(input_pdf) <= target_mb:
        shutil.copy(input_pdf, output_pdf)
        return True

    gs_head = ['gs', '-sDEVICE=pdfwrite', '-dCompatibilityLevel=1.4']
    gs_tail = ['-dNOPAUSE', '-dQUIET', '-dBATCH']

    # Preset results go to a scratch file so output_pdf is only replaced by
    # a result that actually meets the target.
    temp_output = output_pdf + '.temp'
    try:
        for setting in ('/default', '/ebook', '/screen'):
            try:
                subprocess.run(
                    gs_head
                    + [f'-dPDFSETTINGS={setting}']
                    + gs_tail
                    + ['-sOutputFile=' + temp_output, input_pdf],
                    check=True, capture_output=True,
                )
            except subprocess.CalledProcessError:
                # This preset failed; try the next one.
                continue

            if get_file_size_mb(temp_output) <= target_mb:
                shutil.move(temp_output, output_pdf)
                return True
    finally:
        # Without this, an oversized "<output>.temp" was left behind every
        # time none of the presets reached the target.
        if os.path.exists(temp_output):
            os.unlink(temp_output)

    # If still too large, use aggressive compression
    try:
        subprocess.run(
            gs_head
            + [
                '-dPDFSETTINGS=/screen',
                '-dDownsampleColorImages=true',
                '-dColorImageResolution=72',
                '-dDownsampleGrayImages=true',
                '-dGrayImageResolution=72',
                '-dDownsampleMonoImages=true',
                '-dMonoImageResolution=72',
            ]
            + gs_tail
            + ['-sOutputFile=' + output_pdf, input_pdf],
            check=True, capture_output=True,
        )
        return True
    except subprocess.CalledProcessError:
        # Last resort - copy original if compression fails
        shutil.copy(input_pdf, output_pdf)
        return False

def main():
    """Command-line entry point: ``<input_file> <output_pdf>``.

    Exactly two arguments are required.  A PDF input is compressed directly;
    any other input is first converted into a temporary PDF, which is then
    compressed into the output path and removed afterwards.

    The function always terminates through ``sys.exit``: status 0 after
    printing "Success", status 1 after printing a usage or error message.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python3 convert_document.py <input_file> <output_pdf>")
        sys.exit(1)

    source_path, destination_path = cli_args

    # Refuse to continue when the source is missing
    if not os.path.exists(source_path):
        print(f"Error: Input file {source_path} not found")
        sys.exit(1)

    try:
        already_pdf = Path(source_path).suffix.lower() == '.pdf'

        if already_pdf:
            # Nothing to convert, only compress
            compress_pdf_progressive(source_path, destination_path)
        else:
            # Reserve a scratch file name; the handle is closed right away
            # and the file is deleted by hand once it is no longer needed.
            with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as scratch:
                scratch_path = scratch.name

            try:
                convert_to_pdf(source_path, scratch_path)
                compress_pdf_progressive(scratch_path, destination_path)
            finally:
                if os.path.exists(scratch_path):
                    os.unlink(scratch_path)

        print("Success")
        sys.exit(0)

    except Exception as e:
        print(f"Error: {str(e)}")
        sys.exit(1)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()