#!/bin/bash

# --- CONFIGURATION ---
# Tool locations.
#   KRAKEN_BIN : kraken executable inside the user's virtualenv.
#   MAGICK     : local 'magick' binary, resolved relative to the directory
#                the script is started from.
KRAKEN_BIN="${HOME}/kraken_env/bin/kraken"
MAGICK='./magick'

# Recognition model handed to kraken via -m.
MODEL='farhangan_v1_best.mlmodel'

# Working directories: source images, OCR text output, and the scratch
# area that receives the preprocessed PNGs.
INPUT_DIR='manuscripts'
OUTPUT_DIR='ocr_results'
TEMP_DIR='prepped_images'

# Images narrower than this many pixels are upscaled before OCR.
WIDTH_THRESHOLD=3000

# --- SETUP & PRE-FLIGHT CHECKS ---
# Everything below must succeed before any image is touched; each failure
# prints a diagnostic on stderr and aborts with exit status 1.

# Create the output and scratch directories. Every later step writes into
# them, so there is no point continuing if they cannot be created.
if ! mkdir -p -- "$OUTPUT_DIR" "$TEMP_DIR"; then
    echo "❌ ERROR: Could not create '$OUTPUT_DIR' and/or '$TEMP_DIR'" >&2
    exit 1
fi

# The recognition model must be a regular file.
if [[ ! -f "$MODEL" ]]; then
    echo "❌ ERROR: $MODEL not found!" >&2
    exit 1
fi

# The local magick binary must exist AND be executable; a file without the
# execute bit would otherwise fail once per image later on.
if [[ ! -f "$MAGICK" ]]; then
    echo "❌ ERROR: Local 'magick' binary not found at $MAGICK" >&2
    exit 1
fi
if [[ ! -x "$MAGICK" ]]; then
    echo "❌ ERROR: '$MAGICK' exists but is not executable (try: chmod +x \"$MAGICK\")" >&2
    exit 1
fi

# kraken is invoked for every image, so verify it up front as well.
# 'command -v' accepts both an explicit path and a bare command name on PATH.
if ! command -v "$KRAKEN_BIN" >/dev/null 2>&1; then
    echo "❌ ERROR: kraken executable not found at $KRAKEN_BIN" >&2
    exit 1
fi

echo "🚀 Starting Multi-Scale Adaptive OCR..."
echo "🧠 Model: $MODEL"
echo "--------------------------------------------------------"

# --- MAIN BATCH LOOP ---
# For every image in INPUT_DIR:
#   1. measure its width with 'magick identify',
#   2. upscale by 150% when it is narrower than WIDTH_THRESHOLD,
#   3. normalise it (grayscale, level stretch, unsharp, deskew) into TEMP_DIR,
#   4. OCR the result with kraken: baseline segmenter first, then the
#      box segmenter if the first pass produced no text.
# Per-image failures are reported and the loop moves on to the next file.

# nullglob: unmatched patterns expand to nothing instead of staying literal.
shopt -s nullglob
images=( "$INPUT_DIR"/*.{png,jpg,jpeg,tiff} )

# Without this notice an empty or misspelled input directory would look
# like a successful (but silent) run.
if [ "${#images[@]}" -eq 0 ]; then
    echo "⚠️  No .png/.jpg/.jpeg/.tiff images found in '$INPUT_DIR'."
fi

for img in "${images[@]}"; do

    base_name=$(basename -- "$img")
    file_no_ext="${base_name%.*}"
    prepped_img="$TEMP_DIR/${file_no_ext}_prepped.png"
    out_txt="$OUTPUT_DIR/${file_no_ext}.txt"

    # 1. DETECT RESOLUTION
    # One width per line: multi-frame files (e.g. multi-page TIFF) report a
    # width for every frame, and without the newline those would be glued
    # together into one bogus number. Only the first frame's width is used.
    # stderr is silenced on purpose; a failed probe is handled just below.
    width=$("$MAGICK" identify -format '%w\n' "$img" 2>/dev/null)
    width=${width%%$'\n'*}

    # If width is empty or not a plain number, fall back to 0 ("unknown")
    # so the integer comparisons below cannot error out.
    if [[ ! "$width" =~ ^[0-9]+$ ]]; then
        width=0
    fi

    # 2. ADAPTIVE RESIZING LOGIC
    # An array keeps "-resize" and "150%" as two separate arguments without
    # relying on unquoted word splitting; empty means "no resize".
    resize_args=()
    if [ "$width" -eq 0 ]; then
        echo "⚠️  Could not determine width of $base_name. Skipping upscale."
    elif [ "$width" -lt "$WIDTH_THRESHOLD" ]; then
        echo "🔍 Low res detected (${width}px). Applying 150% upscale..."
        resize_args=(-resize 150%)
    else
        echo "📸 Resolution okay (${width}px). Skipping upscale."
    fi

    echo "🎨 Normalizing: $base_name"

    # 3. IMAGE PREPROCESSING
    # Drop any leftover from a previous run first, so the existence check
    # in step 4 reflects THIS conversion and not a stale file.
    rm -f -- "$prepped_img"
    "$MAGICK" "$img" "${resize_args[@]}" \
        -colorspace gray \
        -level 10%,90% \
        -unsharp 0x5+0.5+0 \
        -deskew 40% \
        "$prepped_img"

    # 4. RUN KRAKEN
    if [ -f "$prepped_img" ]; then
        echo "📄 OCR Processing: $file_no_ext"

        # Remove old output: a non-empty .txt from an earlier run would
        # otherwise suppress the fallback and be reported as a success even
        # when kraken fails this time.
        rm -f -- "$out_txt"

        # Primary Attempt: Neural Baseline (Best for standard books)
        "$KRAKEN_BIN" -i "$prepped_img" "$out_txt" \
            binarize segment -bl ocr -m "$MODEL"

        # Secondary Attempt: Box Segmenter (The "Nasta'liq Rescue")
        # Triggered when the first pass left no file or an empty one.
        if [ ! -s "$out_txt" ]; then
            echo "⚠️  Neural segmenter failed. Retrying with Box Segmenter..."
            "$KRAKEN_BIN" -i "$prepped_img" "$out_txt" \
                binarize segment ocr -m "$MODEL"
        fi

        if [ -s "$out_txt" ]; then
            echo "✅ Success: $out_txt"
        else
            echo "❌ Error: Resulting file is still empty for $base_name"
        fi
    else
        echo "❌ Error: Failed to create $prepped_img"
    fi
    echo "--------------------------------------------------------"
done

echo "🎉 Batch complete. Results in '$OUTPUT_DIR'."
