#!/bin/bash

# Site Crawler Run Script
# This script runs the website navigation crawler with common configurations

# Abort as soon as any command fails.
set -o errexit

# Built-in settings. URL starts empty and is taken from the positional
# argument; the others can be replaced by command line options.
URL=''
MAX_DEPTH='3'
MAX_PAGES='100'
DELAY='1.0'
OUTPUT='navigation_structure.json'

# Print the help text (banner, synopsis, options, examples) to stdout.
# "$0" is interpolated so the synopsis and examples show the name the
# script was invoked with.
show_usage() {
    printf '%s\n' \
        "🌐 Website Navigation Crawler" \
        "=============================" \
        "" \
        "Usage: $0 [OPTIONS] <URL>" \
        "" \
        "Options:" \
        "  -d, --depth <number>     Maximum crawl depth (default: 3)" \
        "  -p, --pages <number>     Maximum pages to crawl (default: 100)" \
        "  -t, --delay <seconds>    Delay between requests (default: 1.0)" \
        "  -o, --output <file>      Output JSON file (default: navigation_structure.json)" \
        "  -h, --help              Show this help message" \
        "" \
        "Examples:" \
        "  $0 https://example.com" \
        "  $0 -d 2 -p 50 https://example.com" \
        "  $0 --depth 4 --pages 200 --delay 2.0 --output my_site.json https://example.com" \
        ""
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        -d|--depth)
            MAX_DEPTH="$2"
            shift 2
            ;;
        -p|--pages)
            MAX_PAGES="$2"
            shift 2
            ;;
        -t|--delay)
            DELAY="$2"
            shift 2
            ;;
        -o|--output)
            OUTPUT="$2"
            shift 2
            ;;
        -h|--help)
            show_usage
            exit 0
            ;;
        -*)
            echo "❌ Unknown option: $1"
            show_usage
            exit 1
            ;;
        *)
            if [ -z "$URL" ]; then
                URL="$1"
            else
                echo "❌ Multiple URLs provided. Please specify only one URL."
                exit 1
            fi
            shift
            ;;
    esac
done

# A URL is mandatory: without one, print the usage text and stop.
if [[ -z "$URL" ]]; then
    echo "❌ No URL provided."
    show_usage
    exit 1
fi

# Only http:// and https:// URLs are accepted.
case "$URL" in
    http://*|https://*)
        ;;
    *)
        echo "❌ Invalid URL format. Please provide a URL starting with http:// or https://"
        exit 1
        ;;
esac

# The crawler is run from the crawler_env directory, looked up relative to
# the current working directory.
[[ -d crawler_env ]] || {
    echo "❌ Virtual environment not found. Please run setup.sh first."
    exit 1
}

# Show the effective settings before starting.
printf '%s\n' \
    "🌐 Starting Website Navigation Crawler..." \
    "========================================" \
    "URL: $URL" \
    "Max Depth: $MAX_DEPTH" \
    "Max Pages: $MAX_PAGES" \
    "Delay: $DELAY seconds" \
    "Output: $OUTPUT" \
    ""

# Run the crawler with the virtual environment's own Python interpreter
# (the environment is not activated; its interpreter is called directly).
printf '%s\n' "🚀 Running crawler..."
crawler_cmd=(
    crawler_env/bin/python site_crawler.py "$URL"
    --max-depth "$MAX_DEPTH"
    --max-pages "$MAX_PAGES"
    --delay "$DELAY"
    --output "$OUTPUT"
)
"${crawler_cmd[@]}"

# Reached only when the crawler succeeded (errexit aborts the script otherwise).
printf '%s\n' \
    "" \
    "✅ Crawl completed! Results saved to: $OUTPUT" \
    "" \
    "📊 To analyze the results:" \
    "   python example_usage.py" \
    "   python demo_crawler.py"