#!/bin/sh -vx

# ----------------------------- #
# Static definitions
# ----------------------------- #

# Absolute path of the converter binary.
PDF2HTML=/usr/local/bin/pdftohtml
# Converter flags, kept as one space-separated string; it is expanded
# unquoted at the call site so the shell splits it into separate arguments.
# See pdftohtml(1) for the meaning of each flag.
OPTION='-p -q -i -noframes'
# Directory in which the per-run output files are created.
OUTDIR='/data/htdig/tmp'

# File that receives converter output and this script's own messages.
LOGFILE='/logs/htdig/tmp/konverter/pdftohtml'

# ----------------------------- #
# Catching input parameters
# ----------------------------- #

# Positional arguments, in order:
#   $1 - path of the file handed to the converter as input
#   $2 - MIME type of that file (must be application/pdf, checked below)
#   $3 - URL of the document (used for the title and in log messages)
#   $4 - stored in $conf but not referenced anywhere else in this script
#        (presumably the caller's configuration file -- TODO confirm)
infile=$1
mimetype=$2
url=$3
conf=$4

# ------------------------------------------------------------------ #
# pdftohtml does not set the document title, so we replace it with the
# document's name: the last path component of the URL, cut off at its
# first dot.
# ------------------------------------------------------------------ #
name=$(basename "$3" | cut -d. -f1)

# ----------------------------- #
# Check the mime type
# ----------------------------- #

# Only PDF input is handled; any other MIME type is logged and the script
# stops with exit status 1 before anything is written to disk.
case "$mimetype" in
        application/pdf)
                ;;
        *)
                echo "$url is $mimetype instead of application/pdf" >> "${LOGFILE}" 2>&1
                exit 1
                ;;
esac

# ----------------------------- #
# Create temporary output file
# ----------------------------- #

# Base path for this run's output files; the shell's PID ($$) is appended so
# that concurrently running instances use different names.
outfile="${OUTDIR}/pdftohtml.out-$$"
# Remove leftovers from an earlier process that happened to have the same PID.
rm -f "$outfile"*

# ----------------------------- #
# Start conversion
# ----------------------------- #

# $OPTION is deliberately left unquoted so it splits into individual flags.
# The input and output paths are quoted so that names containing whitespace
# or glob characters reach the converter as exactly one argument each.
# Converter output (stdout and stderr) is appended to the log file.
"$PDF2HTML" $OPTION "$infile" "$outfile" >> "${LOGFILE}" 2>&1

# ----------------------------- #
# Modify HTML title tag
# ----------------------------- #

# set_html_title FILE TITLE
#   Replace whatever is between <TITLE> and </TITLE> in FILE with TITLE,
#   rewriting FILE in place via a temporary "FILE.new".
#   Returns 0 on success; returns non-zero and leaves FILE untouched when
#   FILE does not exist or the rewrite fails.
set_html_title() {
        # No converted file means nothing to rewrite. Bail out before the
        # redirection below would create an empty ".new" file.
        [ -f "$1" ] || return 1

        # TITLE is derived from the URL, so it may contain characters that
        # are special in the replacement part of the s### command below:
        # "\" and "&" always, and "#" because it is the delimiter used here.
        # Prefix each of them with a backslash so they are taken literally.
        _title_escaped=$(printf '%s\n' "$2" | sed -e 's/[\\&#]/\\&/g')

        if sed -e 's#\(<TITLE>\).*\(</TITLE>\)#\1'"${_title_escaped}"'\2#' "$1" > "$1.new"
        then
                mv "$1.new" "$1"
        else
                # Do not leave a partial result behind.
                rm -f "$1.new"
                return 1
        fi
}

set_html_title "$outfile.html" "$name"

# ----------------------------- #
# cat file to stdout
# ----------------------------- #

# Decide the exit status from the converted file: a missing or empty file
# counts as a failed conversion and is logged; otherwise the HTML is written
# to stdout.
if [ ! -s "${outfile}.html" ]
then
        echo "$0: $infile couldn't be converted to HTML (Outfile: ${outfile}.html, URL: $3)" >> "${LOGFILE}" 2>&1
        retcode=1
else
        cat "${outfile}.html"
        retcode=0
fi

# ----------------------------- #
# Clean up
# ----------------------------- #
# Remove everything this run created under the output base path, then any
# file in /tmp whose name starts with "pdftohtml".
# NOTE(review): the /tmp pattern is not tied to this process, so it can also
# match files belonging to other running instances -- confirm this is wanted.
for prefix in "$outfile" "/tmp/pdftohtml"
do
        rm -f "$prefix"*
done
# Report the status decided above (0 = HTML emitted, 1 = conversion failed).
exit $retcode

# EOF

