#!/bin/bash
# refdbxml - creates formatted output from XML documents
# Markus Hoenicka <markus@mhoenicka.de> 2001-10-10
# $Id: refdbxml.in,v 1.12.2.7 2006/04/08 21:30:21 mhoenicka Exp $
# OPTIONS: -c (fop configuration file), -s (stylesheet), -h (invoke help),
#          -p (xslt processor), -t (output format), -f (fo processor)

# relies on these external programs: SUN JRE, tex, passivetex, xmltex, JFOR
# at least one of: xerces/xalan, xp/xt, xsltproc, saxon
# and at least one of: passivetex, fop, xep

# use the refdbxmlrc configuration file to adapt this script to your needs

## initialise variables
# Stock XSL stylesheets (used without any RefDB extensions). Every document
# flavour has one entry per transformation target: html, xhtml and fo.

# DocBook V4
htmldb='http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl'
xhtmldb='http://docbook.sourceforge.net/release/xsl/current/xhtml/docbook.xsl'
fodb='http://docbook.sourceforge.net/release/xsl/current/fo/docbook.xsl'

# DocBook V5
htmldbns='http://docbook.sourceforge.net/release/xsl-ns/current/html/docbook.xsl'
xhtmldbns='http://docbook.sourceforge.net/release/xsl-ns/current/xhtml/docbook.xsl'
fodbns='http://docbook.sourceforge.net/release/xsl-ns/current/fo/docbook.xsl'

# TEI P4 (the xhtml entry points to the same stylesheet as the html entry)
htmltei='http://www.tei-c.org/Stylesheets/p4/html/tei.xsl'
xhtmltei='http://www.tei-c.org/Stylesheets/p4/html/tei.xsl'
fotei='http://www.tei-c.org/Stylesheets/p4/fo/tei.xsl'

# TEI P5 (the xhtml entry points to the same stylesheet as the html entry)
htmlteins='http://www.tei-c.org/Stylesheets/p5/html/tei.xsl'
xhtmlteins='http://www.tei-c.org/Stylesheets/p5/html/tei.xsl'
foteins='http://www.tei-c.org/Stylesheets/p5/fo/tei.xsl'

# default xslt processor; one of: xalan, xt, saxon, saxon-xerces, xsltproc
xslt_processor='xsltproc'

# default fo processor; one of: passivetex, fop, jfor, xep
fo_processor='fop'

# DEPRECATED: the path to the Java class repository. Please set
# xslt_classpath and fo_classpath in your refdbxmlrc config file instead.
# This setting assumes that all necessary .jar files are in this one
# directory. If this is not the case, adding a few symlinks might be the
# simplest solution.
classpath_root=''

# CLASSPATH settings for the xslt and the fo processor
xslt_classpath=''
fo_classpath=''

# optional user configuration file parameter passed on to FOP
fop_config_file=''

# the command that starts FOP, usually a shell script provided by FOP
fop_command='fop'

# Directory containing xep.sh, including the trailing slash. Required
# because xep.sh usually is not in your PATH; leave this empty if xep.sh
# *is* in your PATH.
xep_path=''

# the path of the global configuration file
globalconfig='/etc/refdb/refdbxmlrc'

# defaults for the stylesheet and the output format
stylesheet=''
outformat='html'

# the fo processors able to produce each printable output format
rtf_generators='fop jfor'
pdf_generators='fop xep passivetex'

# determine configuration files
# If the preset global configuration file is not readable and REFDBLIB is
# set, $REFDBLIB/refdbxmlrc is used as the global file instead.
if [ ! -r "$globalconfig" ] && [ -n "$REFDBLIB" ]; then
    globalconfig=$REFDBLIB/refdbxmlrc
fi
userconfig=$HOME/.refdbxmlrc

# Reads the settings of one configuration file. Arguments: config_filename
# Each line holds a variable name, whitespace, and the value (the rest of
# the line). Lines starting with any other word, e.g. comments, are ignored.
# Sets the globals xslt_processor, xslt_classpath, fo_processor,
# fo_classpath, stylesheet, outformat, fop_config_file and fop_command.
read_refdbxml_config () {
    local refdbvar refdbval

    # -r keeps backslashes (e.g. in DOS-style classpaths) intact; the second
    # test picks up a last line that lacks the trailing newline
    while read -r refdbvar refdbval || [ -n "$refdbvar" ]; do
        case "$refdbvar" in
            xslt_processor  ) xslt_processor=$refdbval;;
            xslt_classpath  ) xslt_classpath=$refdbval;;
            fo_processor    ) fo_processor=$refdbval;;
            fo_classpath    ) fo_classpath=$refdbval;;
            stylesheet      ) stylesheet=$refdbval;;
            outformat       ) outformat=$refdbval;;
            # stored together with its option switch, ready for the fop call
            fop_config_file ) fop_config_file="-c $refdbval";;
            fop_command     ) fop_command=$refdbval;;
        esac
    done < "$1"
}

# read the settings in the configure file(s); the user's file comes last so
# that its settings override those of the global file
for config in "$globalconfig" "$userconfig"; do
    if [ -n "$config" ] && [ -r "$config" ]; then
        read_refdbxml_config "$config"
    fi
done

# Choose the launch command for the xslt processor selected so far (built-in
# default or configuration files; this runs before the command line is read).
# The functions which follow append the file arguments. The Java-based
# processors get their main class plus fixed options, xsltproc gets the
# program name.
# note: saxon, saxon-xerces and xalan are started with command-line options
#       for catalog resolving
resolver_tools="org.apache.xml.resolver.tools"
xerces_jaxp="org.apache.xerces.jaxp"
saxon_main="com.icl.saxon.StyleSheet -x ${resolver_tools}.ResolvingXMLReader -y ${resolver_tools}.ResolvingXMLReader -r ${resolver_tools}.CatalogResolver -u"
if [ "$xslt_processor" = "xalan" ]; then
    xslt_launch="org.apache.xalan.xslt.Process -ENTITYRESOLVER ${resolver_tools}.CatalogResolver -URIRESOLVER ${resolver_tools}.CatalogResolver "
elif [ "$xslt_processor" = "xt" ]; then
    xslt_launch="com.jclark.xsl.sax.Driver"
elif [ "$xslt_processor" = "saxon" ]; then
    xslt_launch=$saxon_main
elif [ "$xslt_processor" = "saxon-xerces" ]; then
    # same as saxon, preceded by the JVM properties which select xerces
    xslt_launch="-Djavax.xml.parsers.DocumentBuilderFactory=${xerces_jaxp}.DocumentBuilderFactoryImpl -Djavax.xml.parsers.SAXParserFactory=${xerces_jaxp}.SAXParserFactoryImpl ${saxon_main}"
elif [ "$xslt_processor" = "xsltproc" ]; then
    xslt_launch="xsltproc --xinclude"
fi
# the pieces above only serve to assemble xslt_launch
unset resolver_tools xerces_jaxp saxon_main
	

## function definitions
# creates fo output from xml. Arguments: input_filename out_filename
# Reads the globals xslt_processor, xslt_classpath and xslt_launch as well as
# the stylesheet location: jfosheet for the Java processors, fosheet for
# xsltproc. File names and stylesheet locations are passed as single words,
# so they may contain blanks; an empty stylesheet setting is left out of the
# command line altogether.
process_fo () {
    # ${xslt_launch} is deliberately unquoted: it holds the program or class
    # name plus its fixed options as separate words
    case "$xslt_processor" in
        xalan )
            # the source document must be introduced by -in, just like in
            # process_html and process_xhtml
            java -cp "$xslt_classpath" ${xslt_launch} -in "$1" -xsl ${jfosheet:+"$jfosheet"} -out "$2";;
        xt )
            java -cp "$xslt_classpath" ${xslt_launch} "$1" ${jfosheet:+"$jfosheet"} > "$2";;
        saxon | saxon-xerces )
            java -cp "$xslt_classpath" ${xslt_launch} -o "$2" "$1" ${jfosheet:+"$jfosheet"};;
        xsltproc )
            ${xslt_launch} ${fosheet:+"$fosheet"} "$1" > "$2";;
    esac
}

# creates html output from xml. Arguments: input_filename out_filename
# Reads the globals xslt_processor, xslt_classpath and xslt_launch as well as
# the stylesheet location: jhtmlsheet for the Java processors, htmlsheet for
# xsltproc. File names and stylesheet locations are passed as single words,
# so they may contain blanks; an empty stylesheet setting is left out of the
# command line altogether.
process_html () {
    # ${xslt_launch} is deliberately unquoted: it holds the program or class
    # name plus its fixed options as separate words
    case "$xslt_processor" in
        xalan )
            java -cp "$xslt_classpath" ${xslt_launch} -in "$1" -xsl ${jhtmlsheet:+"$jhtmlsheet"} -out "$2";;
        xt )
            java -cp "$xslt_classpath" ${xslt_launch} "$1" ${jhtmlsheet:+"$jhtmlsheet"} > "$2";;
        saxon | saxon-xerces )
            java -cp "$xslt_classpath" ${xslt_launch} -o "$2" "$1" ${jhtmlsheet:+"$jhtmlsheet"};;
        xsltproc )
            ${xslt_launch} ${htmlsheet:+"$htmlsheet"} "$1" > "$2";;
    esac
}

# creates xhtml output from xml. Arguments: input_filename out_filename
# Reads the globals xslt_processor, xslt_classpath and xslt_launch as well as
# the stylesheet location: jxhtmlsheet for the Java processors, xhtmlsheet
# for xsltproc. File names and stylesheet locations are passed as single
# words, so they may contain blanks; an empty stylesheet setting is left out
# of the command line altogether.
process_xhtml () {
    # ${xslt_launch} is deliberately unquoted: it holds the program or class
    # name plus its fixed options as separate words
    case "$xslt_processor" in
        xalan )
            java -cp "$xslt_classpath" ${xslt_launch} -in "$1" -xsl ${jxhtmlsheet:+"$jxhtmlsheet"} -out "$2";;
        xt )
            java -cp "$xslt_classpath" ${xslt_launch} "$1" ${jxhtmlsheet:+"$jxhtmlsheet"} > "$2";;
        saxon | saxon-xerces )
            java -cp "$xslt_classpath" ${xslt_launch} -o "$2" "$1" ${jxhtmlsheet:+"$jxhtmlsheet"};;
        xsltproc )
            ${xslt_launch} ${xhtmlsheet:+"$xhtmlsheet"} "$1" > "$2";;
    esac
}

# creates printable output (pdf|rtf) from fo.
# Arguments: input_filename out_filename
# Reads the globals outformat, fo_processor, rtf_generators, pdf_generators,
# fo_classpath, fop_command, fop_config_file and xep_path.
# Exits the script with status 1 if the fo processor cannot produce the
# requested output format, or if a pdfxmltex run fails.
process_print () {
    local generators="" generator processor_outformat_match=false
    local texbase aux_snapshot reruns
    # upper limit for the additional pdfxmltex runs (see passivetex below)
    local max_reruns=10

    case "$outformat" in
        rtf ) generators=$rtf_generators;;
        pdf ) generators=$pdf_generators;;
    esac

    # the generator lists are blank-separated words, hence no quotes here
    for generator in $generators ; do
        [ "$generator" = "$fo_processor" ] && processor_outformat_match=true
    done

    if [ "$processor_outformat_match" = false ] ; then
        echo "Specified FO processor: $fo_processor."
        echo "Specified output format: $outformat."
        echo "Error: $fo_processor does not produce $outformat output."
        exit 1
    fi

    case "$fo_processor" in
        jfor )
            java -cp "$fo_classpath" ch.codeconsult.jfor.main.CmdLineConverter "$1" "$2";;
        passivetex )
            # the .aux file is looked up next to the input, under the
            # input's name without its .fo extension
            texbase=${1%.fo}
            aux_snapshot=$texbase.aux.$$
            if [ ! -e "$texbase.aux" ]; then
                touch "$texbase.aux"
            fi
            cp "$texbase.aux" "$aux_snapshot"
            if ! pdfxmltex "$1"; then
                rm -f "$aux_snapshot"
                exit 1
            fi
            # rerun until the .aux file no longer changes, but give up
            # after max_reruns additional runs instead of looping forever
            reruns=0
            until diff --brief "$texbase.aux" "$aux_snapshot"; do
                if [ "$reruns" -ge "$max_reruns" ]; then
                    echo "Warning: $texbase.aux still changes after $max_reruns additional pdfxmltex runs, giving up." >&2
                    break
                fi
                reruns=$((reruns + 1))
                cp "$texbase.aux" "$aux_snapshot"
                if ! pdfxmltex "$1"; then
                    rm -f "$aux_snapshot"
                    exit 1
                fi
            done
            rm "$aux_snapshot";;
        fop )
            # fop_config_file is either empty or "-c <path>"; the path is
            # handed over as one word so that it may contain blanks.
            # $fop_command stays unquoted as it may carry extra words.
            if [ -n "$fop_config_file" ]; then
                $fop_command -c "${fop_config_file#-c }" -fo "$1" "-$outformat" "$2"
            else
                $fop_command -fo "$1" "-$outformat" "$2"
            fi;;
        xep )
            "${xep_path}xep.sh" -fo "$1" -pdf "$2";;
    esac
}

# read the command line options
# Arguments: the command line of the script ("$@")
# Stores the option values in the globals fop_config_file, xslt_processor,
# fo_processor, stylesheet and outformat. OPTIND is left pointing at the
# first non-option argument. Prints the help and exits with status 0 for -h;
# prints the usage and exits with status 1 for an unknown option or for an
# option which lacks its argument.
parse_command_line () {
    local opt

    # the leading colon selects silent error reporting: getopts then reports
    # a missing option argument as ":" and an unknown option as "?"
    while getopts ":hp:f:s:t:c:" opt; do
        case $opt in
            c  ) fop_config_file="-c $OPTARG";;
            h  ) echo "creates formatted output from a DocBook or TEI XML sources"
                 echo 'usage: refdbxml [-c fop_config_file] [-f fo_processor ] [-h] [-p xslt_processor] [-s stylesheet] [-t outformat] file1 [file2...]'
                 echo "Options: -c select the FOP configuration file"
                 echo "         -f specify fo processor: passivetex, fop, jfor, or xep"
                 echo "         -h print this help and exit"
                 echo "         -p specify xslt processor: xalan, xt, saxon, saxon-xerces, or xsltproc"
                 echo "         -s stylesheet (the RefDB-generated driver file)"
                 echo "         -t select the output format. Possible values are html, xhtml, rtf, pdf."
                 exit 0 ;;
            p  ) xslt_processor=$OPTARG;;
            f  ) fo_processor=$OPTARG;;
            s  ) stylesheet=$OPTARG;;
            t  ) outformat=$OPTARG;;
            : | \? )
                 if [ "$opt" = ":" ]; then
                     echo "option -$OPTARG requires an argument"
                 fi
                 echo 'usage: refdbxml [-c fop_config_file] [-h] [-p xslt_processor] [-f fo_processor] [-s stylesheet] [-t outformat] file1 [file2...]'
                 echo 'type refdbxml -h to invoke help'
                 exit 1;;
        esac
    done
}
parse_command_line "$@"

# correct the index so the filename argument is always $1
shift $((OPTIND - 1))

# test for valid arguments
# Succeeds if the argument is one of the supported output formats.
# Arguments: outformat
# The value is compared as a single word, so a setting which contains blanks
# is rejected instead of slipping through a failed test expression.
is_valid_outformat () {
    case "$1" in
        html | xhtml | rtf | pdf ) return 0;;
        * ) return 1;;
    esac
}

# an empty setting (e.g. a config file line without a value) means html
: "${outformat:=html}"
if ! is_valid_outformat "$outformat"; then
  echo "specify one of 'html', 'xhtml', 'rtf', 'pdf' with the -t option"
  exit 1
fi

# pick stylesheet; the values "db", "db5", "tei", and "tei5" use the stock
# stylesheets without any RefDB extensions. To format a document using
# RefDB bibliographies the RefDB-generated driver file must be specified here
#
# Reads the globals stylesheet and outformat and sets htmlsheet, xhtmlsheet
# and fosheet. A stock name sets all three; any other value is taken as the
# location of a driver file and is assigned only to the variable which
# belongs to the requested output format (fosheet for rtf and pdf).
pick_stylesheets () {
    case "$stylesheet" in
        db   ) htmlsheet=$htmldb
               xhtmlsheet=$xhtmldb
               fosheet=$fodb;;
        tei  ) htmlsheet=$htmltei
               xhtmlsheet=$xhtmltei
               fosheet=$fotei;;
        db5  ) htmlsheet=$htmldbns
               xhtmlsheet=$xhtmldbns
               fosheet=$fodbns;;
        tei5 ) htmlsheet=$htmlteins
               xhtmlsheet=$xhtmlteins
               fosheet=$foteins;;
        *    ) case "$outformat" in
                   html  ) htmlsheet=$stylesheet;;
                   xhtml ) xhtmlsheet=$stylesheet;;
                   *     ) fosheet=$stylesheet;;
               esac;;
    esac
}
pick_stylesheets

# test for valid xslt processor and select its launch command
# default is xsltproc unless configured otherwise
#
# The launch command is (re)selected here, after both the configuration
# files and the command line have been evaluated, so that a processor chosen
# with -p is started with its own launch command.
# note: saxon, saxon-xerces and xalan are started with command-line options
#       for catalog resolving

# Sets xslt_launch for the given processor. Arguments: xslt_processor
# Fails without touching xslt_launch if the processor is unknown. The name
# is compared as a single word, so a value containing blanks is rejected.
select_xslt_launch () {
    case "$1" in
        xalan    ) xslt_launch="org.apache.xalan.xslt.Process -ENTITYRESOLVER org.apache.xml.resolver.tools.CatalogResolver -URIRESOLVER org.apache.xml.resolver.tools.CatalogResolver ";;
        xt       ) xslt_launch="com.jclark.xsl.sax.Driver";;
        saxon    ) xslt_launch="com.icl.saxon.StyleSheet -x org.apache.xml.resolver.tools.ResolvingXMLReader -y org.apache.xml.resolver.tools.ResolvingXMLReader -r org.apache.xml.resolver.tools.CatalogResolver -u";;
        saxon-xerces ) xslt_launch="-Djavax.xml.parsers.DocumentBuilderFactory=org.apache.xerces.jaxp.DocumentBuilderFactoryImpl -Djavax.xml.parsers.SAXParserFactory=org.apache.xerces.jaxp.SAXParserFactoryImpl com.icl.saxon.StyleSheet -x org.apache.xml.resolver.tools.ResolvingXMLReader -y org.apache.xml.resolver.tools.ResolvingXMLReader -r org.apache.xml.resolver.tools.CatalogResolver -u";;
        xsltproc ) xslt_launch="xsltproc --xinclude";;
        *        ) return 1;;
    esac
}

# an empty setting (e.g. a config file line without a value) means xsltproc
: "${xslt_processor:=xsltproc}"
if ! select_xslt_launch "$xslt_processor"; then
  echo "specify one of 'xalan', 'xt', 'saxon', 'saxon-xerces', 'xsltproc' with the -p option"
  exit 1;
fi

# test for valid fo processor
# if none is specified the default (fop) is used, which is legal

# Succeeds if the argument is one of the supported fo processors.
# Arguments: fo_processor
# The name is compared as a single word, so a setting which contains blanks
# is rejected instead of slipping through a failed test expression.
is_valid_fo_processor () {
    case "$1" in
        passivetex | fop | xep | jfor ) return 0;;
        * ) return 1;;
    esac
}

# an empty setting (e.g. a config file line without a value) means fop
: "${fo_processor:=fop}"
if ! is_valid_fo_processor "$fo_processor"; then
  echo "specify one of 'passivetex', 'fop', 'xep', or 'jfor' with the -f option"
  exit 1;
fi

# if using fop and config file is specified, then check config file exists
# fop_config_file holds the option switch together with the path
# ("-c <path>"). If the path is not readable a message is printed and the
# setting is cleared, so that fop runs without a configuration file. The
# path is tested as a single word and may therefore contain blanks.
check_fop_config_file () {
    local config_path=${fop_config_file#-c }

    if [ "$fo_processor" = "fop" ] && [ -n "$fop_config_file" ] && [ ! -r "$config_path" ]; then
        echo "unable to access fop configuration file: $config_path"
        fop_config_file=""
    fi
}
check_fop_config_file

# on Win32-cygwin, the native Win32 tools want the DOS path
# the variables $jfosheet, $jhtmlsheet and $jxhtmlsheet will receive the
# appropriate paths

# Prints the default classpath which lists every known jar file below the
# (deprecated) classpath_root directory.
# Arguments: root_dir dir_separator path_separator
default_classpath () {
    local jar result=""

    for jar in avalon-framework batik fop jfor jimi-1.0 logkit sax \
               saxon saxon-fop saxon-jdom xalan xerces xp xt resolver; do
        result="${result:+$result$3}$1$2$jar.jar"
    done
    printf '%s\n' "$result"
}

# Prints a stylesheet location in the form handed to the native Win32 Java
# tools. Arguments: location
# An empty location yields no output. URLs are passed through unchanged
# because they are not file system paths; everything else is converted by
# cygpath.
native_location () {
    case "$1" in
        ""    ) ;;
        *://* ) printf '%s\n' "$1";;
        *     ) cygpath -w "$1";;
    esac
}

if [ "$OSTYPE" = "cygwin" ]; then
    if [ -n "$classpath_root" ]; then
        # get the full dos path of the classpath root
        osclasspath_root=$(cygpath -w "$classpath_root")
        classpath=$(default_classpath "$osclasspath_root" '\' ';')
    fi
    jfosheet=$(native_location "$fosheet")
    jhtmlsheet=$(native_location "$htmlsheet")
    jxhtmlsheet=$(native_location "$xhtmlsheet")
else
    classpath=$(default_classpath "$classpath_root" / :)
    jfosheet=$fosheet
    jhtmlsheet=$htmlsheet
    jxhtmlsheet=$xhtmlsheet
fi

# Set classpath variables to default if not already specified
if [ -z "$xslt_classpath" ]; then
    xslt_classpath=$classpath
fi
if [ -z "$fo_classpath" ]; then
    fo_classpath=$classpath
fi

# Converts one document into the output format selected by outformat.
# Arguments: input_filename
# Sets the global basename to the input path without its extension; the
# output files are created next to the input under that name. Returns 1 if
# the fo transformation fails, 0 otherwise.
convert_file () {
    local mypath

    if [ "$OSTYPE" = "cygwin" ]; then
        # get the full dos path of the file
        mypath=$(cygpath -w "$1")
    else
        mypath=$1
    fi

    # extract the basename from the argument; only an extension of the last
    # path component is stripped, a dot in a directory name is left alone
    case "${mypath##*[/\\]}" in
        *.* ) basename=${mypath%.*};;
        *   ) basename=$mypath;;
    esac

    case "$outformat" in
        rtf | pdf )
            # printable formats take the detour via an fo file
            process_fo "$mypath" "$basename.fo" || return 1
            process_print "$basename.fo" "$basename.$outformat";;
        html )
            process_html "$mypath" "$basename.html";;
        xhtml )
            process_xhtml "$mypath" "$basename.xhtml";;
    esac
    return 0
}

# loop over all files on the command line; "$@" keeps every file name in
# one piece even if it contains blanks
for filename in "$@"; do
    convert_file "$filename" || exit 1
done


exit 0
