#!/bin/bash
# 
# Stephen W. Thomas
# November 2010, late at night.
#
# This script executes the workload (i.e., schema validation) on a single
# dataset.
# To use this, make sure the $TXSCHEMA_HOME env variable is set correctly
#
# The runAll script calls this one iteratively; you may want to run that one
# instead.
#
# Results are written to stdout
##############################################################################


# Positional arguments:
#   $1  dataset directory
#   $2  sequenced flag (1 = also run the sequenced experiments)
#   $3  optional verbose flag (1 = verbose); defaults to 0
verbose=0

# Compare the argument count arithmetically. Inside [[ ]] the `<` operator
# compares strings lexicographically, so a count of 10 or more would sort
# before "2" and wrongly trigger the usage message.
if (( $# < 2 )); then
    # Usage goes to stderr so it never mixes with results on stdout.
    echo "Usage: $0 dataset sequenced? [verbose?]" >&2
    exit 1
fi

data=$1
sequenced=$2

# The verbose flag is only honoured when exactly three arguments are given.
if (( $# == 3 )); then
    verbose=$3
fi


# Some error checking. Diagnostics are sent to stderr so that they never end
# up mixed into the results printed on stdout. Both failures exit with 3.

# -d is false for a missing path as well as for a non-directory, so a
# separate existence test is not needed.
if [[ ! -d $data ]]; then
    echo "$data doesn't exist or is not a directory." >&2
    exit 3
fi

# txmllint is located relative to TXSCHEMA_HOME, so it must be non-empty.
if [[ -z $TXSCHEMA_HOME ]]; then
    echo "Need to set TXSCHEMA_HOME environment variable." >&2
    exit 3
fi

# Location of the txmllint executable inside the TXSCHEMA_HOME tree.
txmllint="$TXSCHEMA_HOME/executable/txmllint"


# Count the entries matching output.2* in the dataset directory. The shell
# glob is used instead of parsing `ls` output, so directory names containing
# whitespace work and no error is printed when nothing matches (an unmatched
# glob stays literal and is filtered out by the -e test, giving a count of 0).
numSlices=0
for slice in "$data"/output.2*; do
    if [[ -e $slice ]]; then
        numSlices=$((numSlices + 1))
    fi
done

# Sequenced experiments: for every conventional schema, time (a) plain xmllint
# run once per output.2* snapshot and (b) a single txmllint run over
# output.final.xml. Each measurement is printed as
#   <tool> <numSlices> <schema> <elapsed>
# Only runs when the sequenced flag is 1.
if [[ $sequenced == 1 ]]; then

    if [[ $verbose == 1 ]]; then
        echo "Running sequenced exeriments (units are seconds)"
    fi

    # This is the list of schemas in the schemas directory
    convSchemas=(DCSD-ref.xsd DCSD-ident.xsd DCSD-datatype.xsd DCSD-card.xsd)
    for schema in "${convSchemas[@]}"
    do
        # First, run xmllint
        START=$(date +%s.%N)

        # Must call xmllint individually for each slice. Iterate over the glob
        # directly (not over `ls` output) so paths with spaces stay intact.
        for snapshot in "$data"/output.2*
        do
            # An unmatched glob is left as the literal pattern; skip it.
            [[ -e $snapshot ]] || continue

            if [[ $verbose == 1 ]]; then
                xmllint --noout --schema "schemas/$schema" "$snapshot"
            else
                # xmllint reports validation results on stderr; hide them
                # unless verbose output was requested.
                xmllint --noout --schema "schemas/$schema" "$snapshot" 2> /dev/null
            fi
        done

        END=$(date +%s.%N)
        TIME=$(echo "$END - $START" | bc)

        # Fields are intentionally unquoted so that an empty field collapses
        # instead of leaving stray spaces in the result line.
        # shellcheck disable=SC2086
        echo xmllint $numSlices $schema $TIME


        # Second, run txmllint. The symlinks created in the current directory
        # (ts.xml, Annotations.xml, DCSD.xsd, output.final.xml) select the
        # blank annotation set, the schema under test and the dataset's final
        # document.
        ln -sf schemas/ts.xml
        ln -sf schemas/A-blank.xml Annotations.xml
        ln -sf "schemas/$schema" DCSD.xsd
        ln -sf "$data/output.final.xml"

        START=$(date +%s.%N)

        if [[ $verbose == 1 ]]; then
            "$txmllint" -d 0 output.final.xml
        else
            "$txmllint" -d 0 output.final.xml > /dev/null
        fi

        END=$(date +%s.%N)
        TIME=$(echo "$END - $START" | bc)

        # shellcheck disable=SC2086
        echo txmllint $numSlices $schema $TIME
    done

    #exit;
fi # end sequenced

# Nonsequenced experiments: run txmllint once per constraint type, using
# annotation file schemas/A-<i>.xml together with the blank conventional
# schema. Each run prints
#   txmllint <numSlices> <type> <TIME2> <TIME>
# where TIME is the wall-clock time measured here and TIME2 is the fourth
# whitespace-separated field of the txmllint output line(s) mentioning the
# type (presumably the tool's own reported time -- verify against txmllint).
if [[ $verbose == 1 ]]; then
    echo "Running nonsequenced exeriments"
fi

TYPE="Identity Referential Cardinality"
i=1
# TYPE is deliberately unquoted: it is a space-separated list of words.
for type in $TYPE
do
    annotation=A-$i.xml

    # Point the fixed file names in the current directory at the inputs for
    # this run.
    ln -sf schemas/ts.xml
    ln -sf "schemas/$annotation" Annotations.xml
    ln -sf schemas/DCSD-blank.xsd DCSD.xsd
    ln -sf "$data/output.final.xml"

    # Capture the tool's stdout in both modes so that TIME2 can always be
    # extracted. Previously TIME2 was never set in verbose mode, which
    # silently dropped a column from the result line.
    START=$(date +%s.%N)
    output=$("$txmllint" -d 0 output.final.xml)
    END=$(date +%s.%N)
    TIME=$(echo "$END - $START" | bc)

    # Verbose mode still shows the tool's output, now after the run finishes.
    if [[ $verbose == 1 && -n $output ]]; then
        printf '%s\n' "$output"
    fi

    TIME2=$(printf '%s\n' "$output" | grep -- "$type" | awk '{print $4}')

    # Fields are intentionally unquoted so that an empty TIME2 (no matching
    # output line) collapses instead of leaving stray spaces.
    # shellcheck disable=SC2086
    echo txmllint $numSlices $type $TIME2 $TIME

    ((i++))
done


