#!/usr/bin/perl
##############################################################
# tGenerator validation script: checkRefs.pl
# 
# Stephen W. Thomas
# sthomas@cs.queensu.ca
# http://research.cs.queensu.ca/~sthomas/
# Software Analysis and Intelligence Lab (SAIL)
# School of Computing, Queen's University, Canada
#
# Checks the keys for consistency in the .csv files generated by tGenerator.
#
##############################################################
use strict;


# Check command-line arguments: exactly one (the data directory) is required.
unless (scalar(@ARGV) == 1) {
    print "$0: Error: Incorrect usage.\n",
          "Usage: $0 directory_with_data\n",
          "Exiting.\n";
    exit 1;
}

# The directory that holds all the .csv files
my $dir = $ARGV[0];



#######################################
#######################################
# getIDs($file)
#
# Reads a '&'-separated file and collects the first field of every line.
#
#   $file - path of the file to read.
#
# Returns a hash (as a flat list) whose keys are the IDs found in the first
# column; every value is 1. A line with an empty first field yields the
# empty-string key.
#
# Dies with a message naming the file if it cannot be opened.
sub getIDs{
    my $file = shift;
    my %hash;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode, and the handle cannot clash with other code.
    open(my $fh, '<', $file)
        or die "$0: cannot open '$file' for reading: $!\n";

    while (my $line = <$fh>){
        chomp $line;

        # Only the first field is needed; a limit of 2 avoids splitting the rest.
        my ($id) = split(/&/, $line, 2);

        # split returns an empty list for an empty line.
        $id = '' unless defined $id;

        $hash{$id} = 1;
    }
    close($fh);

    return %hash;
}



###################################
###################################
# checkTime($ct)
#
# Checks referential integrity of the link files for one time slice.
#
#   $ct - slice label used in the file names ("final" or a date string);
#         files are looked up as "$dir/<table>.$ct.csv".
#
# The IDs of item, author and publisher are loaded with getIDs(), then every
# line of item_author, item_publisher and related_item is checked so that
# both IDs it mentions exist. On the first dangling reference an error and
# the line number are printed and the script exits with status 1 (so callers
# can detect the failure). Dies if any file cannot be opened.
sub checkTime{
    my $ct = shift;

    my $item_file           = "$dir/item.$ct.csv";
    my $author_file         = "$dir/author.$ct.csv";
    my $publisher_file      = "$dir/publisher.$ct.csv";
    my $item_author_file    = "$dir/item_author.$ct.csv";
    my $item_publisher_file = "$dir/item_publisher.$ct.csv";
    my $related_item_file   = "$dir/related_item.$ct.csv";

    # First, read in the entity files to get the known IDs
    my %items      = getIDs($item_file);
    my %authors    = getIDs($author_file);
    my %publishers = getIDs($publisher_file);

    # item_author: column 1 must be an item, column 2 an author
    _checkRefs(
        $item_author_file, \%items, \%authors,
        sub { my ($iID, $aID) = @_;
              return "\nERROR: no matching item ID for $iID for author $aID in $item_author_file\n"; },
        sub { my ($iID, $aID) = @_;
              return "\nERROR: no matching author ID for $aID in $item_author_file\n"; },
    );
    print "$item_author_file looks ok for referential integrity\n";

    # item_publisher: column 1 must be an item, column 2 a publisher
    _checkRefs(
        $item_publisher_file, \%items, \%publishers,
        sub { my ($iID, $pID) = @_;
              return "\nERROR: no matching item ID for $iID for publisher $pID in $item_publisher_file\n"; },
        sub { my ($iID, $pID) = @_;
              return "\nERROR: no matching publisher ID for $pID in $item_publisher_file\n"; },
    );
    print "$item_publisher_file looks ok for referential integrity\n";

    # related_item: both columns must be items
    _checkRefs(
        $related_item_file, \%items, \%items,
        sub { my ($iID, $rID) = @_;
              return "\nERROR: no matching item ID $iID for related $rID in $related_item_file\n"; },
        sub { my ($iID, $rID) = @_;
              return "\nERROR: no matching item ID $rID for $iID in $related_item_file\n"; },
    );
    print "$related_item_file looks ok for referential integrity\n";

    return;
}

# _checkRefs($file, \%first_ids, \%second_ids, $first_msg, $second_msg)
#
# Private helper for checkTime: scans one '&'-separated link file and
# verifies that the first field of each line is a key of %first_ids and the
# second field a key of %second_ids.
#
#   $first_msg, $second_msg - code refs called with (first, second) that
#                             return the error text for a failed lookup.
#
# Prints the error text and "lines N" (1-based line number) and exits with
# status 1 on the first failure; returns nothing if the whole file is clean.
# Dies if the file cannot be opened.
sub _checkRefs{
    my ($file, $first_ids, $second_ids, $first_msg, $second_msg) = @_;

    open(my $fh, '<', $file)
        or die "$0: cannot open '$file' for reading: $!\n";

    my $lineNo = 0;
    while (my $line = <$fh>){
        ++$lineNo;

        # Strip the newline before splitting so that neither field can
        # carry it, whichever column happens to be last on the line.
        chomp $line;
        my ($first, $second) = split(/&/, $line, 3);
        $first  = '' unless defined $first;
        $second = '' unless defined $second;

        if (! exists $first_ids->{$first}){
            my $msg = $first_msg->($first, $second);
            print $msg;
            print "lines $lineNo\n";
            exit 1;
        }
        if (! exists $second_ids->{$second}){
            my $msg = $second_msg->($first, $second);
            print $msg;
            print "lines $lineNo\n";
            exit 1;
        }
    }
    close($fh);

    return;
}


# If there are slices (files named item.????-??-??.csv), process those.
#
# The directory is listed with opendir/readdir because the -e file test does
# not expand wildcards, and because shelling out to `ls` breaks on directory
# names containing spaces or shell metacharacters. Only the date part of the
# file name is captured, so checkTime() receives e.g. "2001-01-01" and not a
# path that still carries the directory prefix.
opendir(my $dh, $dir) or die "$0: cannot read directory '$dir': $!\n";
my @slice_times = sort
                  map { /\Aitem\.(.{4}-.{2}-.{2})\.csv\z/ ? $1 : () }
                  readdir($dh);
closedir($dh);

foreach my $time (@slice_times){
    checkTime($time);
}

# The final (non-sliced) data set is always checked.
checkTime("final");
