#!/usr/local/bin/perl -w

# This is the script for second screen of the web-interface. 

# initialize the error message string
my $err_msg = "";
my $path = "";
my $perl5lib = "";

#open the configuration file (./config.txt) and read the environment variables: PATH and PERL5LIB
open(CONF,"<config.txt") or $err_msg = "Error!! SC-cgi/config.txt not found.";

# to read the complete file in one go
while($config = <CONF>)
{
	# read the PATH settings
	if($config=~/^PATH=(.+)/)
	{
		$path = $1;

		chomp $path;
		$path =~ s/^\s//;
	}

	# read the PERL5LIB settings
	if($config=~/^PERL5LIB=(.+)/)
	{
		$perl5lib = $1;

		chomp $perl5lib;
		$perl5lib =~ s/^\s//;
	}
}

if($path eq "")
{
	$err_msg .= "Error!! PATH value not specified - please initialize the SC-cgi/config.txt file<br>";
}

if($perl5lib eq "")
{
	$err_msg .= "Error!! PERL5LIB value not specified - please initialize the SC-cgi/config.txt file<br>";	
}

# set the ENV variables
$ENV{'PATH'}=$path;
$ENV{'PERL5LIB'}=$perl5lib;

# --------------------------------------------------------------------

use CGI;
$CGI::DISABLE_UPLOADS = 0;

# Create the URL for the form-action
my $host = $ENV{'HTTP_HOST'};

$q=new CGI;

print $q->header;
print $q->start_html("SenseClusters");

# check if error occurred during reading the PATH and PERL5LIB variables
if($err_msg ne "")
{
	error($q, $err_msg);
}

$clustype=$q->param("clustype");

$prefix=$q->param("prefix");
if(!$prefix)
{
	$prefix="user";
}

if($prefix =~ m/^([\w\d_\-]+)$/)
{
    $prefix=$1;
}
else
{
    error($q,"Invalid Prefix value!");
}

$usr_dir="user_data/". $prefix.time();
$status=system("mkdir $usr_dir");
if($status!=0)
{
	error($q,"Can not create the user directory $usr_dir");
}

########################
# Test and Train Scope #
########################

$scope_test=$q->param("scope_test");
if($scope_test)
{
    if($scope_test =~ m/^(\d+)$/)
    {
        $scope_test=$1;
    }
    else
    {
        error($q,"Invalid TEST Scope value!");
    }
}

$scope_train=$q->param("scope_train");
if($scope_train)
{
    if($scope_train =~ m/^(\d+)$/)
    {
        $scope_train=$1;
    }
    else
    {
        error($q,"Invalid TRAIN Scope value!");
    }
}

#format
$precision = $q->param("precision");
$format='f16.' . $precision;

################
# Feature Type #
################

$feature_type=$q->param("feature");

################
# Split Type #
################

$split=$q->param("split");
if($split)
{
    if($split =~ m/^(\d+)$/)
    {
		if($split >= 1 && $split <= 99)
		{
			$split=$1;
		}
		else
		{
			error($q,"The Split value can be between 1 to 99 (inclusive).");
		}
    }
    else
    {
        error($q,"Invalid Split value!");
    }
}

####################
# loading Testfile #
####################

$testfile=$q->param("testfile");

if(!$testfile)
{
        print "Please specify the Testfile.<br>\n";
	exit;
}

$test="$usr_dir/$prefix-test.xml";
open(TEST,">$test") || error($q,"Error in uploading Testfile.");
while(read($testfile,$buffer,1024))
{
	print TEST $buffer;
}
close TEST;

#####################
# loading Trainfile #
#####################

$trainfile=$q->param("trainfile");
if($trainfile)
{
	$train="$usr_dir/$prefix-train.plain";
	open(TRAIN,">$train") || error($q,"Error in uploading Trainfile.");

	seek($trainfile,0,0);
	while(read($trainfile,$buffer,1024))
	{
       		print TRAIN $buffer;
	}
}
close TRAIN;

# Check if both TRAIN and split option specified!
if($trainfile && $split)
{
    error($q,"Split and TRAIN file - both options cannot be used together.");
}

#####################
# Loading Tokenfile #
#####################

$token="$usr_dir/token.regex";
open(TOKOUT,">$token") || error($q,"Error in loading Tokenfile.");

if($q->param("token"))
{
	$tokenfile=$q->param("token");
	while(read($tokenfile,$buffer,128))
	{
        	print TOKOUT $buffer;
	}
}
else
{
    open(TOKIN,"token.regex") || error($q,"Error in opening default token.regex file.");
    while(<TOKIN>)
    {
        print TOKOUT;
    }
    close TOKIN;
}
close TOKOUT;

######################
# Loading Targetfile #
######################
if($clustype eq "hclust" || $clustype eq "lsa-hclust")
{
	if($q->param("target"))
	{
		$target="$usr_dir/target.regex";
		open(TARGET_OUT,">$target") || error($q,"Error in loading Targetfile.");
		
		$targetfile=$q->param("target");
		while(read($targetfile,$buffer,128))
		{
			print TARGET_OUT $buffer;
		}
		close TARGET_OUT;
	}
	else
	{
		$target="$usr_dir/target.regex";
		open(TARGET_OUT,">$target") || error($q,"Error in loading Targetfile.");
		
		open(TARGET_IN,"target.regex") || error($q,"Error in opening default target.regex file.");
		while(<TARGET_IN>)
		{
			print TARGET_OUT;
		}
		close TARGET_IN;
		close TARGET_OUT;
	}
}

#else
#{
#    open(TARGET_IN,"target.regex") || error($q,"Error in opening default target.regex file.");
#    while(<TARGET_IN>)
#    {
#        print TARGET_OUT;
#    }
#    close TARGET_IN;
#}


#########################
# Writing to Param file #
#########################

$param_file="$usr_dir/param_file";
open(PARAM,">$param_file") || error($q,"Error in opening PARAM file.");

# word clustering / lsa options
if($clustype eq "wclust")
{
    print PARAM "WORDCLUST=ON\n";
}
elsif($clustype eq "lsa-fclust")
{
    print PARAM "LSAFEATCLUST=ON\n";
}
elsif($clustype eq "lsa-hclust" || $clustype eq "lsa-hlclust")
{
    print PARAM "LSACONTEXTCLUST=ON\n";
}

print PARAM "TEST=\"$prefix-test.xml\"\n";
if(defined $train)
{
	print PARAM "TRAIN=\"$prefix-train.plain\"\n";
}
print PARAM "TOKEN=\"token.regex\"\n";
if(defined $target)
{
	print PARAM "TARGET=\"target.regex\"\n";
}
if($prefix)
{
	print PARAM "PREFIX=\"$prefix\"\n";
}

print PARAM "FEATURE=$feature_type\n";
print PARAM "FORMAT=$format\n";

if($scope_test)
{
    print PARAM "SCOPE_TEST=$scope_test\n";
}

if($scope_train)
{
    print PARAM "SCOPE_TRAIN=$scope_train\n";
}

if($split)
{
    print PARAM "SPLIT=$split\n";
}

close PARAM;

########################
# Feature Options Form #
########################

print $q->start_form(-action=>'third.cgi', -method=>'post', -enctype=>'multipart/form-data');

print "<!-outermost table which divides the screen in 2 parts-->
<table width=100% height=100% border=1>
<tr>
<td bgcolor=#CFCFCF>

<table width=100% border=0>
<tr>
<td>
<a href=\"http://www.d.umn.edu\"><img src=\"http://$host/SC-htdocs/umdlogo.jpg\" border=0 width=\"100\" height=\"60\"></a>
</td>
<td>
<h1><center>
<a href=\"http://senseclusters.sourceforge.net/\">SenseClusters</a> Web Interface
</center></h1>
<center>
<h3>Clusters text instances based on their contextual similarity ...</h3>
</center>
</td>
</tr>
</table>

</td>
</tr>
<tr>
<td bgcolor=#EDEDED>";

print $q->h3("Step 2:");
print "
<table width=100% border=0 cellpadding=3>
<tr>
<td>";

if($clustype eq "wclust")
{
    print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type </td><td>", $q->popup_menu(-name=>'context', -values=>['o2'], -labels=>{o2=>'2nd Order'}, -default=>o2), "</td><td><br></td></tr>";

}
elsif($clustype eq "lsa-fclust")
{
    print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type </td><td>", $q->popup_menu(-name=>'context', -values=>['o1'], -labels=>{o1=>'1st Order'}, -default=>o1), "</td><td><br></td></tr>";

}
elsif($feature_type eq "uni" && ($clustype eq "hclust" || $clustype eq "hlclust"))
{
    print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type </td><td>", $q->popup_menu(-name=>'context', -values=>['o1'], -labels=>{o1=>'1st Order'}, -default=>o1), "</td><td><br></td></tr>";
}
elsif($clustype eq "lsa-hclust" || $clustype eq "lsa-hlclust")
{
    print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type </td><td>", $q->popup_menu(-name=>'context', -values=>['o2'], -labels=>{o2=>'2nd Order'}, -default=>o2), "</td><td><br></td></tr>";
}
else
{
    print $q->a({-href=>"http://$host/SC-htdocs/help.html#context_ord"},"Context Vector"), " Type </td><td>", $q->popup_menu(-name=>'context', -values=>['o1', 'o2'], -labels=>{o1=>'1st Order', o2=>'2nd Order'}, -default=>o2), "</td><td><br></td></tr>";
}

print "<tr><td>", $q->a({-href=>"http://$host/SC-htdocs/help.html#space_space"}, "Clustering Space"), " </td><td> ", $q->popup_menu(-name=>'space', -values=>['vector', 'similarity'], -default=>'vector'), "</td><td><br></td></tr>";

print "<tr><td>", $q->a({-href=>"http://$host/SC-htdocs/help.html#cluststop_cs"}, "Cluster Stopping"), " </td><td> ";
print "<input type=\"radio\" name=\"cluststop\" value=\"nclust\" checked> Set manually";
print "<input type=\"radio\" name=\"cluststop\" value=\"measure\" > Use cluster stopping measures";
print "</td><td><br></td></tr>";

print "<tr><td> Use ", $q->a({-href=>"http://$host/SC-htdocs/help.html#binary"}, "Binary Vectors"), "</td><td>",  $q->checkbox(-name=>'binary',-label=>''), "</td><td><br></td></tr><tr><td colspan=3><br></td></tr>";

print "<tr><td>Load the ", $q->a({-href=>"http://$host/SC-htdocs/help.html#stop_stopfile"},"STOP file"), " (Perl Regex Format) </td><td>", $q->filefield(-name=>'stop', -size=>30),"</td><td>";
print $q->checkbox(-name=>'default_stop',-label=>''), " Use ", $q->a({-href=>"http://$host/SC-htdocs/stopfile"}, "Default"),"</td></tr>";

print "<tr><td>Lower ", $q->a({-href=>"http://$host/SC-htdocs/help.html#remove_f"},"Frequency Cutoff"), " (Integer) </td><td>", $q->textfield(-name=>'remove', -size=>5,-value=>5,-maxlength=>7), "</td><td> [Use 0 to disable this option]</td></tr>";

if($feature_type ne "uni")
{
    print "<tr><td>", $q->a({-href=>"http://$host/SC-htdocs/help.html#window_w"},"Window"), " (Integer) </td><td>" , $q->textfield(-name=>'window', -size=>5, -value=>2, -maxlength=>7), "</td><td> [Use 0 to disable this option]</td></tr>";

    print "<tr><td>", $q->a({-href=>"http://$host/SC-htdocs/help.html#stat_stat"},"Statistical Test"), " of Association </td><td>", $q->popup_menu(-name=>'stat', -labels=>{ll=>"Log-Likelihood", x2=>"Chi-Square", dice=>"Dice", phi=>"Phi", odds=>"Odds Ratio", pmi=>"Point-wise Mutual Information", tmi=>"True Mutual Information", tscore=>"T-Score", leftFisher=>"Left Fishers", rightFisher=>"Right Fishers", none=>"None"}, -values=>['ll', 'x2', 'dice', 'phi', 'odds', 'pmi', 'tmi', 'tscore', 'leftFisher', 'rightFisher', 'none'], -default=>'none'), "</td><td><br></td></tr>";

    print "<tr><td>", $q->a({-href=>"http://$host/SC-htdocs/help.html#stat_rank_n"},"Statistical Rank"), " Cutoff (Integer) </td><td>", $q->textfield(-name=>'stat_rank', size=>5, -maxlength=>7), "</td><td><br></td></tr>";

    print "<tr><td>", $q->a({-href=>"http://$host/SC-htdocs/help.html#stat_score_s"},"Statistical Score"), " Cutoff (Real Number) </td><td>", $q->textfield(-name=>'stat_score', size=>10, -maxlength=>7), "</td><td><br></td></tr></table>";

}

print "<br><table width=100% cellpadding=5>
<tr>
<td align=right>
<input type=\"reset\" value=\"Clear All\">
</td>
<td align=left>
<input type=\"submit\" value=\"Submit\">
</td>
</tr>
</table>";

print $q->hidden(-name=>'usr_dir', -value=>$usr_dir);
print $q->hidden(-name=>'prefix', -value=>$prefix);
print $q->hidden(-name=>'clustype', -value=>$clustype);

print $q->end_form;

print $q->p;

print $q->a({-href=>"http://$host/SC-htdocs/help.html"},"Help");

print "
</td>
</tr>
</table>";

print $q->end_html;

sub error
{
        my ($q,$reason) = @_;
        print $q->h1("Error"),
        $q->p($q->i($reason)),
        $q->end_html;
        exit;
}