#!/bin/csh

# ------------------------------------------------------------------
# this script shows how to use the wrapper program discriminate.pl,
# in either senseclusters native mode or latent semantic analysis
# mode, to carry out word or feature clustering
# ------------------------------------------------------------------

# Originally written by Amruta Purandare, 2002-2004
# Modified by Ted Pedersen, July 2006

# the script runs several experiments that show the use of:

# unigram, bigram, co-occurrence, and target co-occurrence features

# first and second order context vectors

# partitional and agglomerative clustering in vector and similarity spaces

# dimensionality reduction via SVD

# cluster stopping using pk1, pk2, pk3 and gap measures

# senseclusters native mode versus latent semantic analysis

# ------------------------------------------------------------------
# experiment settings
# ------------------------------------------------------------------

# switches that are added to the discriminate.pl command line only
# when the matching loop variable (svd / lsa) below is "on"
set svd_params = "--svd"
set lsa_params = "--lsa"

# handed to discriminate.pl unchanged
# NOTE(review): presumably a log-likelihood test with a 3.841 score
# cutoff - confirm against the discriminate.pl documentation
set statistic = "--stat ll --stat_score 3.841"

# values given to --remove and --window
set remove = 5
set window = 2

# directory the script is started from; the Regexs/ files are
# referenced relative to it
set expr_path = `pwd`

# separator lines printed around the progress messages
set pct_rule  = " %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% "
set star_rule = " ******************************************************** "
set dash_rule = " -------------------------------------------------------- "

# ------------------------------------------------------------------
# run every parameter combination for every entry in LexSample
# ------------------------------------------------------------------

cd LexSample

    set lexelts = `ls`
    foreach lexelt ($lexelts)
        cd $lexelt

            echo "$pct_rule"
            echo "                    PROCESSING $lexelt"
            echo "$pct_rule"

            # gather the $lexelt-t* files into a pristine sub-directory;
            # each experiment below works on its own copy of it
            mkdir $lexelt
            mv $lexelt-t* $lexelt

            # feature type: unigram, bigram, co-occurrence,
            # target co-occurrence
            foreach feature (uni bi co tco)
                # first or second order context vectors
                foreach context (o1 o2)
                    # vector or similarity space
                    foreach space (vector similarity)
                        # agglomerative or partitional clustering
                        foreach clmethod (agglo rbr)
                            # cluster stopping measure
                            foreach cluststop (pk1 pk2 pk3 gap)
                                # svd switch
                                foreach svd (on off)
                                    # lsa switch
                                    foreach lsa (on off)

                                        echo "$star_rule"
                                        echo "Running $lexelt with following parameters -"
                                        echo "--feature = $feature"
                                        echo "--context = $context"
                                        echo "--space = $space"
                                        echo "--clmethod = $clmethod"
                                        echo "--cluststop = $cluststop"
                                        echo "--svd = $svd"
                                        echo "--lsa = $lsa"

                                        # one result directory per combination,
                                        # named after all of its settings
                                        set run_dir = "$lexelt.$feature.$context.$space.$clmethod.$cluststop.$svd.$lsa"

                                        # a single blank stands in for a switch
                                        # that is turned off
                                        set svd_string = " "
                                        if ($svd == "on") set svd_string = "$svd_params"

                                        set lsa_string = " "
                                        if ($lsa == "on") set lsa_string = "$lsa_params"

                                        cp -r $lexelt $run_dir
                                        cd $run_dir

                                            echo "$dash_rule"
                                            echo " Results in Directory: $run_dir"
                                            echo "$dash_rule"

                                            # build the argument list once so the
                                            # echoed command and the executed
                                            # command cannot drift apart
                                            set dargs = "--showargs --verbose --wordclust $lsa_string --space $space --clmethod $clmethod --token $expr_path/Regexs/token.regex --prefix $lexelt --context $context $svd_string --feature $feature --remove $remove --window $window --stop $expr_path/Regexs/stoplist-nsp.regex --cluststop $cluststop $statistic $lexelt-test.xml"

                                            echo "discriminate.pl $dargs"

                                            # left unquoted on purpose: the string
                                            # must split back into separate words
                                            discriminate.pl $dargs

                                            echo "$star_rule"

                                        cd ..
                                    end # end of lsa loop
                                end # end of svd loop
                            end # end of cluststop loop
                        end # end of clmethod loop
                    end # end of space loop
                end # end of context loop
            end # end of feature loop
        cd ..
    end
cd ..