NAME
Text::Bayon - Handling module for the clustering tool 'Bayon'
SYNOPSIS
use Text::Bayon;
use Data::Dumper;
my $bayon = Text::Bayon->new;
my $input_data = {
document_id1 => {
key1_1 => "value1_1",
key1_2 => "value1_2",
key1_3 => "value1_3",
},
document_id2 => {
key2_1 => "value2_1",
key2_2 => "value2_2",
key2_3 => "value2_3",
},
.
.
.
};
my $output = $bayon->clustering($input_data);
print Dumper $output;
#$output is ...
#{
# cluster1 => [ document_id, document_id ],
# cluster2 => [ document_id, document_id ],
# cluster3 => [ document_id, document_id, document_id ],
# .
# .
# .
#}
#-----------
# If you give the 'point' option, you get the data in the format below.
#
my $options = { point => 1 };
my $output = $bayon->clustering( $input_data, $options );
print Dumper $output;
#$output is ...
#{
# cluster1 => [ { document_id => score }, { document_id => score } ],
# cluster2 => [ { document_id => score }, { document_id => score } ],
# cluster3 => [ { document_id => score }, { document_id => score },
# { document_id => score }, { document_id => score } ],
# .
# .
# .
#}
#-----------
# If you set the 'clvector' option to true, you get the clvector data, too.
#
my $options = { clvector => 1 };
my ( $output, $clvector ) = $bayon->clustering( $input_data, $options );
#-----------
# If you pass outfiles as the 3rd argument, the data is written to the given files instead of being returned.
#
my $options = { clvector => 1 };
my $outfiles = {
output => 'output.tsv',
clvector => 'centroid.tsv',
};
$bayon->clustering( $input_data, $options, $outfiles );
DESCRIPTION
Text::Bayon is a handling module for the clustering tool 'Bayon'.
Bayon is a simple and fast hard-clustering tool.
Bayon supports Repeated Bisection clustering and K-means clustering.
I think Bayon is excellent software for data-mining people!
If you want to learn about and install Bayon, see Bayon's manual. ( http://code.google.com/p/bayon/ )
METHODS
new(%conf)
%conf = (
bayon_path => '/usr/local/bin/bayon', # optional
dry_run => 1, # optional
);
clustering( $input, $options, $outfiles )
$input = I< hashref | filename | filehandle >; # required
$options = {
number => I<num>, # optional
limit => I<num>, # optional, default 1.5
point => 1, # optional
clvector => I< 1 | 0 >, # optional
clvector_size => I<num>, # optional
method => I< rb | kmeans >, # optional, default 'rb'
seed => I<num>, # optional
};
$outfiles = {
output => I< filename >, # optional
clvector => I< filename >, # optional
}
classify( $input, $options, $outfiles )
$input = I< hashref | filename | filehandle >; # required
$options = {
classify => I< filename >, # required
inv_keys => I< num >, # optional, default 20
inv_size => I< num >, # optional, default 100
classify_size => I< num >, # optional, default 20
};
$outfiles = {
output => I< filename >, # optional
}
AUTHOR
Takeshi Miki <t.miki@nttr.co.jp>
( Bayon's AUTHOR is Mizuki Fujisawa <fujisawa@bayon.cc> )
LICENSE
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.