NAME

Text::Bayon - Handling module for the clustering tool 'Bayon'

SYNOPSIS

use Text::Bayon;
use Data::Dumper;

my $bayon = Text::Bayon->new;

my $input_data = {
	document_id1 => { 
		key1_1 => "value1_1",
		key1_2 => "value1_2",
		key1_3 => "value1_3",
	},
	document_id2 => { 
		key2_1 => "value2_1",
		key2_2 => "value2_2",
		key2_3 => "value2_3",
	},
		.
		.
		.
};

my $output = $bayon->clustering($input_data);

print Dumper $output;

#$output is ... 
#{
#  cluster1 => [ document_id, document_id ],
#  cluster2 => [ document_id, document_id ],
#  cluster3 => [ document_id, document_id, document_id ],
#		.
#		.
#		.
#} 

#-----------
# If you give the 'point' option, you get the data in the format below.
#

my $options = { point => 1 };
my $output = $bayon->clustering( $input_data, $options );

print Dumper $output;

#$output is ... 
#{
#  cluster1 => [ { document_id => score }, { document_id => score } ],
#  cluster2 => [ { document_id => score }, { document_id => score } ],
#  cluster3 => [ { document_id => score }, { document_id => score },
#		{ document_id => score }, { document_id => score } ],
#		.
#		.
#		.
#} 

#-----------
# If you set the 'clvector' option to true, you also get the clvector data.
#

my $options = { clvector => 1 };
my ( $output, $clvector ) = $bayon->clustering( $input_data, $options );

#-----------
# If you pass outfiles as the 3rd argument, the data is written to those files instead of being returned.
#

my $options = { clvector => 1 };
my $outfiles = {
  output   => 'output.tsv',
  clvector => 'centroid.tsv',
};

$bayon->clustering( $input_data, $options, $outfiles );

DESCRIPTION

Text::Bayon is a handling module for the clustering tool 'Bayon'.

Bayon is a simple and fast hard-clustering tool.

Bayon supports Repeated Bisection clustering and K-means clustering.

I think Bayon is excellent software for data-mining people!

If you want to learn about and install Bayon, see Bayon's manual. ( http://code.google.com/p/bayon/ )

METHODS

new(%conf)

%conf = (
    bayon_path => '/usr/local/bin/bayon', # optional
    dry_run    => 1, # optional
);

clustering( $input, $options, $outfiles )

$input = I< hashref | filename | filehandle >; # required

$options = {
    number        => I<num>,           # optional
    limit         => I<num>,           # optional, default 1.5 
    point         => 1,                # optional
    clvector      => I< 1 | 0 >,       # optional
    clvector_size => I<num>,           # optional
    method        => I< rb | kmeans >, # optional, default 'rb'
    seed          => I<num>,           # optional
};

$outfiles = {
    output   => I< filename >, # optional
    clvector => I< filename >, # optional
}

classify( $input, $options, $outfiles )

$input = I< hashref | filename | filehandle >; # required

$options = {
    classify      => I< filename >, # required
    inv_keys      => I< num >,      # optional, default 20
    inv_size      => I< num >,      # optional, default 100
    classify_size => I< num >,      # optional, default 20
};

$outfiles = {
    output   => I< filename >, # optional
}

AUTHOR

Takeshi Miki <t.miki@nttr.co.jp>

( Bayon's AUTHOR is Mizuki Fujisawa <fujisawa@bayon.cc> )

LICENSE

This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

SEE ALSO

http://code.google.com/p/bayon/