plan
tests
=> 26;
my
$data_dir
= path(
$Bin
,
'data'
);
test_constructor();
test_data();
test_dataset_from_file();
test_private_data();
sub
test_constructor {
throws_ok {
Algorithm::AM::DataSet->new();
}
qr/Failed to provide 'cardinality' parameter/
,
q<dies without 'cardinality' parameter>
;
throws_ok {
Algorithm::AM::DataSet->new(
cardinality
=> 3,
foo
=>
'bar'
,
baz
=>
'buff'
);
}
qr/Unknown parameters in DataSet constructor: baz, foo/
,
'dies with unknown parameters'
;
lives_ok {
Algorithm::AM::DataSet->new(
cardinality
=> 3,
);
}
'constructor lives with normal input'
;
my
$dataset
= Algorithm::AM::DataSet->new(
cardinality
=> 3);
is(
$dataset
->cardinality, 3,
'cardinality set by constructor'
);
return
;
}
sub
test_data {
my
$dataset
= Algorithm::AM::DataSet->new(
cardinality
=> 3);
is(
$dataset
->size, 0,
'new data set has 0 items'
);
is(
$dataset
->num_classes, 0,
'new data set has 0 classes'
);
$dataset
->add_item(
features
=> [
'a'
,
'b'
,
'c'
],
class
=>
'b'
,
comment
=>
'stuff'
);
is(
$dataset
->size, 1,
'add_item adds 1 item to data set'
);
is(
$dataset
->num_classes, 1,
'data set has 1 class'
);
$dataset
->add_item(
features
=> [
'a'
,
'b'
,
'd'
],
class
=>
'c'
,
comment
=>
'stuff'
);
is(
$dataset
->num_classes, 2,
'data set has 2 classes'
);
is(
$dataset
->get_item(1)->comment,
'stuff'
,
'get_item'
);
throws_ok {
$dataset
->add_item(
features
=> [
'3'
,
'1'
],
class
=>
'c'
,
comment
=>
'comment'
);
}
qr/Expected 3 features, but found 2 in 3 1 \(comment\)/
,
'add_item fails with wrong number of features'
;
throws_ok {
$dataset
->add_item();
}
qr/Must provide 'features' parameter of type array ref.*DataSet.t/
,
'add_item fails with missing features parameter'
;
return
;
}
sub
test_dataset_from_file {
subtest
'read nocommas data set'
=>
sub
{
plan
tests
=> 5;
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'chapter_3_no_commas.txt'
),
format
=>
'nocommas'
);
is(
$dataset
->cardinality, 3,
'cardinality'
);
is(
$dataset
->size, 5,
'size'
);
my
$item
=
$dataset
->get_item(0);
is(
$item
->class,
'e'
,
'item class'
);
is_deeply(
$item
->features, [
'3'
,
'1'
,
'0'
],
'item features'
);
is(
$item
->comment,
'myFirstCommentHere'
);
};
subtest
'read commas data set'
=>
sub
{
plan
tests
=> 5;
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'chapter_3_commas.txt'
),
format
=>
'commas'
);
is(
$dataset
->cardinality, 3,
'cardinality'
);
is(
$dataset
->size, 5,
'size'
);
my
$item
=
$dataset
->get_item(0);
is(
$item
->class,
'e'
,
'item class'
);
is_deeply(
$item
->features, [
'3'
,
'1'
,
'0'
],
'item features'
);
is(
$item
->comment,
'myFirstCommentHere'
);
};
throws_ok {
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'chapter_3_commas.txt'
),
);
}
qr/Failed to provide 'format' parameter/
,
'fail with missing format parameter'
;
throws_ok {
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'chapter_3_commas.txt'
),
format
=>
'buh'
);
}
qr/Unknown value buh for format parameter \(should be 'commas' or 'nocommas'\)/
,
'fail with incorrect format parameter'
;
throws_ok {
my
$dataset
= dataset_from_file(
format
=>
'commas'
);
}
qr/Failed to provide 'path' parameter/
,
'fail with missing path parameter'
;
throws_ok {
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'nonexistent'
),
format
=>
'commas'
);
}
qr/Could not find file .*nonexistent/
,
'fail with non-existent Path'
;
throws_ok {
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'bad_data_line.txt'
),
format
=>
'nocommas'
);
}
qr/Couldn't read data at line 2 in .*bad_data_line/
,
'fail with malformed data file'
;
subtest
'data set with default unknown/null labels'
=>
sub
{
plan
tests
=> 3;
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'no_labels_default.txt'
),
format
=>
'commas'
);
is(
$dataset
->size, 2,
'size'
);
my
$item
=
$dataset
->get_item(0);
is(
$item
->class,
undef
,
'class is unknown'
);
is_deeply(
$item
->features, [
'3'
,
'1'
,
''
],
'third feature is unknown'
)
};
subtest
'data set with custom unknown/null labels'
=>
sub
{
plan
tests
=> 3;
my
$dataset
= dataset_from_file(
path
=> path(
$data_dir
,
'no_labels_custom.txt'
),
format
=>
'commas'
,
unknown
=>
'?'
,
null
=>
'NUL'
,
);
is(
$dataset
->size, 2,
'size'
);
my
$item
=
$dataset
->get_item(0);
is(
$item
->class,
undef
,
'class is unknown'
);
is_deeply(
$item
->features, [
'3'
,
'1'
,
''
],
'third feature is unknown'
)
};
return
;
}
sub
test_private_data {
my
$dataset
= Algorithm::AM::DataSet->new(
cardinality
=> 3);
is_deeply(
$dataset
->_data_classes, [],
"empty data set has empty classes"
);
my
@data
= chapter_3_data();
$data
[1][2] =
''
;
for
my
$datum
(
@data
){
$dataset
->add_item(
features
=>
$datum
->[0],
class
=>
$datum
->[1],
comment
=>
$datum
->[2]);
}
is_deeply(
$dataset
->_data_classes, [
qw(1 2 2 2 2)
],
"list of classes for data set"
);
is(
$dataset
->_index_for_class(
'r'
), 2,
'_index_for_class'
);
is(
$dataset
->_class_for_index(2),
'r'
,
'_class_for_index'
);
return
;
}