# Package version, assigned to the fully qualified package variable inside
# a bare block.
{
    $Treex::Tool::Parser::MSTperl::FeaturesControl::VERSION = '0.11319';
}
use
5.010;
# ---------------------------------------------------------------------------
# Attributes
# ---------------------------------------------------------------------------

# Parser configuration object (mandatory, held as a weak reference).
has 'config' => (
    is       => 'ro',
    isa      => 'Treex::Tool::Parser::MSTperl::Config',
    required => '1',
    weak_ref => '1',
);

# Number of registered features; BUILD sets it to the size of feature_codes.
has 'feature_count' => (
    is  => 'rw',
    isa => 'Int',
);

# Feature codes supplied from outside; BUILD passes each one to set_feature().
has 'feature_codes_from_config' => (
    is      => 'rw',
    isa     => 'ArrayRef[Str]',
    default => sub { [] },
);

# Registered feature codes; the position in this array is the feature index.
has 'feature_codes' => (
    is      => 'rw',
    isa     => 'ArrayRef[Str]',
    default => sub { [] },
);

# feature code => 1 for every registered feature (duplicate detection).
has 'feature_codes_hash' => (
    is      => 'rw',
    isa     => 'HashRef[Str]',
    default => sub { {} },
);

# feature code => feature index.
has 'feature_indexes' => (
    is      => 'rw',
    isa     => 'HashRef[Str]',
    default => sub { {} },
);

# For each feature index, the list of simple feature indexes it is built from.
has 'feature_simple_features_indexes' => (
    is      => 'rw',
    isa     => 'ArrayRef[ArrayRef[Int]]',
    default => sub { [] },
);

# feature index => 1 when at least one of its simple features is an array
# simple feature.
has 'array_features' => (
    is      => 'rw',
    isa     => 'HashRef[Int]',
    default => sub { {} },
);

# feature index => 1 when at least one of its simple features is dynamic.
has 'dynamic_features' => (
    is      => 'rw',
    isa     => 'HashRef[Int]',
    default => sub { {} },
);

# Number of registered simple features; BUILD sets it to the size of
# simple_feature_codes.
has 'simple_feature_count' => (
    is  => 'rw',
    isa => 'Int',
);

# Registered simple feature codes; the position is the simple feature index.
has 'simple_feature_codes' => (
    is      => 'rw',
    isa     => 'ArrayRef[Str]',
    default => sub { [] },
);

# simple feature code => 1 for every registered simple feature.
has 'simple_feature_codes_hash' => (
    is      => 'rw',
    isa     => 'HashRef[Str]',
    default => sub { {} },
);

# simple feature code => simple feature index.
has 'simple_feature_indexes' => (
    is      => 'rw',
    isa     => 'HashRef[Str]',
    default => sub { {} },
);

# For each simple feature index, the code reference that computes its value.
has 'simple_feature_subs' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# For each simple feature index, the argument handed to its sub: whatever
# config->field_name2index() returned for the field name(s) in the code.
has 'simple_feature_sub_arguments' => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# simple feature index => 1 for simple features flagged as array-valued.
has 'array_simple_features' => (
    is      => 'rw',
    isa     => 'HashRef[Int]',
    default => sub { {} },
);

# simple feature index => 1 for simple features flagged as dynamic
# (the label-based ones).
has 'dynamic_simple_features' => (
    is      => 'rw',
    isa     => 'HashRef[Int]',
    default => sub { {} },
);

# When true, get_all_features() caches its result per edge signature.
has 'use_edge_features_cache' => (
    is      => 'ro',
    isa     => 'Bool',
    default => '0',
);

# edge signature => array reference of computed feature strings.
has 'edge_features_cache' => (
    is      => 'rw',
    isa     => 'HashRef[ArrayRef[Str]]',
    default => sub { {} },
);

# Additional model consulted by the feature_pmi* subs (may be undef).
has pmi_model => (
    is      => 'rw',
    isa     => 'Maybe[Treex::Tool::Parser::MSTperl::ModelAdditional]',
    default => undef,
);

# Additional model consulted by the feature_cprob* subs (may be undef).
has cprob_model => (
    is      => 'rw',
    isa     => 'Maybe[Treex::Tool::Parser::MSTperl::ModelAdditional]',
    default => undef,
);
# Construction hook: registers every code from feature_codes_from_config via
# set_feature() and then stores the resulting numbers of features and simple
# features in feature_count and simple_feature_count.
# Returns nothing.
sub BUILD {
    my ($self) = @_;

    for my $feature ( @{ $self->feature_codes_from_config } ) {
        $self->set_feature($feature);
    }

    my $registered_features = @{ $self->feature_codes };
    $self->feature_count($registered_features);

    my $registered_simple_features = @{ $self->simple_feature_codes };
    $self->simple_feature_count($registered_simple_features);

    return;
}
# Registers one feature code. A feature code is a '|'-separated list of
# simple feature codes; unknown simple features are registered on the fly
# through set_simple_feature().
#
# A feature that was already registered only triggers a warning. A simple
# feature repeated inside one code is warned about and used once.
# The feature is flagged as an array / dynamic feature when any of its
# simple features carries that flag.
# Returns nothing.
sub set_feature {
    my ( $self, $feature_code ) = @_;

    if ( $self->feature_codes_hash->{$feature_code} ) {
        warn "Feature '$feature_code' is defined more than once; "
            . "disregarding its later definitions.\n";
        return;
    }

    my ( $has_array_part, $has_dynamic_part ) = ( 0, 0 );
    my @part_indexes;
    my %part_seen;

    for my $simple_feature_code ( split /\|/, $feature_code ) {

        if ( $part_seen{$simple_feature_code} ) {
            warn "Simple feature '$simple_feature_code' "
                . "is used more than once in '$feature_code'; "
                . "disregarding its later uses.\n";
            next;
        }

        # first use of this simple feature anywhere: register it
        $self->set_simple_feature($simple_feature_code)
            unless $self->simple_feature_codes_hash->{$simple_feature_code};

        my $part_index =
            $self->simple_feature_indexes->{$simple_feature_code};
        $part_seen{$simple_feature_code} = 1;

        $has_array_part   = 1 if $self->array_simple_features->{$part_index};
        $has_dynamic_part = 1 if $self->dynamic_simple_features->{$part_index};

        push @part_indexes, $part_index;
    }

    # the new feature gets the next free index
    my $feature_index = @{ $self->feature_codes };
    $self->feature_codes_hash->{$feature_code} = 1;
    $self->feature_indexes->{$feature_code}    = $feature_index;
    push @{ $self->feature_codes },                   $feature_code;
    push @{ $self->feature_simple_features_indexes }, [@part_indexes];

    $self->array_features->{$feature_index}   = 1 if $has_array_part;
    $self->dynamic_features->{$feature_index} = 1 if $has_dynamic_part;

    return;
}
# Registers one simple feature code and stores the sub and argument used to
# compute it.
#
# Recognised forms of $simple_feature_code:
#   field        (all lowercase)  - field of the child node
#   FIELD        (all uppercase)  - field of the parent node
#   X.field      with X one of 1 2 g G l r - field of another node
#   function(args) - function feature; args is an empty string, one field
#                  name, or a comma-separated list of field names
#
# The field name(s) are translated with config->field_name2index() and the
# result is stored as the sub's argument: an empty array reference for no
# arguments, a scalar for one argument, an array reference for several.
# Function features 'between', 'foreach' and 'g.*' are flagged as array
# simple features; the label functions are flagged as dynamic.
#
# Dies on a malformed code; croaks (via get_simple_feature_sub_reference)
# on an unknown function name. Returns nothing.
sub set_simple_feature {
    my ( $self, $simple_feature_code ) = @_;

    # the new simple feature gets the next free index
    my $simple_feature_index = scalar @{ $self->simple_feature_codes };
    my $simple_feature_sub;
    my $simple_feature_field;

    if ( $simple_feature_code =~ /^[a-zA-Z0-9_]+$/ ) {

        # plain field name: letter case selects child or parent
        if ( $simple_feature_code =~ /^([a-z0-9_]+)$/ ) {
            $simple_feature_sub   = \&feature_child;
            $simple_feature_field = $1;
        }
        elsif ( $simple_feature_code =~ /^([A-Z0-9_]+)$/ ) {
            $simple_feature_sub   = \&feature_parent;
            $simple_feature_field = lc $1;
        }
        else {
            die "Incorrect simple feature format '$simple_feature_code'. "
                . "Use lowercase ("
                . lc($simple_feature_code)
                . ") for child node and UPPERCASE ("
                . uc($simple_feature_code)
                . ") for parent node.\n";
        }
    }
    elsif ( $simple_feature_code =~ /^([12gGlr])\.([a-z0-9_]+)$/ ) {
        my ( $node_selector, $field_name ) = ( $1, $2 );

        my %sub_for_selector = (
            '1' => \&feature_first,
            '2' => \&feature_second,
            'g' => \&feature_grandchildren,
            'G' => \&feature_grandparent,
            'l' => \&feature_left_sibling,
            'r' => \&feature_right_sibling,
        );

        $simple_feature_field = $field_name;
        $simple_feature_sub   = $sub_for_selector{$node_selector}
            or croak "Assertion failed!";
    }
    elsif (
        $simple_feature_code
        =~ /^([12gGlr\.a-z]+|[A-Z]+)\(([-a-z0-9_,]*)\)$/
        )
    {

        # Name and argument list are captured in one go, so the function
        # name never has to be interpolated into another pattern.
        my ( $function_name, $argument_list ) = ( $1, $2 );

        $simple_feature_sub =
            $self->get_simple_feature_sub_reference($function_name);

        if ($function_name eq 'between'
            || $function_name eq 'foreach'
            || substr( $function_name, 0, 2 ) eq 'g.'
            )
        {
            $self->array_simple_features->{$simple_feature_index} = 1;
        }

        if ($function_name eq 'LABEL'
            || $function_name eq 'l.label'
            || $function_name eq 'prevlabel'
            || $function_name eq 'G.label'
            || $function_name eq 'g.label'
            )
        {
            $self->dynamic_simple_features->{$simple_feature_index} = 1;
        }

        if ( $argument_list eq '' ) {

            # no-argument function
            $simple_feature_field = [];
        }
        elsif ( $argument_list !~ /,/ ) {

            # one-argument function
            $simple_feature_field = $argument_list;
        }
        else {

            # multiple-argument function
            $simple_feature_field = [ split /,/, $argument_list ];
        }
    }
    else {
        die "Incorrect simple feature format '$simple_feature_code'.\n";
    }

    my $simple_feature_sub_arguments =
        $self->config->field_name2index($simple_feature_field);

    $self->simple_feature_codes_hash->{$simple_feature_code} = 1;
    $self->simple_feature_indexes->{$simple_feature_code} =
        $simple_feature_index;
    push @{ $self->simple_feature_codes }, $simple_feature_code;
    push @{ $self->simple_feature_subs },  $simple_feature_sub;
    push @{ $self->simple_feature_sub_arguments },
        $simple_feature_sub_arguments;

    return;
}
# Computes the feature strings ("<feature index>:<value>") for an edge and
# returns them as an array reference.
#
# $only_dynamic_features selects a subset:
#   1      - only features flagged in dynamic_features
#   -1     - only features NOT flagged in dynamic_features
#   other / false - all features
#
# Array features contribute one string per value; other features are skipped
# when their value is the empty string.
#
# With use_edge_features_cache on, results are cached by $edge->signature().
# The cache key does not include $only_dynamic_features, so a cached list is
# returned as is, whatever subset is requested.
sub get_all_features {
    my ( $self, $edge, $only_dynamic_features ) = @_;

    my $caching = $self->use_edge_features_cache;
    my $edge_signature;
    if ($caching) {
        $edge_signature = $edge->signature();
        my $cache_features = $self->edge_features_cache->{$edge_signature};
        return $cache_features if $cache_features;
    }

    my $simple_feature_values = $self->get_simple_feature_values_array($edge);

    my @features;
    my $features_count = $self->feature_count;

    FEATURE:
    for (
        my $feature_index = 0;
        $feature_index < $features_count;
        $feature_index++
        )
    {
        if ($only_dynamic_features) {
            my $is_dynamic = $self->dynamic_features->{$feature_index};
            next FEATURE if $only_dynamic_features == 1  && !$is_dynamic;
            next FEATURE if $only_dynamic_features == -1 && $is_dynamic;
        }

        my $feature_value =
            $self->get_feature_value( $feature_index, $simple_feature_values );

        if ( $self->array_features->{$feature_index} ) {
            foreach my $value ( @{$feature_value} ) {
                push @features, "$feature_index:$value";
            }
        }
        elsif ( $feature_value ne '' ) {
            push @features, "$feature_index:$feature_value";
        }
    }

    if ( $self->use_edge_features_cache ) {
        $self->edge_features_cache->{$edge_signature} = \@features;
    }

    return \@features;
}
# Computes the value of one feature from the already computed simple feature
# values (array reference indexed by simple feature index).
#
# Array feature: returns what get_array_feature_value() produces, or an
# empty array reference when that result is false.
# Other feature: returns the simple feature values joined by '|', or the
# empty string as soon as one of them is undefined or empty.
sub get_feature_value {
    my ( $self, $feature_index, $simple_feature_values ) = @_;

    my $simple_features_indexes =
        $self->feature_simple_features_indexes->[$feature_index];

    if ( $self->array_features->{$feature_index} ) {
        my $combined = $self->get_array_feature_value(
            $simple_features_indexes, $simple_feature_values, 0
        );
        return $combined ? $combined : [];
    }

    my @parts;
    foreach my $simple_feature_index ( @{$simple_features_indexes} ) {
        my $part = $simple_feature_values->[$simple_feature_index];

        # one missing component invalidates the whole feature
        return '' if !defined $part || $part eq '';

        push @parts, $part;
    }

    return join '|', @parts;
}
# Recursively builds the values of an array feature: the cross product of
# the values of its simple features from position $start_from onwards, each
# combination joined by '|'.
#
# $simple_features_indexes - array ref of simple feature indexes of the feature
# $simple_feature_values   - array ref of values indexed by simple feature index
# $start_from              - position in $simple_features_indexes to start at
#
# A simple feature not flagged in array_simple_features is treated as a
# one-element list. For the last position the (possibly wrapped) value is
# returned unchanged; otherwise a new array reference is returned.
sub get_array_feature_value {
    my (
        $self,
        $simple_features_indexes,
        $simple_feature_values,
        $start_from
    ) = @_;

    my $simple_feature_index = $simple_features_indexes->[$start_from];
    my $value                = $simple_feature_values->[$simple_feature_index];

    # wrap scalar values so that everything below can treat them as lists
    $value = [ ($value) ]
        unless $self->array_simple_features->{$simple_feature_index};

    my $last_position = scalar( @{$simple_features_indexes} ) - 1;
    if ( !( $start_from < $last_position ) ) {
        return $value;
    }

    my $append = $self->get_array_feature_value(
        $simple_features_indexes, $simple_feature_values, $start_from + 1
    );

    my @values;
    foreach my $my_value ( @{$value} ) {
        foreach my $append_value ( @{$append} ) {
            push @values, "$my_value|$append_value";
        }
    }

    return \@values;
}
# Computes the values of all registered simple features for an edge.
# For each simple feature index, the stored sub is called (in scalar
# context) with ($self, $edge, <stored argument>).
# Returns an array reference of the values, indexed by simple feature index.
sub get_simple_feature_values_array {
    my ( $self, $edge ) = @_;

    my $simple_feature_count = $self->simple_feature_count;
    my @collected;

    my $simple_feature_index = 0;
    while ( $simple_feature_index < $simple_feature_count ) {
        my $sub       = $self->simple_feature_subs->[$simple_feature_index];
        my $arguments =
            $self->simple_feature_sub_arguments->[$simple_feature_index];

        my $value = $sub->( $self, $edge, $arguments );
        push @collected, $value;

        $simple_feature_index++;
    }

    return \@collected;
}
# Dispatch table: name of a function feature (the part before the
# parentheses in a simple feature code) => sub that computes it.
my %simple_feature_sub_references = (

    # labels
    'LABEL'     => \&feature_parent_label,
    'prevlabel' => \&feature_previous_label,
    'l.label'   => \&feature_previous_label,
    'G.label'   => \&feature_grandparent_label,
    'g.label'   => \&feature_grandchildren_label,

    # distance and attachment direction
    'distance'   => \&feature_distance,
    'G.distance' => \&feature_grandparent_distance,
    'attdir'     => \&feature_attachement_direction,
    'G.attdir'   => \&feature_grandparent_attachement_direction,

    # neighbouring nodes
    'preceding'   => \&feature_preceding_child,
    'PRECEDING'   => \&feature_preceding_parent,
    '1.preceding' => \&feature_preceding_first,
    '2.preceding' => \&feature_preceding_second,
    'following'   => \&feature_following_child,
    'FOLLOWING'   => \&feature_following_parent,
    '1.following' => \&feature_following_first,
    '2.following' => \&feature_following_second,

    # array-valued features
    'between' => \&feature_between,
    'foreach' => \&feature_foreach,

    # field comparisons
    'equals'     => \&feature_equals,
    'equalspc'   => \&feature_equals_pc,
    'equalspcat' => \&feature_equals_pc_at,

    # array field lookups
    'arrayat'   => \&feature_array_at_child,
    'ARRAYAT'   => \&feature_array_at_parent,
    'arrayatcp' => \&feature_array_at_cp,

    # position in the sentence / among siblings
    'isfirst'           => \&feature_child_is_first_in_sentence,
    'ISFIRST'           => \&feature_parent_is_first_in_sentence,
    'islast'            => \&feature_child_is_last_in_sentence,
    'ISLAST'            => \&feature_parent_is_last_in_sentence,
    'isfirstchild'      => \&feature_child_is_first_child,
    'islastchild'       => \&feature_child_is_last_child,
    'islastleftchild'   => \&feature_child_is_last_left_child,
    'isfirstrightchild' => \&feature_child_is_first_right_child,

    # numbers of children
    'childno' => \&feature_number_of_childs_children,
    'CHILDNO' => \&feature_number_of_parents_children,

    # substrings
    'substr' => \&feature_substr_child,
    'SUBSTR' => \&feature_substr_parent,

    # additional models
    'pmi'           => \&feature_pmi,
    'pmibucketed'   => \&feature_pmi_bucketed,
    'pmirounded'    => \&feature_pmi_rounded,
    'pmid'          => \&feature_pmi_d,
    'cprob'         => \&feature_cprob,
    'cprobbucketed' => \&feature_cprob_bucketed,
    'cprobrounded'  => \&feature_cprob_rounded,
);
# Looks up the sub implementing the function feature named
# $simple_feature_function in %simple_feature_sub_references.
# Returns the code reference; croaks when the name is not in the table.
sub get_simple_feature_sub_reference {
    my ( $self, $simple_feature_function ) = @_;

    my $sub_reference =
        $simple_feature_sub_references{$simple_feature_function};

    croak "Unknown feature function '$simple_feature_function'!"
        unless $sub_reference;

    return $sub_reference;
}
# Returns the parent of the edge's parent node (whatever the parent node's
# own ->parent yields).
sub get_grandparent {
    my ( $self, $edge ) = @_;

    my $parent_node = $edge->parent;
    return $parent_node->parent;
}
# 'distance' feature: bucketed distance between the edge's parent and child,
# as computed by feature_distance_generic().
sub feature_distance {
    my ( $self, $edge ) = @_;

    my ( $parent_node, $child_node ) = ( $edge->parent, $edge->child );
    return $self->feature_distance_generic( $parent_node, $child_node );
}
# 'G.distance' feature: bucketed distance between the edge's grandparent
# (parent of the parent) and its child, as computed by
# feature_distance_generic().
# Returns '#novalue#' when there is no grandparent.
sub feature_grandparent_distance {
    my ( $self, $edge ) = @_;

    my $grandparent = $self->get_grandparent($edge);
    return '#novalue#' if !defined $grandparent;

    # Measure from the grandparent; measuring from the parent would merely
    # repeat the plain 'distance' feature.
    return $self->feature_distance_generic( $grandparent, $edge->child );
}
# Bucketed signed distance between two nodes: ord of $node1 minus ord of
# $node2, mapped through config->distance2bucket.
# Distances without a bucket are clamped: config->minBucket when the
# distance is at most minBucket, config->maxBucket otherwise.
sub feature_distance_generic {
    my ( $self, $node1, $node2 ) = @_;

    my $distance = $node1->ord - $node2->ord;

    my $bucket = $self->config->distance2bucket->{$distance};
    return $bucket if defined $bucket;

    return $self->config->minBucket
        if $distance <= $self->config->minBucket;

    return $self->config->maxBucket;
}
# 'attdir' feature: attachment direction between the edge's parent and
# child, as computed by feature_attachement_direction_generic().
sub feature_attachement_direction {
    my ( $self, $edge ) = @_;

    my ( $parent_node, $child_node ) = ( $edge->parent, $edge->child );
    return $self->feature_attachement_direction_generic(
        $parent_node, $child_node
    );
}
# 'G.attdir' feature: attachment direction between the edge's grandparent
# (parent of the parent) and its child, as computed by
# feature_attachement_direction_generic().
# Returns '#novalue#' when there is no grandparent.
sub feature_grandparent_attachement_direction {
    my ( $self, $edge ) = @_;

    my $grandparent = $self->get_grandparent($edge);
    return '#novalue#' if !defined $grandparent;

    # Compare against the grandparent; comparing against the parent would
    # merely repeat the plain 'attdir' feature.
    return $self->feature_attachement_direction_generic(
        $grandparent, $edge->child
    );
}
# Relative order of two nodes: -1 when $node1 has a lower ord than $node2,
# 1 otherwise (including equal ords).
sub feature_attachement_direction_generic {
    my ( $self, $node1, $node2 ) = @_;

    return $node1->ord < $node2->ord ? -1 : 1;
}
# Value of field number $field_index of the edge's child node.
sub feature_child {
    my ( $self, $edge, $field_index ) = @_;

    my $child_fields = $edge->child->fields;
    return $child_fields->[$field_index];
}
# Value of field number $field_index of the edge's parent node.
sub feature_parent {
    my ( $self, $edge, $field_index ) = @_;

    my $parent_fields = $edge->parent->fields;
    return $parent_fields->[$field_index];
}
# Value of field number $field_index of the edge's grandparent node, or
# '#novalue#' when get_grandparent() yields nothing.
sub feature_grandparent {
    my ( $self, $edge, $field_index ) = @_;

    my $grandparent = $self->get_grandparent($edge);
    return '#novalue#' unless defined $grandparent;

    return $grandparent->fields->[$field_index];
}
# 'LABEL' feature: label of the edge's parent node.
sub feature_parent_label {
    my ( $self, $edge ) = @_;

    my $parent_node = $edge->parent;
    return $parent_node->label;
}
# 'prevlabel' / 'l.label' feature: label of the child of the left sibling
# edge, or config->SEQUENCE_BOUNDARY_LABEL when there is no left sibling.
sub feature_previous_label {
    my ( $self, $edge ) = @_;

    my $left_sibling = $self->get_left_sibling($edge);
    return $self->config->SEQUENCE_BOUNDARY_LABEL
        unless defined $left_sibling;

    return $left_sibling->child->label;
}
# 'G.label' feature: label of the edge's grandparent node, or '#novalue#'
# when get_grandparent() yields nothing.
sub feature_grandparent_label {
    my ( $self, $edge ) = @_;

    my $grandparent = $self->get_grandparent($edge);
    return '#novalue#' unless defined $grandparent;

    return $grandparent->label;
}
# Value of field number $field_index of the edge's ->first node.
sub feature_first {
    my ( $self, $edge, $field_index ) = @_;

    my $first_fields = $edge->first->fields;
    return $first_fields->[$field_index];
}
# Value of field number $field_index of the edge's ->second node.
sub feature_second {
    my ( $self, $edge, $field_index ) = @_;

    my $second_fields = $edge->second->fields;
    return $second_fields->[$field_index];
}
# Value of field number $field_index of the child of the left sibling edge,
# or '#start#' when there is no left sibling.
sub feature_left_sibling {
    my ( $self, $edge, $field_index ) = @_;

    my $left_sibling = $self->get_left_sibling($edge);
    return '#start#' unless defined $left_sibling;

    return $left_sibling->child->fields->[$field_index];
}
# Value of field number $field_index of the child of the right sibling edge,
# or '#end#' when there is no right sibling.
sub feature_right_sibling {
    my ( $self, $edge, $field_index ) = @_;

    my $right_sibling = $self->get_right_sibling($edge);
    return '#end#' unless defined $right_sibling;

    return $right_sibling->child->fields->[$field_index];
}
# Finds the edge that immediately precedes $edge in its parent's ->children
# list; edges are matched by the ord of their child node.
# Returns that edge, or nothing when $edge is the first one in the list.
# The search assumes $edge's child is present in the list.
sub get_left_sibling {
    my ( $self, $edge ) = @_;

    my $siblings = $edge->parent->children;

    # nothing to the left of the first child
    return if $siblings->[0]->child->ord == $edge->child->ord;

    my $my_index = 1;
    $my_index++
        while $siblings->[$my_index]->child->ord != $edge->child->ord;

    return $siblings->[ $my_index - 1 ];
}
# Finds the edge that immediately follows $edge in its parent's ->children
# list; edges are matched by the ord of their child node.
# Returns that edge, or nothing when $edge is the last one in the list.
# The search assumes $edge's child is present in the list.
sub get_right_sibling {
    my ( $self, $edge ) = @_;

    my $siblings           = $edge->parent->children;
    my $last_sibling_index = scalar( @{$siblings} ) - 1;

    # nothing to the right of the last child
    return
        if $siblings->[$last_sibling_index]->child->ord == $edge->child->ord;

    # scan backwards from the last but one sibling
    my $my_index = $last_sibling_index - 1;
    $my_index--
        while $siblings->[$my_index]->child->ord != $edge->child->ord;

    return $siblings->[ $my_index + 1 ];
}
# 'preceding' feature: field $field_index of the node right before the child
# in the sentence. Returns '#start#' when there is no such node and '#mid#'
# when that node is the edge's parent.
sub feature_preceding_child {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->child->ord - 1 );

    return '#start#' unless $node;
    return '#mid#' if $edge->parent->ord == $node->ord;
    return $node->fields->[$field_index];
}
# 'PRECEDING' feature: field $field_index of the node right before the
# parent in the sentence. Returns '#start#' when there is no such node and
# '#mid#' when that node is the edge's child.
sub feature_preceding_parent {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->parent->ord - 1 );

    return '#start#' unless $node;
    return '#mid#' if $edge->child->ord == $node->ord;
    return $node->fields->[$field_index];
}
# 'following' feature: field $field_index of the node right after the child
# in the sentence. Returns '#end#' when there is no such node and '#mid#'
# when that node is the edge's parent.
sub feature_following_child {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->child->ord + 1 );

    return '#end#' unless $node;
    return '#mid#' if $edge->parent->ord == $node->ord;
    return $node->fields->[$field_index];
}
# 'FOLLOWING' feature: field $field_index of the node right after the parent
# in the sentence. Returns '#end#' when there is no such node and '#mid#'
# when that node is the edge's child.
sub feature_following_parent {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->parent->ord + 1 );

    return '#end#' unless $node;
    return '#mid#' if $edge->child->ord == $node->ord;
    return $node->fields->[$field_index];
}
# '1.preceding' feature: field $field_index of the node right before the
# edge's ->first node, or '#start#' when there is no such node.
sub feature_preceding_first {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->first->ord - 1 );

    return '#start#' unless $node;
    return $node->fields->[$field_index];
}
# '2.preceding' feature: field $field_index of the node right before the
# edge's ->second node. Returns '#start#' when there is no such node and
# '#mid#' when that node is the edge's ->first node.
sub feature_preceding_second {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->second->ord - 1 );

    return '#start#' unless $node;
    return '#mid#' if $edge->first->ord == $node->ord;
    return $node->fields->[$field_index];
}
# '1.following' feature: field $field_index of the node right after the
# edge's ->first node. Returns '#end#' when there is no such node and
# '#mid#' when that node is the edge's ->second node.
sub feature_following_first {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->first->ord + 1 );

    return '#end#' unless $node;
    return '#mid#' if $edge->second->ord == $node->ord;
    return $node->fields->[$field_index];
}
# '2.following' feature: field $field_index of the node right after the
# edge's ->second node, or '#end#' when there is no such node.
sub feature_following_second {
    my ( $self, $edge, $field_index ) = @_;

    my $node = $edge->sentence->getNodeByOrd( $edge->second->ord + 1 );

    return '#end#' unless $node;
    return $node->fields->[$field_index];
}
# 'between' feature: values of field $field_index of every node lying
# strictly between the edge's parent and child (by ord), in sentence order.
# Returns an array reference, empty when the two nodes are adjacent.
sub feature_between {
    my ( $self, $edge, $field_index ) = @_;

    my ( $from, $to );
    if ( $edge->parent->ord < $edge->child->ord ) {
        ( $from, $to ) = ( $edge->parent->ord + 1, $edge->child->ord - 1 );
    }
    else {
        ( $from, $to ) = ( $edge->child->ord + 1, $edge->parent->ord - 1 );
    }

    my @values;
    my $ord = $from;
    while ( $ord <= $to ) {
        push @values,
            $edge->sentence->getNodeByOrd($ord)->fields->[$field_index];
        $ord++;
    }

    return \@values;
}
# 'foreach' feature: splits field $field_index of the child node on single
# spaces and returns the pieces as an array reference.
# When the field is false (undefined, empty, or '0') the empty string is
# returned instead of an array reference.
sub feature_foreach {
    my ( $self, $edge, $field_index ) = @_;

    my $values = $edge->child->fields->[$field_index];
    return '' unless $values;

    my @pieces = split / /, $edge->child->fields->[$field_index];
    return \@pieces;
}
# 'equals' feature: compares two fields of the child node, each treated as a
# space-separated list of values.
# $field_indexes must hold exactly two field indexes, otherwise croaks.
# Returns 1 when the lists share at least one value, 0 when they do not, and
# -1 when either field is undefined or empty.
sub feature_equals {
    my ( $self, $edge, $field_indexes ) = @_;

    croak "equals() takes TWO arguments!!!"
        unless @{$field_indexes} == 2;

    my ( $field_index_1, $field_index_2 ) = @{$field_indexes};
    my $values_1 = $edge->child->fields->[$field_index_1];
    my $values_2 = $edge->child->fields->[$field_index_2];

    for my $field_value ( $values_1, $values_2 ) {
        return -1 if !defined $field_value || $field_value eq '';
    }

    # set of the values of the first field, probed with those of the second
    my %in_first = map { $_ => 1 } split / /, $values_1;
    my $shared   = grep { $in_first{$_} } split / /, $values_2;

    return $shared ? 1 : 0;
}
# 'equalspc' feature: compares a field of the parent node (first index) with
# a field of the child node (second index), each treated as a
# space-separated list of values.
# $field_indexes must hold exactly two field indexes, otherwise croaks.
# Returns 1 when the lists share at least one value, 0 when they do not, and
# -1 when either field is undefined or empty.
sub feature_equals_pc {
    my ( $self, $edge, $field_indexes ) = @_;

    croak "equals() takes TWO arguments!!!"
        unless @{$field_indexes} == 2;

    my ( $field_index_1, $field_index_2 ) = @{$field_indexes};
    my $values_1 = $edge->parent->fields->[$field_index_1];
    my $values_2 = $edge->child->fields->[$field_index_2];

    for my $field_value ( $values_1, $values_2 ) {
        return -1 if !defined $field_value || $field_value eq '';
    }

    # set of the parent's values, probed with the child's values
    my %in_parent = map { $_ => 1 } split / /, $values_1;
    my $shared    = grep { $in_parent{$_} } split / /, $values_2;

    return $shared ? 1 : 0;
}
# 'equalspcat' feature: compares the character at offset $position of the
# same field in the parent and in the child node.
# $arguments must hold exactly two items (field index, position), otherwise
# croaks.
# Returns 1 when the characters are equal, 0 when they differ, and -1 when
# either field is undefined or not longer than $position.
sub feature_equals_pc_at {
    my ( $self, $edge, $arguments ) = @_;

    croak "equals() takes TWO arguments!!!"
        unless @{$arguments} == 2;

    my ( $field_index, $position ) = @{$arguments};
    my $field_parent = $edge->parent->fields->[$field_index];
    my $field_child  = $edge->child->fields->[$field_index];

    for my $field ( $field_parent, $field_child ) {
        return -1 unless defined $field && length($field) > $position;
    }

    my $value_parent = substr( $field_parent, $position, 1 );
    my $value_child  = substr( $field_child,  $position, 1 );

    return $value_parent eq $value_child ? 1 : 0;
}
# 'substr' feature: substring of a field of the child node.
# $arguments holds (field index, start) or (field index, start, length);
# any other number of items croaks.
# Returns the substring, or the empty string when the field is undefined.
sub feature_substr_child {
    my ( $self, $edge, $arguments ) = @_;

    my $argument_count = @{$arguments};
    croak "substr() takes THREE or TWO arguments!!!"
        if $argument_count != 3 && $argument_count != 2;

    my ( $field_index, $start, $length ) = @{$arguments};
    my $field = $edge->child->fields->[$field_index];

    my $value = '';
    if ( defined $field ) {
        $value =
            defined $length
            ? substr( $field, $start, $length )
            : substr( $field, $start );
    }

    return $value;
}
# 'SUBSTR' feature: substring of a field of the parent node.
# $arguments holds (field index, start) or (field index, start, length);
# any other number of items croaks.
# Returns the substring, or the empty string when the field is undefined.
sub feature_substr_parent {
    my ( $self, $edge, $arguments ) = @_;

    my $argument_count = @{$arguments};
    croak "substr() takes THREE or TWO arguments!!!"
        if $argument_count != 3 && $argument_count != 2;

    my ( $field_index, $start, $length ) = @{$arguments};
    my $field = $edge->parent->fields->[$field_index];

    my $value = '';
    if ( defined $field ) {
        $value =
            defined $length
            ? substr( $field, $start, $length )
            : substr( $field, $start );
    }

    return $value;
}
# 'arrayat' feature: treats one field of the child node as a space-separated
# array and returns the element whose position is given by another field of
# the child node.
# $arguments must hold exactly (array field index, index field index),
# otherwise croaks. Returns the empty string when there is no such element.
sub feature_array_at_child {
    my ( $self, $edge, $arguments ) = @_;

    croak "arrayat() takes TWO arguments!!!"
        if @{$arguments} != 2;

    my ( $array_field, $index_field ) = @{$arguments};
    my $array = $edge->child->fields->[$array_field];
    my $index = $edge->child->fields->[$index_field];

    my @array = split / /, $array;

    return $array[$index] // '';
}
# 'ARRAYAT' feature: treats one field of the parent node as a
# space-separated array and returns the element whose position is given by
# another field of the parent node.
# $arguments must hold exactly (array field index, index field index),
# otherwise croaks. Returns the empty string when there is no such element.
sub feature_array_at_parent {
    my ( $self, $edge, $arguments ) = @_;

    croak "arrayat() takes TWO arguments!!!"
        if @{$arguments} != 2;

    my ( $array_field, $index_field ) = @{$arguments};
    my $array = $edge->parent->fields->[$array_field];
    my $index = $edge->parent->fields->[$index_field];

    my @array = split / /, $array;

    return $array[$index] // '';
}
# 'arrayatcp' feature: treats one field of the child node as a
# space-separated array and returns the element whose position is given by
# a field of the parent node.
# $arguments must hold exactly (array field index, index field index),
# otherwise croaks. Returns the empty string when there is no such element.
sub feature_array_at_cp {
    my ( $self, $edge, $arguments ) = @_;

    croak "arrayat() takes TWO arguments!!!"
        if @{$arguments} != 2;

    my ( $array_field, $index_field ) = @{$arguments};
    my $array = $edge->child->fields->[$array_field];
    my $index = $edge->parent->fields->[$index_field];

    my @array = split / /, $array;

    return $array[$index] // '';
}
# 'isfirst' feature: 1 when the child node has ord 1, 0 otherwise.
sub feature_child_is_first_in_sentence {
    my ( $self, $edge ) = @_;

    return $edge->child->ord == 1 ? 1 : 0;
}
# 'ISFIRST' feature: 1 when the parent node has ord 1, 0 otherwise.
sub feature_parent_is_first_in_sentence {
    my ( $self, $edge ) = @_;

    return $edge->parent->ord == 1 ? 1 : 0;
}
# 'islast' feature: 1 when the child's ord equals the number of elements in
# the sentence's ->nodes list, 0 otherwise.
sub feature_child_is_last_in_sentence {
    my ( $self, $edge ) = @_;

    my $child_ord  = $edge->child->ord;
    my $node_count = scalar @{ $edge->sentence->nodes };

    return $child_ord == $node_count ? 1 : 0;
}
# 'ISLAST' feature: 1 when the parent's ord equals the number of elements in
# the sentence's ->nodes list, 0 otherwise.
sub feature_parent_is_last_in_sentence {
    my ( $self, $edge ) = @_;

    my $parent_ord = $edge->parent->ord;
    my $node_count = scalar @{ $edge->sentence->nodes };

    return $parent_ord == $node_count ? 1 : 0;
}
# 'isfirstchild' feature: 1 when the edge's child is the child of the first
# edge in its parent's ->children list (compared by ord), 0 otherwise.
sub feature_child_is_first_child {
    my ( $self, $edge ) = @_;

    my $children        = $edge->parent->children;
    my $first_child_ord = $children->[0]->child->ord;

    return $first_child_ord == $edge->child->ord ? 1 : 0;
}
# 'islastchild' feature: 1 when the edge's child is the child of the last
# edge in its parent's ->children list (compared by ord), 0 otherwise.
sub feature_child_is_last_child {
    my ( $self, $edge ) = @_;

    my $children       = $edge->parent->children;
    my $last_index     = scalar( @{$children} ) - 1;
    my $last_child_ord = $children->[$last_index]->child->ord;

    return $last_child_ord == $edge->child->ord ? 1 : 0;
}
# 'isfirstrightchild' feature: 1 when the child stands to the right of its
# parent (higher ord) and no other child of the parent stands between them,
# i.e. it is the first entry of the parent's ->children list or the entry
# before it has an ord lower than the parent's. Returns 0 otherwise.
# The search assumes the child is present in the ->children list.
sub feature_child_is_first_right_child {
    my ( $self, $edge ) = @_;

    # children on the left of the parent never qualify
    return 0 unless $edge->parent->ord < $edge->child->ord;

    my $siblings = $edge->parent->children;
    return 1 if $siblings->[0]->child->ord == $edge->child->ord;

    my $my_index = 1;
    $my_index++
        while $siblings->[$my_index]->child->ord != $edge->child->ord;

    my $previous_sibling_ord = $siblings->[ $my_index - 1 ]->child->ord;
    return $previous_sibling_ord < $edge->parent->ord ? 1 : 0;
}
# 'islastleftchild' feature: 1 when the child stands to the left of its
# parent (lower ord) and no other child of the parent stands between them,
# i.e. it is the last entry of the parent's ->children list or the entry
# after it has an ord higher than the parent's. Returns 0 otherwise.
# The search assumes the child is present in the ->children list.
sub feature_child_is_last_left_child {
    my ( $self, $edge ) = @_;

    # children on the right of the parent never qualify
    return 0 unless $edge->child->ord < $edge->parent->ord;

    my $siblings           = $edge->parent->children;
    my $last_sibling_index = scalar( @{$siblings} ) - 1;
    return 1
        if $siblings->[$last_sibling_index]->child->ord == $edge->child->ord;

    # scan backwards from the last but one sibling
    my $my_index = $last_sibling_index - 1;
    $my_index--
        while $siblings->[$my_index]->child->ord != $edge->child->ord;

    my $next_sibling_ord = $siblings->[ $my_index + 1 ]->child->ord;
    return $edge->parent->ord < $next_sibling_ord ? 1 : 0;
}
# 'childno' feature: number of entries in the child node's ->children list;
# 0 when the list is missing or empty.
sub feature_number_of_childs_children {
    my ( $self, $edge ) = @_;

    my $children = $edge->child->children;
    return 0 unless $children;

    my $count = scalar @{$children};
    return $count ? $count : 0;
}
# 'CHILDNO' feature: number of entries in the parent node's ->children list;
# 0 when the list is missing or empty.
sub feature_number_of_parents_children {
    my ( $self, $edge ) = @_;

    my $children = $edge->parent->children;
    return 0 unless $children;

    my $count = scalar @{$children};
    return $count ? $count : 0;
}
# Looks up the pair (child field value, parent field value) in an additional
# model: returns $model->get_value($child, $parent), where both values come
# from field $field_index.
# Croaks when either field value is undefined.
sub feature_additional_model {
    my ( $self, $edge, $field_index, $model ) = @_;

    my $child  = $edge->child->fields->[$field_index];
    my $parent = $edge->parent->fields->[$field_index];

    croak "Either child or parent is undefined in additional model feature, "
        . "this should not happen!"
        unless defined $child && defined $parent;

    return $model->get_value( $child, $parent );
}
# Bucketed lookup in an additional model: returns
# $model->get_bucketed_value($child, $parent), where both values come from
# field $field_index of the child and parent node.
# Croaks when either field value is undefined.
sub feature_additional_model_bucketed {
    my ( $self, $edge, $field_index, $model ) = @_;

    my $child  = $edge->child->fields->[$field_index];
    my $parent = $edge->parent->fields->[$field_index];

    croak "Either child or parent is undefined in additional model feature, "
        . "this should not happen!"
        unless defined $child && defined $parent;

    return $model->get_bucketed_value( $child, $parent );
}
# Rounded lookup in an additional model.
# $parameters is an array reference (field index, rounding); returns
# $model->get_rounded_value($child, $parent, $rounding), where both values
# come from the given field of the child and parent node.
# Croaks when either field value is undefined.
sub feature_additional_model_rounded {
    my ( $self, $edge, $parameters, $model ) = @_;

    my ( $field_index, $rounding ) = @{$parameters};

    my $child  = $edge->child->fields->[$field_index];
    my $parent = $edge->parent->fields->[$field_index];

    croak "Either child or parent is undefined in additional model feature, "
        . "this should not happen!"
        unless defined $child && defined $parent;

    return $model->get_rounded_value( $child, $parent, $rounding );
}
# Lookup in an additional model using different fields for child and parent.
# $parameters is an array reference (child field index, parent field index);
# returns $model->get_rounded_value($child, $parent), called without a
# rounding argument.
# Croaks when either field value is undefined.
sub feature_additional_model_d {
    my ( $self, $edge, $parameters, $model ) = @_;

    my ( $field_index_c, $field_index_p ) = @{$parameters};

    my $child  = $edge->child->fields->[$field_index_c];
    my $parent = $edge->parent->fields->[$field_index_p];

    croak "Either child or parent is undefined in additional model feature, "
        . "this should not happen!"
        unless defined $child && defined $parent;

    return $model->get_rounded_value( $child, $parent );
}
# 'pmi' feature: feature_additional_model() evaluated on pmi_model.
sub feature_pmi {
    my ( $self, $edge, $field_index ) = @_;

    my $model = $self->pmi_model;
    return $self->feature_additional_model( $edge, $field_index, $model );
}
# 'pmibucketed' feature: feature_additional_model_bucketed() evaluated on
# pmi_model.
sub feature_pmi_bucketed {
    my ( $self, $edge, $field_index ) = @_;

    my $model = $self->pmi_model;
    return $self->feature_additional_model_bucketed(
        $edge, $field_index, $model
    );
}
# 'pmirounded' feature: feature_additional_model_rounded() evaluated on
# pmi_model; $parameters is passed through unchanged.
sub feature_pmi_rounded {
    my ( $self, $edge, $parameters ) = @_;

    my $model = $self->pmi_model;
    return $self->feature_additional_model_rounded(
        $edge, $parameters, $model
    );
}
# 'pmid' feature: feature_additional_model_d() evaluated on pmi_model;
# $parameters is passed through unchanged.
sub feature_pmi_d {
    my ( $self, $edge, $parameters ) = @_;

    my $model = $self->pmi_model;
    return $self->feature_additional_model_d( $edge, $parameters, $model );
}
# Shortcut for feature_pmi_rounded() with the parameter pair
# ($field_index, 1).
sub feature_pmi_2_rounded {
    my ( $self, $edge, $field_index ) = @_;

    return $self->feature_pmi_rounded( $edge, [ $field_index, 1 ] );
}
# Shortcut for feature_pmi_rounded() with the parameter pair
# ($field_index, 2).
sub feature_pmi_3_rounded {
    my ( $self, $edge, $field_index ) = @_;

    return $self->feature_pmi_rounded( $edge, [ $field_index, 2 ] );
}
# 'cprob' feature: feature_additional_model() evaluated on cprob_model.
sub feature_cprob {
    my ( $self, $edge, $field_index ) = @_;

    my $model = $self->cprob_model;
    return $self->feature_additional_model( $edge, $field_index, $model );
}
# 'cprobbucketed' feature: feature_additional_model_bucketed() evaluated on
# cprob_model.
sub feature_cprob_bucketed {
    my ( $self, $edge, $field_index ) = @_;

    my $model = $self->cprob_model;
    return $self->feature_additional_model_bucketed(
        $edge, $field_index, $model
    );
}
# 'cprobrounded' feature: feature_additional_model_rounded() evaluated on
# cprob_model; $parameters is passed through unchanged.
sub feature_cprob_rounded {
    my ( $self, $edge, $parameters ) = @_;

    my $model = $self->cprob_model;
    return $self->feature_additional_model_rounded(
        $edge, $parameters, $model
    );
}
# Shortcut for feature_cprob_rounded() with the parameter pair
# ($field_index, 1).
sub feature_cprob_2_rounded {
    my ( $self, $edge, $field_index ) = @_;

    return $self->feature_cprob_rounded( $edge, [ $field_index, 1 ] );
}
# Shortcut for feature_cprob_rounded() with the parameter pair
# ($field_index, 2).
sub feature_cprob_3_rounded {
    my ( $self, $edge, $field_index ) = @_;

    return $self->feature_cprob_rounded( $edge, [ $field_index, 2 ] );
}
1;