our
$VERSION
=
'0.08'
;
use
POSIX
qw(floor ceil)
;
my
$packing
= ($] >= 5.008 ?
'w*'
:
'V*'
);
sub
new {
@_
& 1 or croak
'Usage: Algorithm::ClusterPoints->new(%options)'
;
my
(
$class
,
%opts
) =
@_
;
my
$dimension
=
delete
$opts
{dimension};
$dimension
= 2
unless
defined
$dimension
;
$dimension
< 1 and croak
"positive dimension required"
;
my
$radius
=
delete
$opts
{radius};
my
$minimum_size
=
delete
$opts
{minimum_size};
my
$ordered
=
delete
$opts
{ordered};
my
$scales
=
delete
$opts
{scales};
my
$dimensional_groups
=
delete
$opts
{dimensional_groups};
%opts
and croak
"unknown constructor option(s) '"
.
join
(
"', '"
,
sort
keys
%opts
).
"'"
;
my
$self
=
bless
{
radius
=> 1.0,
minimum_size
=> 1,
ordered
=> 0,
dimension
=>
$dimension
,
coords
=> [
map
[], 1..
$dimension
],
scales
=> [
map
1, 1..
$dimension
],
dimensional_groups
=> [[0..
$dimension
-1]],
},
$class
;
$self
->radius(
$radius
)
if
defined
$radius
;
$self
->minimum_size(
$minimum_size
)
if
defined
$minimum_size
;
$self
->ordered(
$ordered
)
if
defined
$ordered
;
if
(
defined
$scales
) {
ref
$scales
eq
'ARRAY'
or croak
'ARRAY reference expected for "scales" option'
;
$self
->scales(
@$scales
);
}
if
(
defined
$dimensional_groups
) {
ref
$dimensional_groups
eq
'ARRAY'
or croak
'ARRAY reference expected for "dimensional_groups" option'
;
$self
->dimensional_groups(
@$dimensional_groups
);
}
$self
;
}
sub
add_point {
my
$self
=
shift
;
my
$dimension
=
$self
->{dimension};
@_
%
$dimension
and croak
'coordinates list size is not a multiple of the problem dimension'
;
delete
$self
->{_clusters};
my
$ix
= @{
$self
->{coords}[0]};
while
(
@_
) {
push
@$_
,
shift
for
(@{
$self
->{coords}});
}
$ix
;
}
*add_points
= \
&add_point
;
sub
point_coords {
@_
== 2 or croak
'Usage: $clp->point_coords($index)'
;
my
(
$self
,
$ix
) =
@_
;
my
$top
= $
croak
"point index $ix out of range [0, $top]"
if
(
$ix
>
$top
or
$ix
< 0);
return
$self
->{coords}[0][
$ix
]
if
$self
->{dimension} == 1;
wantarray
or croak
'method requires list context'
;
map
$_
->[
$ix
], @{
$self
->{coords}};
}
sub
reset
{
delete
shift
->{_clusters} }
sub
radius {
@_
> 2 and croak
'Usage: $clp->radius([$new_radius])'
;
my
$self
=
shift
;
if
(
@_
) {
my
$radius
=
shift
;
$radius
> 0.0 or croak
'positive radius required'
;
$self
->{radius} =
$radius
;
delete
$self
->{_clusters};
}
$self
->{radius};
}
sub
ordered {
@_
> 2 and croak
'Usage: $clp->ordered([$ordered])'
;
my
$self
=
shift
;
if
(
@_
) {
$self
->{ordered} = !!
shift
;
delete
$self
->{_clusters};
}
$self
->{ordered};
}
sub
minimum_size {
@_
> 2 and croak
'Usage: $clp->minimum_size([$size])'
;
my
$self
=
shift
;
if
(
@_
) {
my
$minimum_size
=
shift
;
$minimum_size
> 0 or croak
'positive minimum_size required'
;
$self
->{minimum_size} =
$minimum_size
;
delete
$self
->{_clusters};
}
$self
->{minimum_size};
}
sub
scales {
my
$self
=
shift
;
my
$dimension
=
$self
->{dimension};
if
(
@_
) {
@_
==
$dimension
or croak
'number of scales does not match problem dimension'
;
grep
(
$_
<= 0,
@_
) and croak
'positive number required'
;
@{
$self
->{scales}} =
@_
;
delete
$self
->{_clusters};
}
return
@{
$self
->{scales}};
}
sub
dimensional_groups {
my
$self
=
shift
;
my
$hcb
=
$self
->{dimensional_groups};
my
$dimension
=
$self
->{dimension};
if
(
@_
) {
my
@all
=
eval
{
no
warnings;
sort
{
$a
<=>
$b
}
map
@$_
,
@_
;
};
croak
'bad dimension groups'
unless
(
@all
==
$dimension
and
join
(
'|'
,
@all
) eq
join
(
'|'
, 0..
$dimension
-1));
$self
->{dimensional_groups} = [
map
[
@$_
],
@_
];
delete
$self
->{_clusters};
}
map
[
@$_
], @{
$self
->{dimensional_groups}};
}
sub
clusters {
my
$self
=
shift
;
my
$clusters
=
$self
->{_clusters} ||=
$self
->_make_clusters_ix;
my
$ax
=
$self
->{x};
my
$ay
=
$self
->{y};
my
$coords
=
$self
->{coords};
my
@out
;
for
my
$cluster
(
@$clusters
) {
my
@cluster_coords
;
for
my
$ix
(
@$cluster
) {
push
@cluster_coords
,
map
$_
->[
$ix
],
@$coords
;
}
push
@out
, \
@cluster_coords
;
}
@out
;
}
sub
clusters_ix {
my
$self
=
shift
;
my
$clusters
=
$self
->{_clusters} ||=
$self
->_make_clusters_ix;
map
[
@$_
],
@$clusters
;
}
sub
_touch_2 {
my
(
$c1
,
$c2
,
$ax
,
$ay
) =
@_
;
my
$c1_xmin
= min @{
$ax
}[
@$c1
];
my
$c2_xmax
= max @{
$ax
}[
@$c2
];
return
0
if
$c1_xmin
-
$c2_xmax
> 1;
my
$c1_xmax
= max @{
$ax
}[
@$c1
];
my
$c2_xmin
= min @{
$ax
}[
@$c2
];
return
0
if
$c2_xmin
-
$c1_xmax
> 1;
my
$c1_ymin
= min @{
$ay
}[
@$c1
];
my
$c2_ymax
= max @{
$ay
}[
@$c2
];
return
0
if
$c1_ymin
-
$c2_ymax
> 1;
my
$c1_ymax
= max @{
$ay
}[
@$c1
];
my
$c2_ymin
= min @{
$ay
}[
@$c2
];
return
0
if
$c2_ymin
-
$c1_ymax
> 1;
for
my
$i
(
@$c1
) {
for
my
$j
(
@$c2
) {
my
$dx
=
$ax
->[
$i
] -
$ax
->[
$j
];
my
$dy
=
$ay
->[
$i
] -
$ay
->[
$j
];
return
1
if
(
$dx
*
$dx
+
$dy
*
$dy
<= 1);
}
}
0;
}
sub
_touch {
my
(
$c1
,
$c2
,
$coords
,
$groups
) =
@_
;
for
my
$coord
(
@$coords
) {
my
$c1_min
= min @{
$coord
}[
@$c1
];
my
$c2_max
= max @{
$coord
}[
@$c2
];
return
0
if
$c1_min
-
$c2_max
> 1;
my
$c1_max
= max @{
$coord
}[
@$c1
];
my
$c2_min
= min @{
$coord
}[
@$c2
];
return
0
if
$c2_min
-
$c1_max
> 1;
}
for
my
$i
(
@$c1
) {
J:
for
my
$j
(
@$c2
) {
for
my
$group
(
@$groups
) {
my
$sum
= 0;
for
(@{
$coords
}[
@$group
]) {
my
$delta
=
$_
->[
$i
] -
$_
->[
$j
];
$sum
+=
$delta
*
$delta
;
}
next
J
if
$sum
> 1;
}
return
1;
}
}
0;
}
sub
_scaled_coords {
my
$self
=
shift
;
my
@coords
= @{
$self
->{coords}};
my
$scales
=
$self
->{scales};
my
$ir
= 1.0 /
$self
->{radius};
for
my
$dimension
(0..
$#coords
) {
my
$scale
=
abs
(
$ir
*
$scales
->[
$dimension
]);
next
if
$scale
== 1;
$coords
[
$dimension
] = [
map
$scale
*
$_
, @{
$coords
[
$dimension
]} ];
}
@coords
;
}
sub
_hypercylinder_id {
join
'|'
,
map
join
(
','
,
@$_
),
@_
}
sub
_make_clusters_ix {
my
$self
=
shift
;
_hypercylinder_id(
$self
->dimensional_groups) eq
'0,1'
?
$self
->_make_clusters_ix_2
:
$self
->_make_clusters_ix_any;
}
sub
_make_clusters_ix_2 {
my
$self
=
shift
;
$self
->{dimension} == 2
or croak
'internal error: _make_clusters_ix_2 called but dimension is not 2'
;
my
(
$ax
,
$ay
) =
$self
->_scaled_coords;
@$ax
or croak
"points have not been added"
;
my
$xmin
= min
@$ax
;
my
$ymin
= min
@$ay
;
my
$istep
= 1.00001
*sqr2
;
my
@fx
=
map
{ floor(
$istep
* (
$_
-
$xmin
)) }
@$ax
;
my
@fy
=
map
{ floor(
$istep
* (
$_
-
$ymin
)) }
@$ay
;
my
(
%ifx
,
%ify
,
$c
);
$c
= 1;
$ifx
{
$_
} ||=
$c
++
for
@fx
;
$c
= 1;
$ify
{
$_
} ||=
$c
++
for
@fy
;
my
%rifx
=
reverse
%ifx
;
my
%rify
=
reverse
%ify
;
my
%cell
;
my
$cellid
= 1;
for
my
$i
(0..
$#$ax
) {
my
$cell
=
pack
$packing
=>
$ifx
{
$fx
[
$i
]},
$ify
{
$fy
[
$i
]};
push
@{
$cell
{
$cell
}},
$i
;
}
my
%cell2cluster
;
my
%cluster2cell
;
while
(
defined
(
my
$cell
=
each
%cell
)) {
my
%cluster
;
my
(
$ifx
,
$ify
) =
unpack
$packing
=>
$cell
;
my
$fx
=
$rifx
{
$ifx
};
my
$fy
=
$rify
{
$ify
};
for
my
$dx
(-2, -1, 0, 1, 2) {
my
$ifx
=
$ifx
{
$fx
+
$dx
};
defined
$ifx
or
next
;
my
$filter
= 6 -
$dx
*
$dx
;
for
my
$dy
(-2, -1, 0, 1, 2) {
next
if
$dy
*
$dy
>
$filter
;
my
$ify
=
$ify
{
$fy
+
$dy
};
defined
$ify
or
next
;
my
$neighbor
=
pack
$packing
=>
$ifx
,
$ify
;
my
$cluster
=
$cell2cluster
{
$neighbor
};
if
(
defined
$cluster
and
!
$cluster
{
$cluster
} and
_touch_2(
$cell
{
$cell
},
$cell
{
$neighbor
},
$ax
,
$ay
) ) {
$cluster
{
$cluster
} = 1;
}
}
}
if
(
%cluster
) {
my
(
$to
,
$to_cells
);
if
(
keys
%cluster
> 1) {
my
$max
= 0;
for
(
keys
%cluster
) {
my
$cells
=
$cluster2cell
{
$_
};
my
$n
=
defined
(
$cells
) ?
@$cells
: 1;
if
(
$n
>
$max
) {
$max
=
$n
;
$to
=
$_
;
}
}
delete
$cluster
{
$to
};
$to_cells
= (
$cluster2cell
{
$to
} ||= [
$to
]);
for
(
keys
%cluster
) {
my
$neighbors
=
delete
$cluster2cell
{
$_
};
if
(
defined
$neighbors
) {
push
@$to_cells
,
@$neighbors
;
$cell2cluster
{
$_
} =
$to
for
@$neighbors
;
}
else
{
push
@$to_cells
,
$_
;
$cell2cluster
{
$_
} =
$to
;
}
}
}
else
{
$to
=
each
%cluster
;
$to_cells
= (
$cluster2cell
{
$to
} ||= [
$to
]);
}
push
@$to_cells
,
$cell
;
$cell2cluster
{
$cell
} =
$to
;
}
else
{
$cell2cluster
{
$cell
} =
$cell
;
}
}
my
@clusters
;
while
(
my
(
$cluster
,
$cells
) =
each
%cluster2cell
) {
my
@points
=
map
@{
delete
$cell
{
$_
}},
@$cells
;
if
(
@points
>=
$self
->{minimum_size}) {
@points
=
sort
{
$a
<=>
$b
}
@points
if
$self
->{ordered};
push
@clusters
, \
@points
;
}
}
push
@clusters
,
grep
{
@$_
>=
$self
->{minimum_size} }
values
%cell
;
@clusters
=
sort
{
$a
->[0] <=>
$b
->[0] }
@clusters
if
$self
->{ordered};
return
\
@clusters
;
}
my
%delta_hypercylinder
;
sub
_delta_hypercylinder {
my
$dhc
=
$delta_hypercylinder
{_hypercylinder_id
@_
} ||=
do
{
my
@subdimension
;
for
my
$group
(
@_
) {
$subdimension
[
$_
] =
@$group
for
@$group
;
}
my
@top
=
map
ceil(
sqrt
(
$_
)),
@subdimension
;
my
@delta_hypercylinder
= [];
for
my
$dimension
(0..
$#subdimension
) {
my
@next
;
for
my
$dhc
(
@delta_hypercylinder
) {
my
$top
=
$top
[
$dimension
];
push
@next
,
map
[
@$dhc
,
$_
], -
$top
..
$top
;
}
@delta_hypercylinder
=
@next
;
}
for
my
$group
(
@_
) {
@delta_hypercylinder
=
grep
{
my
$sum
= 0;
for
(
@$_
[
@$group
]) {
my
$min
= (
$_
?
abs
(
$_
) - 1 : 0);
$sum
+=
$min
*
$min
;
}
$sum
<
@$group
;
}
@delta_hypercylinder
;
}
\
@delta_hypercylinder
};
@$dhc
;
}
sub
_print_clusters {
print
join
(
','
,
@$_
),
"\n"
for
sort
{
$a
->[0] <=>
$b
->[0] }
@_
;
}
sub
_make_clusters_ix_any {
my
$self
=
shift
;
my
$dimension
=
$self
->{dimension};
my
@coords
=
$self
->_scaled_coords;
my
$coords
= \
@coords
;
my
$top
= $
$top
>= 0 or croak
"points have not been added"
;
my
$groups
=
$self
->{dimensional_groups};
my
(
@fls
,
@ifls
,
@rifls
);
for
my
$group
(
@$groups
) {
my
$istep
= 1.000001 *
sqrt
(
@$group
);
for
my
$dimension
(
@$group
) {
my
$coord
=
$coords
[
$dimension
];
my
$min
= min
@$coord
;
my
@fl
=
map
floor(
$istep
* (
$_
-
$min
)),
@$coord
;
$fls
[
$dimension
] = \
@fl
;
my
%ifl
;
my
$c
= 1;
$ifl
{
$_
} ||=
$c
++
for
@fl
;
$ifls
[
$dimension
] = \
%ifl
;
my
%rifl
=
reverse
%ifl
;
$rifls
[
$dimension
] = \
%rifl
;
}
}
my
%cell
;
my
$dimension_top
=
$dimension
- 1;
for
my
$i
(0..
$top
) {
my
$cell
=
pack
$packing
=>
map
$ifls
[
$_
]{
$fls
[
$_
][
$i
]}, 0..
$dimension_top
;
push
@{
$cell
{
$cell
}},
$i
;
}
my
%cell2cluster
;
my
%cluster2cell
;
my
@delta_hypercylinder
= _delta_hypercylinder
@$groups
;
while
(
defined
(
my
$cell
=
each
%cell
)) {
my
%cluster
;
my
@ifl
=
unpack
$packing
=>
$cell
;
my
@fl
=
map
$rifls
[
$_
]{
$ifl
[
$_
]}, 0..
$dimension_top
;
for
my
$delta
(
@delta_hypercylinder
) {
my
@ifl
=
map
{
$ifls
[
$_
]{
$fl
[
$_
] +
$delta
->[
$_
]} ||
next
} 0..
$dimension_top
;
my
$neighbor
=
pack
$packing
=>
@ifl
;
my
$cluster
=
$cell2cluster
{
$neighbor
};
if
(
defined
$cluster
and
!
$cluster
{
$cluster
} and
_touch(
$cell
{
$cell
},
$cell
{
$neighbor
},
$coords
,
$groups
) ) {
$cluster
{
$cluster
} = 1;
}
}
if
(
%cluster
) {
my
(
$to
,
$to_cells
);
if
(
keys
%cluster
> 1) {
my
$max
= 0;
for
(
keys
%cluster
) {
my
$cells
=
$cluster2cell
{
$_
};
my
$n
=
defined
(
$cells
) ?
@$cells
: 1;
if
(
$n
>
$max
) {
$max
=
$n
;
$to
=
$_
;
}
}
delete
$cluster
{
$to
};
$to_cells
= (
$cluster2cell
{
$to
} ||= [
$to
]);
for
(
keys
%cluster
) {
my
$neighbors
=
delete
$cluster2cell
{
$_
};
if
(
defined
$neighbors
) {
push
@$to_cells
,
@$neighbors
;
$cell2cluster
{
$_
} =
$to
for
@$neighbors
;
}
else
{
push
@$to_cells
,
$_
;
$cell2cluster
{
$_
} =
$to
;
}
}
}
else
{
$to
=
each
%cluster
;
$to_cells
= (
$cluster2cell
{
$to
} ||= [
$to
]);
}
push
@$to_cells
,
$cell
;
$cell2cluster
{
$cell
} =
$to
;
}
else
{
$cell2cluster
{
$cell
} =
$cell
;
}
}
my
@clusters
;
while
(
my
(
$cluster
,
$cells
) =
each
%cluster2cell
) {
my
@points
=
map
@{
delete
$cell
{
$_
}},
@$cells
;
if
(
@points
>=
$self
->{minimum_size}) {
@points
=
sort
{
$a
<=>
$b
}
@points
if
$self
->{ordered};
push
@clusters
, \
@points
;
}
}
push
@clusters
,
grep
{
@$_
>=
$self
->{minimum_size} }
values
%cell
;
@clusters
=
sort
{
$a
->[0] <=>
$b
->[0] }
@clusters
if
$self
->{ordered};
return
\
@clusters
;
}
sub
brute_force_clusters_ix {
my
$self
=
shift
;
_hypercylinder_id(
$self
->dimensional_groups) eq
'0,1'
?
$self
->brute_force_clusters_ix_2
:
$self
->brute_force_clusters_ix_any
}
sub
brute_force_clusters_ix_2 {
@_
== 1 or croak
'Usage: $clp->brute_force_clusters_ix_2'
;
my
$self
=
shift
;
$self
->{dimension} == 2
or croak
"brute_force_clusters_ix_2 called but dimension is not equal to 2"
;
my
(
$ax
,
$ay
) =
$self
->_scaled_coords;
@$ax
or croak
"points have not been added"
;
my
@ix
= 0..
$#$ax
;
my
@clusters
;
while
(
@ix
) {
my
@current
=
shift
@ix
;
my
@done
;
while
(
@ix
) {
my
$ix
=
shift
@ix
;
my
@queue
;
for
my
$current
(
@current
) {
my
$dx
=
$ax
->[
$ix
] -
$ax
->[
$current
];
my
$dy
=
$ay
->[
$ix
] -
$ay
->[
$current
];
if
(
$dx
*
$dx
+
$dy
*
$dy
<= 1) {
push
@queue
,
$ix
;
last
;
}
}
if
(
@queue
) {
while
(
defined
(
$ix
=
shift
@queue
)) {
for
my
$done
(
@done
) {
next
unless
defined
$done
;
my
$dx
=
$ax
->[
$ix
] -
$ax
->[
$done
];
my
$dy
=
$ay
->[
$ix
] -
$ay
->[
$done
];
if
(
$dx
*
$dx
+
$dy
*
$dy
<= 1) {
push
@queue
,
$done
;
undef
$done
;
}
}
push
@current
,
$ix
;
}
}
else
{
push
@done
,
$ix
;
}
}
if
(
@current
>=
$self
->{minimum_size}) {
@current
=
sort
{
$a
<=>
$b
}
@current
if
$self
->{ordered};
push
@clusters
, \
@current
;
}
@ix
=
grep
defined
(
$_
),
@done
;
}
@clusters
=
sort
{
$a
->[0] <=>
$b
->[0] }
@clusters
if
$self
->{ordered};
return
@clusters
;
}
sub
_points_touch {
my
(
$ix0
,
$ix1
,
$coords
,
$groups
) =
@_
;
for
my
$group
(
@$groups
) {
my
$sum
= 0;
for
(@{
$coords
}[
@$group
]) {
my
$delta
=
$_
->[
$ix0
] -
$_
->[
$ix1
];
$sum
+=
$delta
*
$delta
;
}
return
0
if
$sum
> 1;
}
return
1;
}
sub
brute_force_clusters_ix_any {
@_
== 1 or croak
'Usage: $clp->brute_force_clusters_ix_any'
;
my
$self
=
shift
;
my
$dimension
=
$self
->{dimension};
my
$coords
= [
$self
->_scaled_coords ];
my
@ix
= 0..$
@ix
or croak
"points have not been added"
;
my
$groups
=
$self
->{dimensional_groups};
my
@clusters
;
while
(
@ix
) {
my
@current
=
shift
@ix
;
my
@done
;
while
(
@ix
) {
my
$ix
=
shift
@ix
;
my
@queue
;
for
my
$current
(
@current
) {
if
(_points_touch(
$ix
,
$current
,
$coords
,
$groups
)) {
push
@queue
,
$ix
;
last
;
}
}
if
(
@queue
) {
while
(
defined
(
$ix
=
shift
@queue
)) {
for
my
$done
(
@done
) {
next
unless
defined
$done
;
if
(_points_touch(
$ix
,
$done
,
$coords
,
$groups
)) {
push
@queue
,
$done
;
undef
$done
;
}
}
push
@current
,
$ix
;
}
}
else
{
push
@done
,
$ix
;
}
}
if
(
@current
>=
$self
->{minimum_size}) {
@current
=
sort
{
$a
<=>
$b
}
@current
if
$self
->{ordered};
push
@clusters
, \
@current
;
}
@ix
=
grep
defined
(
$_
),
@done
;
}
@clusters
=
sort
{
$a
->[0] <=>
$b
->[0] }
@clusters
if
$self
->{ordered};
return
@clusters
;
}
sub
distance {
@_
== 3 or croak
'Usage: $clp->distance($ix0, $ix1)'
;
my
(
$self
,
$ix0
,
$ix1
) =
@_
;
my
$coords
=
$self
->{coords};
my
$scales
=
$self
->{scales};
my
$sum
= 0;
for
my
$dimension
(0..$
my
$delta
=
$scales
->[
$dimension
] * (
$coords
->[
$dimension
][
$ix0
] -
$coords
->[
$dimension
][
$ix1
]);
$sum
+=
$delta
*
$delta
;
}
sqrt
(
$sum
);
}
1;