From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

use strict;
use 5.008008;
use Carp qw(croak);
our $VERSION = '0.24';
our $PERL_ONLY;
if ( !defined $PERL_ONLY ) {
$PERL_ONLY = $ENV{PERL_HLL_PUREPERL} ? 1 : 0;
}
if ( !exists $INC{'Algorithm/HyperLogLog/PP.pm'} ) {
if ( !$PERL_ONLY ) {
require XSLoader;
$PERL_ONLY = !eval { XSLoader::load( __PACKAGE__, $VERSION ); };
}
if ($PERL_ONLY) {
require 'Algorithm/HyperLogLog/PP.pm';
}
}
sub new_from_file {
my ( $class, $filename ) = @_;
open my $fh, '<', $filename or die $!;
my $on_error = sub { close $fh; croak "Invalid dump file($filename)"; };
binmode $fh;
my ( @dumpdata, $buf, $readed );
# Read register size data
$readed = read( $fh, $buf, 1 );
$on_error->() if $readed != 1;
my $k = unpack 'C', $buf;
# Read register content data
my $m = 2**$k;
$readed = read $fh, $buf, $m;
$on_error->() if $readed != $m;
close $fh;
@dumpdata = unpack 'C*', $buf;
my $self = $class->_new_from_dump( $k, \@dumpdata );
return $self;
}
sub dump_to_file {
my ( $self, $filename ) = @_;
my $k = log( $self->register_size ) / log(2); # Calculate log2(register_size)
my $dumpdata = $self->_dump_register();
open my $fh, '>', $filename or die $!;
binmode $fh;
my $buf = pack 'C', $k;
print $fh $buf;
$buf = pack 'C*', @$dumpdata;
print $fh $buf;
close $fh;
}
sub XS {
!$PERL_ONLY;
}
1;
__END__
=pod
=encoding utf8
=head1 NAME
Algorithm::HyperLogLog - Implementation of the HyperLogLog cardinality estimation algorithm
=head1 SYNOPSIS
use Algorithm::HyperLogLog;
my $hll = Algorithm::HyperLogLog->new(14);
while(<>){
$hll->add($_);
}
my $cardinality = $hll->estimate(); # Estimate cardinality
$hll->dump_to_file('hll_register.dump');# Dumps internal data
Construct object from dumped file.
use Algorithm::HyperLogLog;
# Restore internal state
my $hll = Algorithm::HyperLogLog->new_from_file('hll_register.dump');
=head1 DESCRIPTION
This module is implementation of the HyperLogLog algorithm.
HyperLogLog is an algorithm for estimating the cardinality of a set.
=head1 METHODS
=head2 new($b)
Constructor.
`$b` is the parameter for determining register size. (The register size is 2^$b.)
`$b` must be a integer between 4 and 16.
=head2 new_from_file($filename)
This method constructs object and restore the internal data of object from dumped file(dumped by dump_to_file() method).
=head2 dump_to_file($filename)
This method dumps the internal data of an object to a file.
=head2 add($data)
Adds element to the cardinality estimator.
=head2 estimate()
Returns estimated cardinality value in floating point number.
=head2 merge($other)
Merges the estimate from 'other' into this object, returning the estimate of their union.
=head2 register_size()
Return number of register.(In the XS implementation, this equals size in bytes)
=head2 XS()
If using XS backend, this method return true value.
=head1 SEE ALSO
Philippe Flajolet, Éric Fusy, Olivier Gandouet and Frédéric Meunier. HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm. 2007 Conference on Analysis of Algorithms, DMTCS proc. AH, pp. 127–146, 2007. L<http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf>
=head1 AUTHOR
Hideaki Ohno E<lt>hide.o.j55 {at} gmail.comE<gt>
=head1 THANKS TO
=over 4
=item Austin Appleby
=item Peter Scott
=back
=head1 LICENSE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut