# Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. use strict; package LucyX::Index::ZlibDocWriter; use base qw( Lucy::Index::DataWriter ); use Carp; use Scalar::Util qw( blessed ); use Compress::Zlib qw( compress ); use Clownfish::Util::StringHelper qw( cat_bytes ); use Clownfish; use bytes; no bytes; our $VERSION = '0.004099_002'; $VERSION = eval $VERSION; # Inside-out member vars. our %ix_out; our %dat_out; # Inherit constructor. sub _lazy_init { my $self = shift; # Get outstreams. Skip past non-doc #0. my $folder = $self->get_folder; my $ix_file = $self->get_segment->get_name . "/zdocs.ix"; my $dat_file = $self->get_segment->get_name . "/zdocs.dat"; $ix_out{$$self} = $folder->open_out($ix_file) or confess Clownfish->error; $dat_out{$$self} = $folder->open_out($dat_file) or confess Clownfish->error; $ix_out{$$self}->write_i64(0); } sub add_inverted_doc { my ( $self, %args ) = @_; _lazy_init($self) unless $ix_out{$$self}; my $inverter = $args{inverter}; my $ix_out = $ix_out{$$self}; my $dat_out = $dat_out{$$self}; # Check doc id. my $expected = $ix_out->tell / 8; confess("Expected doc id $expected, got '$args{doc_id}'") unless $args{doc_id} == $expected; my $to_compress = ""; my $count = 0; my $schema = $self->get_schema; $inverter->iterate; while ( $inverter->next ) { next unless $inverter->get_type->stored; my $name = $inverter->get_field_name; my $value = $inverter->get_value; cat_bytes( $to_compress, pack( "w", bytes::length($name) ) ); cat_bytes( $to_compress, $name ); cat_bytes( $to_compress, pack( "w", bytes::length($value) ) ); cat_bytes( $to_compress, $value ); $count++; } # Prepend count of fields to store in this Doc. $to_compress = pack( "w", $count ) . $to_compress; # Write file pointer to index file. Write compressed serialized string to # main file. $ix_out->write_i64( $dat_out->tell ); $dat_out->print( compress($to_compress) ); } sub add_segment { my ( $self, %args ) = @_; my $seg_reader = $args{reader}; my $doc_map = $args{doc_map}; my $doc_max = $seg_reader->doc_max; # Bail if the supplied segment is empty. */ return unless $doc_max; _lazy_init($self) unless $ix_out{$$self}; my $ix_out = $ix_out{$$self}; my $dat_out = $dat_out{$$self}; my $doc_reader = $seg_reader->obtain("Lucy::Index::DocReader"); confess("Not a ZlibDocReader") unless ( blessed($doc_reader) and $doc_reader->isa("LucyX::Index::ZlibDocReader") ); for ( my $i = 1; $i <= $doc_max; $i++ ) { next unless $doc_map->get($i); my $buf; $doc_reader->read_record( $i, \$buf ); $ix_out->write_i64( $dat_out->tell ); $dat_out->print($buf); } } sub finish { my $self = shift; my $ix_out = $ix_out{$$self}; my $dat_out = $dat_out{$$self}; if ($ix_out) { # Write one extra file pointer so that we can always derive record # length. $ix_out->write_i64( $dat_out->tell ); # Close streams and store metadata. $ix_out->close; $dat_out->close; my $segment = $self->get_segment; $segment->store_metadata( key => 'zdocs', metadata => $self->metadata, ); } } sub format {1} sub DESTROY { my $self = shift; delete $ix_out{$$self}; delete $dat_out{$$self}; $self->SUPER::DESTROY; } 1; __END__ __POD__ =head1 NAME LucyX::Index::ZlibDocWriter - Compressed doc storage. =head1 DESCRIPTION This is a proof-of-concept class to demonstrate alternate implementations for fetching documents. It is unsupported. =cut