#------------------------------------------------------------------------------
# File: RandomAccess.pm
#
# Description: Buffer to support random access reading of sequential file
#
# Revisions: 02/11/2004 - P. Harvey Created
# 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
# 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
# 01/02/2005 - P. Harvey Added DEBUG code
# 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
# multi-character EOL sequences
# 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
# generate "substr outside string" warning
# 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
# 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
# 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
# scalar with a multi-character newline
# 01/24/2009 - PH Protect against reading too much at once
# 10/04/2018 - PH Added NoBuffer option
# 01/20/2024 - PH Set ERROR on file read error
#
# Notes: Calls the normal file i/o routines unless SeekTest() fails, in
# which case the file is buffered in memory to allow random access.
# SeekTest() is called automatically when the object is created
# unless specified.
#
# May also be used for string i/o (just pass a scalar reference)
#
# Sets internal ERROR member from $! if there is an error reading
# the file.
#
# Legal: Copyright (c) 2003-2025, Phil Harvey (philharvey66 at gmail.com)
# This library is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
#------------------------------------------------------------------------------
package
File::RandomAccess;
use
strict;
require
5.002;
require
Exporter;
$VERSION
=
'1.13'
;
@ISA
=
qw(Exporter)
;
sub
Read($$$);
# constants
my
$CHUNK_SIZE
= 8192;
# size of chunks to read from file (must be power of 2)
my
$SKIP_SIZE
= 65536;
# size to skip when fast-forwarding over sequential data
my
$SLURP_CHUNKS
= 16;
# read this many chunks at a time when slurping
#------------------------------------------------------------------------------
# Create new RandomAccess object
# Inputs: 0) reference to RandomAccess object or RandomAccess class name
# 1) file reference or scalar reference
# 2) flag set if file is already random access (disables automatic SeekTest)
sub
new($$;$)
{
my
(
$that
,
$filePt
,
$isRandom
) =
@_
;
my
$class
=
ref
(
$that
) ||
$that
;
my
$self
;
if
(
ref
$filePt
eq
'SCALAR'
) {
# string i/o
$self
= {
BUFF_PT
=>
$filePt
,
BASE
=> 0,
POS
=> 0,
LEN
=>
length
(
$$filePt
),
TESTED
=> -1,
};
bless
$self
,
$class
;
}
else
{
# file i/o
my
$buff
=
''
;
$self
= {
FILE_PT
=>
$filePt
,
# file pointer
BUFF_PT
=> \
$buff
,
# reference to file data
BASE
=> 0,
# location of start of buffer in file
POS
=> 0,
# current position in buffer
LEN
=> 0,
# length of data in buffer
TESTED
=> 0,
# 0=untested, 1=passed, -1=failed (requires buffering)
};
bless
$self
,
$class
;
$self
->SeekTest()
unless
$isRandom
;
}
return
$self
;
}
#------------------------------------------------------------------------------
# Enable DEBUG code
# Inputs: 0) reference to RandomAccess object
sub
Debug($)
{
my
$self
=
shift
;
$self
->{DEBUG} = { };
}
#------------------------------------------------------------------------------
# Perform seek test and turn on buffering if necessary
# Inputs: 0) reference to RandomAccess object
# Returns: 1 if seek test passed (ie. no buffering required)
# Notes: Must be done before any other i/o
sub
SeekTest($)
{
my
$self
=
shift
;
unless
(
$self
->{TESTED}) {
my
$fp
=
$self
->{FILE_PT};
if
(
seek
(
$fp
, 1, 1) and
seek
(
$fp
, -1, 1)) {
$self
->{TESTED} = 1;
# test passed
}
else
{
$self
->{TESTED} = -1;
# test failed (requires buffering)
}
}
return
$self
->{TESTED} == 1 ? 1 : 0;
}
#------------------------------------------------------------------------------
# Get current position in file
# Inputs: 0) reference to RandomAccess object
# Returns: current position in file
sub
Tell($)
{
my
$self
=
shift
;
my
$rtnVal
;
if
(
$self
->{TESTED} < 0) {
$rtnVal
=
$self
->{POS} +
$self
->{BASE};
}
else
{
$rtnVal
=
tell
(
$self
->{FILE_PT});
}
return
$rtnVal
;
}
#------------------------------------------------------------------------------
# Seek to position in file
# Inputs: 0) reference to RandomAccess object
# 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
# Returns: 1 on success
# Notes: When buffered, this doesn't quite behave like seek() since it will return
# success even if you seek outside the limits of the file. However if you
# do this, you will get an error on your next Read().
sub
Seek($$;$)
{
my
(
$self
,
$num
,
$whence
) =
@_
;
$whence
= 0
unless
defined
$whence
;
my
$rtnVal
;
if
(
$self
->{TESTED} < 0) {
my
$newPos
;
if
(
$whence
== 0) {
$newPos
=
$num
-
$self
->{BASE};
# from start of file
}
elsif
(
$whence
== 1) {
$newPos
=
$num
+
$self
->{POS};
# relative to current position
}
elsif
(
$self
->{NoBuffer} and
$self
->{FILE_PT}) {
$newPos
= -1;
# (can't seek relative to end if no buffering)
}
else
{
$self
->Slurp();
# read whole file into buffer
$newPos
=
$num
+
$self
->{LEN};
# relative to end of file
}
if
(
$newPos
>= 0 and
# can't go backwards in unbuffered non-seekable file
(not
$self
->{NoBuffer} or
$newPos
>=
$self
->{POS}))
{
$self
->{POS} =
$newPos
;
$rtnVal
= 1;
}
}
else
{
$rtnVal
=
seek
(
$self
->{FILE_PT},
$num
,
$whence
);
}
return
$rtnVal
;
}
#------------------------------------------------------------------------------
# Read from the file
# Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
# Returns: Number of bytes read
sub
Read($$$)
{
my
$self
=
shift
;
my
$len
=
$_
[1];
my
$rtnVal
;
# protect against reading too much at once
# (also from dying with a "Negative length" error)
if
(
$len
& 0xf8000000) {
return
0
if
$len
< 0;
# read in smaller blocks because Windows attempts to pre-allocate
# memory for the full size, which can lead to an out-of-memory error
my
$maxLen
= 0x4000000;
# (MUST be less than bitmask in "if" above)
my
$num
= Read(
$self
,
$_
[0],
$maxLen
);
return
$num
if
$num
<
$maxLen
;
for
(;;) {
$len
-=
$maxLen
;
last
if
$len
<= 0;
my
$l
=
$len
<
$maxLen
?
$len
:
$maxLen
;
my
$buff
;
my
$n
= Read(
$self
,
$buff
,
$l
);
last
unless
$n
;
$_
[0] .=
$buff
;
$num
+=
$n
;
last
if
$n
<
$l
;
}
return
$num
;
}
# read through our buffer if necessary
if
(
$self
->{TESTED} < 0) {
# purge old data before reading in NoBuffer mode
$self
->Purge() or
return
0
if
$self
->{NoBuffer};
my
$buff
;
my
$newPos
=
$self
->{POS} +
$len
;
# number of bytes to read from file
my
$num
=
$newPos
-
$self
->{LEN};
if
(
$num
> 0 and
$self
->{FILE_PT}) {
# read data from file in multiples of $CHUNK_SIZE
$num
= ((
$num
- 1) | (
$CHUNK_SIZE
- 1)) + 1;
$num
=
read
(
$self
->{FILE_PT},
$buff
,
$num
);
if
(
$num
) {
${
$self
->{BUFF_PT}} .=
$buff
;
$self
->{LEN} +=
$num
;
}
elsif
(not
defined
$num
) {
$self
->{ERROR} = $!;
}
}
# number of bytes left in data buffer
$num
=
$self
->{LEN} -
$self
->{POS};
if
(
$len
<=
$num
) {
$rtnVal
=
$len
;
}
elsif
(
$num
<= 0) {
$_
[0] =
''
;
return
0;
}
else
{
$rtnVal
=
$num
;
}
# return data from our buffer
$_
[0] =
substr
(${
$self
->{BUFF_PT}},
$self
->{POS},
$rtnVal
);
$self
->{POS} +=
$rtnVal
;
}
else
{
# read directly from file
$_
[0] =
''
unless
defined
$_
[0];
$rtnVal
=
read
(
$self
->{FILE_PT},
$_
[0],
$len
);
unless
(
defined
$rtnVal
) {
$self
->{ERROR} = $!;
$rtnVal
= 0;
}
}
if
(
$self
->{DEBUG}) {
my
$pos
=
$self
->Tell() -
$rtnVal
;
unless
(
$self
->{DEBUG}->{
$pos
} and
$self
->{DEBUG}->{
$pos
} >
$rtnVal
) {
$self
->{DEBUG}->{
$pos
} =
$rtnVal
;
}
}
return
$rtnVal
;
}
#------------------------------------------------------------------------------
# Read a line from file (end of line is $/)
# Inputs: 0) reference to RandomAccess object, 1) buffer
# Returns: Number of bytes read
sub
ReadLine($$)
{
my
$self
=
shift
;
my
$rtnVal
;
my
$fp
=
$self
->{FILE_PT};
if
(
$self
->{TESTED} < 0) {
my
(
$num
,
$buff
);
$self
->Purge() or
return
0
if
$self
->{NoBuffer};
my
$pos
=
$self
->{POS};
if
(
$fp
) {
# make sure we have some data after the current position
while
(
$self
->{LEN} <=
$pos
) {
$num
=
read
(
$fp
,
$buff
,
$CHUNK_SIZE
);
unless
(
$num
) {
defined
$num
or
$self
->{ERROR} = $!;
return
0;
}
${
$self
->{BUFF_PT}} .=
$buff
;
$self
->{LEN} +=
$num
;
}
# scan and read until we find the EOL (or hit EOF)
for
(;;) {
$pos
=
index
(${
$self
->{BUFF_PT}}, $/,
$pos
);
if
(
$pos
>= 0) {
$pos
+=
length
($/);
last
;
}
$pos
=
$self
->{LEN};
# have scanned to end of buffer
$num
=
read
(
$fp
,
$buff
,
$CHUNK_SIZE
);
unless
(
$num
) {
defined
$num
or
$self
->{ERROR} = $!;
last
;
}
${
$self
->{BUFF_PT}} .=
$buff
;
$self
->{LEN} +=
$num
;
}
}
else
{
# string i/o
$pos
=
index
(${
$self
->{BUFF_PT}}, $/,
$pos
);
if
(
$pos
< 0) {
$pos
=
$self
->{LEN};
$self
->{POS} =
$pos
if
$self
->{POS} >
$pos
;
}
else
{
$pos
+=
length
($/);
}
}
# read the line from our buffer
$rtnVal
=
$pos
-
$self
->{POS};
$_
[0] =
substr
(${
$self
->{BUFF_PT}},
$self
->{POS},
$rtnVal
);
$self
->{POS} =
$pos
;
}
else
{
$_
[0] = <
$fp
>;
if
(
defined
$_
[0]) {
$rtnVal
=
length
(
$_
[0]);
}
else
{
$rtnVal
= 0;
}
}
if
(
$self
->{DEBUG}) {
my
$pos
=
$self
->Tell() -
$rtnVal
;
unless
(
$self
->{DEBUG}->{
$pos
} and
$self
->{DEBUG}->{
$pos
} >
$rtnVal
) {
$self
->{DEBUG}->{
$pos
} =
$rtnVal
;
}
}
return
$rtnVal
;
}
#------------------------------------------------------------------------------
# Read whole file into buffer (without changing read pointer)
# Inputs: 0) reference to RandomAccess object
sub
Slurp($)
{
my
$self
=
shift
;
my
$fp
=
$self
->{FILE_PT} ||
return
;
# read whole file into buffer (in large chunks)
my
(
$buff
,
$num
);
for
(;;) {
$num
=
read
(
$fp
,
$buff
,
$CHUNK_SIZE
*
$SLURP_CHUNKS
);
unless
(
$num
) {
defined
$num
or
$self
->{ERROR} = $!;
last
;
}
${
$self
->{BUFF_PT}} .=
$buff
;
$self
->{LEN} +=
$num
;
}
}
#------------------------------------------------------------------------------
# Purge internal buffer [internal use only]
# Inputs: 0) reference to RandomAccess object
# Returns: 1 on success, or 0 if current buffer position is negative
# Notes: This is called only in NoBuffer mode
sub
Purge($)
{
my
$self
=
shift
;
return
1
unless
$self
->{FILE_PT};
return
0
if
$self
->{POS} < 0;
# error if we can't read from here
if
(
$self
->{POS} >
$CHUNK_SIZE
) {
my
$purge
=
$self
->{POS} - (
$self
->{POS} %
$CHUNK_SIZE
);
if
(
$purge
>=
$self
->{LEN}) {
# read up to current position in 64k chunks, discarding as we go
while
(
$self
->{POS} >
$self
->{LEN}) {
$self
->{BASE} +=
$self
->{LEN};
$self
->{POS} -=
$self
->{LEN};
${
$self
->{BUFF_PT}} =
''
;
$self
->{LEN} =
read
(
$self
->{FILE_PT}, ${
$self
->{BUFF_PT}},
$SKIP_SIZE
);
if
(not
defined
$self
->{LEN}) {
$self
->{ERROR} = $!;
last
;
}
last
if
$self
->{LEN} <
$SKIP_SIZE
;
}
}
elsif
(
$purge
> 0) {
${
$self
->{BUFF_PT}} =
substr
${
$self
->{BUFF_PT}},
$purge
;
$self
->{BASE} +=
$purge
;
$self
->{POS} -=
$purge
;
$self
->{LEN} -=
$purge
;
}
}
return
1;
}
#------------------------------------------------------------------------------
# Set binary mode
# Inputs: 0) reference to RandomAccess object
sub
BinMode($)
{
my
$self
=
shift
;
binmode
(
$self
->{FILE_PT})
if
$self
->{FILE_PT};
}
#------------------------------------------------------------------------------
# Close the file and free the buffer
# Inputs: 0) reference to RandomAccess object
sub
Close($)
{
my
$self
=
shift
;
if
(
$self
->{DEBUG}) {
local
$_
;
if
(
$self
->Seek(0,2)) {
$self
->{DEBUG}->{
$self
->Tell()} = 0;
# set EOF marker
my
$last
;
my
$tot
= 0;
my
$bad
= 0;
foreach
(
sort
{
$a
<=>
$b
}
keys
%{
$self
->{DEBUG}}) {
my
$pos
=
$_
;
my
$len
=
$self
->{DEBUG}->{
$_
};
if
(
defined
$last
and
$last
<
$pos
) {
my
$bytes
=
$pos
-
$last
;
$tot
+=
$bytes
;
$self
->Seek(
$last
);
my
$buff
;
$self
->Read(
$buff
,
$bytes
);
my
$warn
=
''
;
if
(
$buff
=~ /[^\0]/) {
$bad
+= (
$pos
-
$last
);
$warn
=
' - NON-ZERO!'
;
}
printf
"0x%.8x - 0x%.8x (%d bytes)$warn\n"
,
$last
,
$pos
,
$bytes
;
}
my
$cur
=
$pos
+
$len
;
$last
=
$cur
unless
defined
$last
and
$last
>
$cur
;
}
"$tot bytes missed"
;
$bad
and
", $bad non-zero!"
;
"\n"
;
}
else
{
warn
"File::RandomAccess DEBUG not working (file already closed?)\n"
;
}
delete
$self
->{DEBUG};
}
# close the file
if
(
$self
->{FILE_PT}) {
close
(
$self
->{FILE_PT});
delete
$self
->{FILE_PT};
}
# reset the buffer
my
$emptyBuff
=
''
;
$self
->{BUFF_PT} = \
$emptyBuff
;
$self
->{BASE} = 0;
$self
->{LEN} = 0;
$self
->{POS} = 0;
}
#------------------------------------------------------------------------------
1;
# end