Hide Show 64 lines of Pod
$Bio::SeqIO::scf::VERSION
=
'1.7.8'
;
use
vars
qw($DEFAULT_QUALITY)
;
my
$dumper
= Dumpvalue->new();
$dumper
->veryCompact(1);
BEGIN {
$DEFAULT_QUALITY
= 10;
}
sub
_initialize {
my
(
$self
,
@args
) =
@_
;
$self
->SUPER::_initialize(
@args
);
if
( !
defined
$self
->sequence_factory ) {
$self
->sequence_factory(Bio::Seq::SeqFactory->new
(
-verbose
=>
$self
->verbose(),
-type
=>
'Bio::Seq::Quality'
));
}
binmode
$self
->_fh;
}
Hide Show 17 lines of Pod
sub
next_seq {
my
(
$self
) =
@_
;
my
(
$seq
,
$seqc
,
$fh
,
$buffer
,
$offset
,
$length
,
$read_bytes
,
@read
,
%names
);
return
if
$self
->{_readfile};
$fh
=
$self
->_fh();
unless
(
$fh
) {
if
( !
fileno
(ARGV) or
eof
(ARGV) ) {
return
unless
my
$ARGV
=
shift
;
open
(ARGV,
$ARGV
) or
$self
->throw(
"Could not open $ARGV for SCF stream reading $!"
);
}
$fh
= \
*ARGV
;
}
return
unless
read
$fh
,
$buffer
, 128;
my
$creator
;
$creator
->{header} =
$self
->_get_header(
$buffer
);
if
(
$creator
->{header}->{
'version'
} lt
"3.00"
) {
$self
->debug(
"scf.pm is working with a version 2 scf.\n"
);
$length
=
$creator
->{header}->{
'samples'
} *
$creator
->{header}->{sample_size}*4;
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
,
$creator
->{header}->{samples_offset});
$creator
->{traces} =
$self
->_parse_v2_traces(
$buffer
,
$creator
->{header}->{sample_size});
$offset
=
$creator
->{header}->{bases_offset};
$length
= (
$creator
->{header}->{bases} * 12);
seek
$fh
,
$offset
,0;
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
,
$creator
->{header}->{bases_offset});
(
$creator
->{peak_indices},
$creator
->{qualities},
$creator
->{sequence},
$creator
->{accuracies}) =
$self
->_parse_v2_bases(
$buffer
);
}
else
{
$self
->debug(
"scf.pm is working with a version 3+ scf.\n"
);
my
$transformed_read
;
my
$current_read_position
=
$creator
->{header}->{sample_offset};
$length
=
$creator
->{header}->{
'samples'
}*
$creator
->{header}->{sample_size};
foreach
(
qw(a c g t)
) {
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
,
$current_read_position
);
my
$byte
=
"n"
;
if
(
$creator
->{header}->{sample_size} == 1) {
$byte
=
"c"
;
}
@read
=
unpack
"${byte}${length}"
,
$buffer
;
foreach
(
@read
) {
if
(
$_
> 30000) {
$_
-= 65536;
}
}
$transformed_read
=
$self
->_delta(\
@read
,
"backward"
);
if
(
$creator
->{header}->{sample_size} == 1) {
foreach
(@{
$transformed_read
}) {
$_
+= 256
if
(
$_
< 0);
}
}
$current_read_position
+=
$length
;
$creator
->{
'traces'
}->{
$_
} =
join
(
' '
,@{
$transformed_read
});
}
$offset
=
$creator
->{header}->{bases_offset};
$length
= (
$creator
->{header}->{bases} * 4);
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
,
$offset
);
$creator
->{peak_indices} =
$self
->_get_v3_peak_indices(
$buffer
);
$offset
+=
$length
;
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
,
$offset
);
$creator
->{accuracies} =
$self
->_get_v3_base_accuracies(
$buffer
);
$offset
+=
$length
;
$length
=
$creator
->{header}->{bases};
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
,
$offset
);
$creator
->{
'sequence'
} =
unpack
(
"a$length"
,
$buffer
);
$creator
->{qualities} =
$self
->_get_v3_quality(
$creator
->{
'sequence'
},
$creator
->{accuracies});
}
$offset
=
$creator
->{header}->{comments_offset};
seek
$fh
,
$offset
,0;
$length
=
$creator
->{header}->{comment_size};
$buffer
=
$self
->read_from_buffer(
$fh
,
$buffer
,
$length
);
$creator
->{comments} =
$self
->_get_comments(
$buffer
);
my
@name_comments
=
grep
{
$_
->tagname() eq
'NAME'
}
$creator
->{comments}->get_Annotations(
'comment'
);
my
$name_comment
;
if
(
@name_comments
){
$name_comment
=
$name_comments
[0]->as_text();
$name_comment
=~ s/^Comment:\s+//;
}
my
$swq
= Bio::Seq::Quality->new(
-seq
=>
$creator
->{
'sequence'
},
-qual
=>
$creator
->{
'qualities'
},
-id
=>
$name_comment
);
my
$returner
= Bio::Seq::SequenceTrace->new(
-swq
=>
$swq
,
-trace_a
=>
$creator
->{
'traces'
}->{
'a'
},
-trace_t
=>
$creator
->{
'traces'
}->{
't'
},
-trace_g
=>
$creator
->{
'traces'
}->{
'g'
},
-trace_c
=>
$creator
->{
'traces'
}->{
'c'
},
-accuracy_a
=>
$creator
->{
'accuracies'
}->{
'a'
},
-accuracy_t
=>
$creator
->{
'accuracies'
}->{
't'
},
-accuracy_g
=>
$creator
->{
'accuracies'
}->{
'g'
},
-accuracy_c
=>
$creator
->{
'accuracies'
}->{
'c'
},
-peak_indices
=>
$creator
->{
'peak_indices'
}
);
$returner
->annotation(
$creator
->{
'comments'
});
$self
->{
'_readfile'
} = 1;
return
$returner
;
}
Hide Show 11 lines of Pod
sub
_get_v3_quality {
my
(
$self
,
$sequence
,
$accuracies
) =
@_
;
my
@bases
=
split
//,
$sequence
;
my
(
@qualities
,
$currbase
,
$currqual
,
$counter
);
for
(
$counter
=0;
$counter
<=
$#bases
;
$counter
++) {
$currbase
=
lc
(
$bases
[
$counter
]);
if
(
$currbase
eq
"a"
) {
$currqual
=
$accuracies
->{
'a'
}->[
$counter
]; }
elsif
(
$currbase
eq
"c"
) {
$currqual
=
$accuracies
->{
'c'
}->[
$counter
]; }
elsif
(
$currbase
eq
"g"
) {
$currqual
=
$accuracies
->{
'g'
}->[
$counter
]; }
elsif
(
$currbase
eq
"t"
) {
$currqual
=
$accuracies
->{
't'
}->[
$counter
]; }
else
{
$currqual
=
"unknown"
; }
push
@qualities
,
$currqual
;
}
return
\
@qualities
;
}
Hide Show 11 lines of Pod
sub
_get_v3_peak_indices {
my
(
$self
,
$buffer
) =
@_
;
my
$length
=
length
(
$buffer
);
my
@read
=
unpack
"N$length"
,
$buffer
;
return
join
(
' '
,
@read
);
}
Hide Show 11 lines of Pod
sub
_get_v3_base_accuracies {
my
(
$self
,
$buffer
) =
@_
;
my
$length
=
length
(
$buffer
);
my
$qlength
=
$length
/4;
my
$offset
= 0;
my
(
@qualities
,
@sorter
,
$counter
,
$round
,
$last_base
,
$accuracies
,
$currbase
);
foreach
$currbase
(
qw(a c g t)
) {
my
@read
;
$last_base
=
$offset
+
$qlength
;
for
(;
$offset
<
$last_base
;
$offset
+=
$qlength
) {
@read
=
unpack
"C$qlength"
,
substr
(
$buffer
,
$offset
,
$qlength
);
$accuracies
->{
$currbase
} = \
@read
;
}
}
return
$accuracies
;
}
Hide Show 14 lines of Pod
sub
_get_comments {
my
(
$self
,
$buffer
) =
@_
;
my
$comments
= Bio::Annotation::Collection->new();
my
$size
=
length
(
$buffer
);
my
$comments_retrieved
=
unpack
"a$size"
,
$buffer
;
$comments_retrieved
=~ s/\0//;
my
@comments_split
=
split
/\n/,
$comments_retrieved
;
if
(
@comments_split
) {
foreach
(
@comments_split
) {
/(\w+)=(.*)/;
if
($1 && $2) {
my
(
$tagname
,
$text
) = ($1, $2);
my
$comment_obj
= Bio::Annotation::Comment->new(
-text
=>
$text
,
-tagname
=>
$tagname
);
$comments
->add_Annotation(
'comment'
,
$comment_obj
);
}
}
}
$self
->{
'comments'
} =
$comments
;
return
$comments
;
}
Hide Show 14 lines of Pod
sub
_get_header {
my
(
$self
,
$buffer
) =
@_
;
my
$header
;
(
$header
->{
'scf'
},
$header
->{
'samples'
},
$header
->{
'sample_offset'
},
$header
->{
'bases'
},
$header
->{
'bases_left_clip'
},
$header
->{
'bases_right_clip'
},
$header
->{
'bases_offset'
},
$header
->{
'comment_size'
},
$header
->{
'comments_offset'
},
$header
->{
'version'
},
$header
->{
'sample_size'
},
$header
->{
'code_set'
},
@{
$header
->{
'header_spare'
}} ) =
unpack
"a4 NNNNNNNN a4 NN N20"
,
$buffer
;
$self
->{
'header'
} =
$header
;
return
$header
;
}
Hide Show 14 lines of Pod
sub
_parse_v2_bases {
my
(
$self
,
$buffer
) =
@_
;
my
$length
=
length
(
$buffer
);
my
(
$offset2
,
$currbuff
,
$currbase
,
$currqual
,
$sequence
,
@qualities
,
@indices
);
my
(
@read
,
$harvester
,
$accuracies
);
for
(
$offset2
=0;
$offset2
<
$length
;
$offset2
+=12) {
@read
=
unpack
"N C C C C a C3"
,
substr
(
$buffer
,
$offset2
,
$length
);
push
@indices
,
$read
[0];
$currbase
=
lc
(
$read
[5]);
if
(
$currbase
eq
"a"
) {
$currqual
=
$read
[1]; }
elsif
(
$currbase
eq
"c"
) {
$currqual
=
$read
[2]; }
elsif
(
$currbase
eq
"g"
) {
$currqual
=
$read
[3]; }
elsif
(
$currbase
eq
"t"
) {
$currqual
=
$read
[4]; }
else
{
$currqual
=
"UNKNOWN"
; }
push
@{
$accuracies
->{
"a"
}},
$read
[1];
push
@{
$accuracies
->{
"c"
}},
$read
[2];
push
@{
$accuracies
->{
"g"
}},
$read
[3];
push
@{
$accuracies
->{
"t"
}},
$read
[4];
$sequence
.=
$currbase
;
push
@qualities
,
$currqual
;
}
return
(\
@indices
,\
@qualities
,
$sequence
,
$accuracies
)
}
Hide Show 11 lines of Pod
sub
_parse_v2_traces {
my
(
$self
,
$buffer
,
$sample_size
) =
@_
;
my
$byte
;
if
(
$sample_size
== 1) {
$byte
=
"c"
; }
else
{
$byte
=
"n"
; }
my
$length
= CORE::
length
(
$buffer
);
my
@read
=
unpack
"${byte}${length}"
,
$buffer
;
my
$traces
;
my
$array
= 0;
for
(
my
$offset2
= 0;
$offset2
<
scalar
(
@read
);
$offset2
+=4) {
push
@{
$traces
->{
'a'
}},
$read
[
$offset2
];
push
@{
$traces
->{
'c'
}},
$read
[
$offset2
+1];
push
@{
$traces
->{
'g'
}},
$read
[
$offset2
+3];
push
@{
$traces
->{
't'
}},
$read
[
$offset2
+2];
}
return
$traces
;
}
sub
get_trace_deprecated_use_the_sequencetrace_object_instead {
}
sub
_deprecated_get_peak_indices_deprecated_use_the_sequencetrace_object_instead {
my
(
$self
) =
shift
;
my
@temp
=
split
(
' '
,
$self
->{
'parsed'
}->{
'peak_indices'
});
return
\
@temp
;
}
Hide Show 11 lines of Pod
sub
get_header {
my
(
$self
) =
shift
;
return
$self
->{
'header'
};
}
Hide Show 11 lines of Pod
sub
get_comments {
my
(
$self
) =
shift
;
return
$self
->{
'comments'
};
}
sub
_dump_traces_outgoing_deprecated_use_the_sequencetrace_object {
my
(
$self
,
$transformed
) =
@_
;
my
(
@sA
,
@sT
,
@sG
,
@sC
);
if
(
$transformed
) {
@sA
= @{
$self
->{
'text'
}->{
't_samples_a'
}};
@sC
= @{
$self
->{
'text'
}->{
't_samples_c'
}};
@sG
= @{
$self
->{
'text'
}->{
't_samples_g'
}};
@sT
= @{
$self
->{
'text'
}->{
't_samples_t'
}};
}
else
{
@sA
= @{
$self
->{
'text'
}->{
'samples_a'
}};
@sC
= @{
$self
->{
'text'
}->{
'samples_c'
}};
@sG
= @{
$self
->{
'text'
}->{
'samples_g'
}};
@sT
= @{
$self
->{
'text'
}->{
'samples_t'
}};
}
print
(
"Count\ta\tc\tg\tt\n"
);
for
(
my
$curr
=0;
$curr
<
scalar
(
@sG
);
$curr
++) {
print
(
"$curr\t$sA[$curr]\t$sC[$curr]\t$sG[$curr]\t$sT[$curr]\n"
);
}
return
;
}
sub
_dump_traces_incoming_deprecated_use_the_sequencetrace_object {
}
Hide Show 30 lines of Pod
sub
write_seq {
my
(
$self
,
%args
) =
@_
;
my
%comments
;
my
(
$label
,
$arg
);
my
(
$swq
) =
$self
->_rearrange([
qw(TARGET)
],
%args
);
my
$writer_fodder
;
if
(
ref
(
$swq
) =~ /Bio::Seq::SequenceTrace|Bio::Seq::Quality/) {
if
(
ref
(
$swq
) eq
"Bio::Seq::Quality"
) {
$swq
= Bio::Seq::SequenceTrace->new(
-swq
=>
$swq
);
}
}
else
{
$self
->throw(
"You must pass a Bio::Seq::Quality or a Bio::Seq::SequenceTrace object to write_seq as a parameter named \"target\""
);
}
foreach
$arg
(
sort
keys
%args
) {
next
if
(
$arg
=~ /target/i);
(
$label
=
$arg
) =~ s/^\-//;
$writer_fodder
->{comments}->{
$label
} =
$args
{
$arg
};
}
if
(!
$comments
{
'NAME'
}) {
$comments
{
'NAME'
} =
$swq
->id(); }
$writer_fodder
->{comments}->{
'CONV'
} =
"Bioperl-Chads Mighty SCF writer."
unless
defined
$comments
{
'CONV'
};
if
(
$writer_fodder
->{comments}->{version}) {
if
(
$writer_fodder
->{comments}->{version} != 2 &&
$writer_fodder
->{comments}->{version} != 3) {
$self
->
warn
(
"This module can only write version 2.0 or 3.0 scf's. Writing a version 2.0 scf by default."
);
$writer_fodder
->{header}->{version} =
"2.00"
;
}
elsif
(
$writer_fodder
->{comments}->{
'version'
} > 2) {
$writer_fodder
->{header}->{
'version'
} =
"3.00"
;
}
else
{
$writer_fodder
->{header}->{version} =
"2"
;
}
}
else
{
$writer_fodder
->{header}->{
'version'
} =
"3.00"
;
}
$writer_fodder
->{
'header'
}->{
'magic'
} =
".scf"
;
$writer_fodder
->{
'header'
}->{
'sample_size'
} =
"2"
;
$writer_fodder
->{
'header'
}->{
'bases'
} =
length
(
$swq
->seq());
$writer_fodder
->{
'header'
}->{
'bases_left_clip'
} =
"0"
;
$writer_fodder
->{
'header'
}->{
'bases_right_clip'
} =
"0"
;
$writer_fodder
->{
'header'
}->{
'sample_size'
} =
"2"
;
$writer_fodder
->{
'header'
}->{
'code_set'
} =
"9"
;
@{
$writer_fodder
->{
'header'
}->{
'spare'
}} =
qw(0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0)
;
$writer_fodder
->{
'header'
}->{
'samples_offset'
} =
"128"
;
$writer_fodder
->{
'header'
}->{
'samples'
} =
$swq
->trace_length();
$writer_fodder
->{comments} =
$self
->_get_binary_comments(
$writer_fodder
->{comments});
$writer_fodder
->{traces} =
$self
->_get_binary_traces(
$writer_fodder
->{
'header'
}->{
'version'
},
$swq
,
$writer_fodder
->{
'header'
}->{
'sample_size'
});
my
(
$b_base_offsets
,
$b_base_accuracies
,
$samples_size
,
$bases_size
);
if
(
$writer_fodder
->{
'header'
}->{
'version'
} == 2) {
$writer_fodder
->{bases} =
$self
->_get_binary_bases(
2,
$swq
,
$writer_fodder
->{
'header'
}->{
'sample_size'
});
$samples_size
= CORE::
length
(
$writer_fodder
->{traces}->{
'binary'
});
$bases_size
= CORE::
length
(
$writer_fodder
->{bases}->{binary});
$writer_fodder
->{
'header'
}->{
'bases_offset'
} = 128 +
$samples_size
;
$writer_fodder
->{
'header'
}->{
'comments_offset'
} = 128 +
$samples_size
+
$bases_size
;
$writer_fodder
->{
'header'
}->{
'comments_size'
} =
length
(
$writer_fodder
->{
'comments'
}->{binary});
$writer_fodder
->{
'header'
}->{
'private_size'
} =
"0"
;
$writer_fodder
->{
'header'
}->{
'private_offset'
} = 128 +
$samples_size
+
$bases_size
+
$writer_fodder
->{
'header'
}->{
'comments_size'
};
$writer_fodder
->{
'header'
}->{
'binary'
} =
$self
->_get_binary_header(
$writer_fodder
->{header});
$dumper
->dumpValue(
$writer_fodder
)
if
$self
->verbose > 0;
$self
->_print (
$writer_fodder
->{
'header'
}->{
'binary'
})
or
print
(
"Could not write binary header...\n"
);
$self
->_print (
$writer_fodder
->{
'traces'
}->{
'binary'
})
or
print
(
"Could not write binary traces...\n"
);
$self
->_print (
$writer_fodder
->{
'bases'
}->{
'binary'
})
or
print
(
"Could not write binary base structures...\n"
);
$self
->_print (
$writer_fodder
->{
'comments'
}->{
'binary'
})
or
print
(
"Could not write binary comments...\n"
);
}
else
{
(
$writer_fodder
->{peak_indices},
$writer_fodder
->{accuracies},
$writer_fodder
->{bases},
$writer_fodder
->{reserved} ) =
$self
->_get_binary_bases(
3,
$swq
,
$writer_fodder
->{
'header'
}->{
'sample_size'
}
);
$writer_fodder
->{
'header'
}->{
'bases_offset'
} = 128 +
length
(
$writer_fodder
->{
'traces'
}->{
'binary'
});
$writer_fodder
->{
'header'
}->{
'comments_size'
} =
length
(
$writer_fodder
->{
'comments'
}->{
'binary'
});
$writer_fodder
->{
'header'
}->{
'private_size'
} =
"0"
;
$writer_fodder
->{
'header'
}->{
'comments_offset'
} =
128+
length
(
$writer_fodder
->{
'traces'
}->{
'binary'
})+
length
(
$writer_fodder
->{
'peak_indices'
}->{
'binary'
})+
length
(
$writer_fodder
->{
'accuracies'
}->{
'binary'
})+
length
(
$writer_fodder
->{
'bases'
}->{
'binary'
})+
length
(
$writer_fodder
->{
'reserved'
}->{
'binary'
});
$writer_fodder
->{
'header'
}->{
'private_offset'
} =
$writer_fodder
->{
'header'
}->{
'comments_offset'
} +
$writer_fodder
->{
'header'
}->{
'comments_size'
};
$writer_fodder
->{
'header'
}->{
'spare'
}->[1] =
$writer_fodder
->{
'header'
}->{
'comments_offset'
} +
length
(
$writer_fodder
->{
'comments'
}->{
'binary'
});
$writer_fodder
->{header}->{binary} =
$self
->_get_binary_header(
$writer_fodder
->{header});
$self
->_print (
$writer_fodder
->{
'header'
}->{
'binary'
})
or
print
(
"Couldn't write header\n"
);
$self
->_print (
$writer_fodder
->{
'traces'
}->{
'binary'
})
or
print
(
"Couldn't write samples\n"
);
$self
->_print (
$writer_fodder
->{
'peak_indices'
}->{
'binary'
})
or
print
(
"Couldn't write peak offsets\n"
);
$self
->_print (
$writer_fodder
->{
'accuracies'
}->{
'binary'
})
or
print
(
"Couldn't write accuracies\n"
);
$self
->_print (
$writer_fodder
->{
'bases'
}->{
'binary'
})
or
print
(
"Couldn't write called_bases\n"
);
$self
->_print (
$writer_fodder
->{
'reserved'
}->{
'binary'
})
or
print
(
"Couldn't write reserved\n"
);
$self
->_print (
$writer_fodder
->{
'comments'
}->{
'binary'
})
or
print
(
"Couldn't write comments\n"
);
}
$self
->flush
if
$self
->_flush_on_write &&
defined
$self
->_fh;
$self
->
close
();
return
1;
}
Hide Show 13 lines of Pod
sub
_get_binary_header {
my
(
$self
,
$header
) =
@_
;
my
$binary
=
pack
"a4 NNNNNNNN a4 NN N20"
,
(
$header
->{
'magic'
},
$header
->{
'samples'
},
$header
->{
'samples_offset'
},
$header
->{
'bases'
},
$header
->{
'bases_left_clip'
},
$header
->{
'bases_right_clip'
},
$header
->{
'bases_offset'
},
$header
->{
'comments_size'
},
$header
->{
'comments_offset'
},
$header
->{
'version'
},
$header
->{
'sample_size'
},
$header
->{
'code_set'
},
@{
$header
->{
'spare'
}}
);
return
$binary
;
}
Hide Show 14 lines of Pod
sub
_get_binary_traces {
my
(
$self
,
$version
,
$ref
,
$sample_size
) =
@_
;
my
$returner
;
my
$sequence
=
$ref
->seq();
my
$sequence_length
=
length
(
$sequence
);
my
(
$traceobj
,
@traces
,
$current
);
if
(
ref
(
$ref
) eq
"Bio::Seq::Quality"
) {
$traceobj
= Bio::Seq::Quality->new(
-target
=>
$ref
);
$traceobj
->_synthesize_traces();
}
else
{
$traceobj
=
$ref
;
if
(
$version
eq
"2"
) {
my
$trace_length
=
$traceobj
->trace_length();
for
(
$current
= 1;
$current
<=
$trace_length
;
$current
++) {
foreach
(
qw(a c g t)
) {
push
@traces
,
$traceobj
->trace_value_at(
$_
,
$current
);
}
}
}
elsif
(
$version
== 3) {
foreach
my
$current_trace
(
qw(a c g t)
) {
my
@trace
= @{
$traceobj
->trace(
$current_trace
)};
foreach
(
@trace
) {
if
(
$_
> 30000) {
$_
-= 65536;
}
}
my
$transformed
=
$self
->_delta(\
@trace
,
"forward"
);
if
(
$sample_size
== 1){
foreach
(@{
$transformed
}) {
$_
+= 256
if
(
$_
< 0);
}
}
push
@traces
,@{
$transformed
};
}
}
}
$returner
->{version} =
$version
;
$returner
->{string} = \
@traces
;
my
$length_of_traces
=
scalar
(
@traces
);
my
$byte
;
if
(
$sample_size
== 1) {
$byte
=
"c"
; }
else
{
$byte
=
"n"
; }
$returner
->{binary} =
pack
"n${length_of_traces}"
,
@traces
;
$returner
->{
length
} = CORE::
length
(
$returner
->{binary});
return
$returner
;
}
sub
_get_binary_bases {
my
(
$self
,
$version
,
$trace
,
$sample_size
) =
@_
;
my
$byte
;
if
(
$sample_size
== 1) {
$byte
=
"c"
; }
else
{
$byte
=
"n"
; }
my
(
$returner
,
@current_row
,
$current_base
,
$string
,
$binary
);
my
$length
=
$trace
->
length
();
if
(
$version
== 2) {
$returner
->{
'version'
} =
"2"
;
for
(
my
$current_base
=1;
$current_base
<=
$length
;
$current_base
++) {
my
@current_row
;
push
@current_row
,
$trace
->peak_index_at(
$current_base
);
push
@current_row
,
$trace
->accuracy_at(
"a"
,
$current_base
);
push
@current_row
,
$trace
->accuracy_at(
"c"
,
$current_base
);
push
@current_row
,
$trace
->accuracy_at(
"g"
,
$current_base
);
push
@current_row
,
$trace
->accuracy_at(
"t"
,
$current_base
);
push
@current_row
,
$trace
->baseat(
$current_base
);
push
@current_row
,0,0,0;
push
@{
$returner
->{string}},
@current_row
;
$returner
->{binary} .=
pack
"N C C C C a C3"
,
@current_row
;
}
return
$returner
;
}
else
{
$returner
->{
'version'
} =
"3.00"
;
$returner
->{peak_indices}->{string} =
$trace
->peak_indices();
my
$length
=
scalar
(@{
$returner
->{peak_indices}->{string}});
$returner
->{peak_indices}->{binary} =
pack
"N$length"
,@{
$returner
->{peak_indices}->{string}};
$returner
->{peak_indices}->{
length
} =
CORE::
length
(
$returner
->{peak_indices}->{binary});
my
@accuracies
;
foreach
my
$base
(
qw(a c g t)
) {
$returner
->{accuracies}->{
$base
} =
$trace
->accuracies(
$base
);
push
@accuracies
,@{
$trace
->accuracies(
$base
)};
}
$returner
->{sequence} =
$trace
->seq();
$length
=
scalar
(
@accuracies
);
$returner
->{accuracies}->{binary} =
pack
"C${length}"
,
@accuracies
;
$returner
->{accuracies}->{
length
} =
CORE::
length
(
$returner
->{accuracies}->{binary});
$length
=
$trace
->seq_obj()->
length
();
for
(
my
$count
=0;
$count
<
$length
;
$count
++) {
push
@{
$returner
->{reserved}->{string}},0,0,0;
}
}
$length
=
scalar
(@{
$returner
->{reserved}->{string}});
$returner
->{
'reserved'
}->{
'binary'
} =
pack
"c$length"
,@{
$returner
->{reserved}->{string}};
$returner
->{
'reserved'
}->{
'length'
} =
CORE::
length
(
$returner
->{
'reserved'
}->{
'binary'
});
my
@bases
=
split
(
''
,
$trace
->seq());
$length
=
$trace
->
length
();
$returner
->{
'bases'
}->{
'binary'
} =
$trace
->seq();
return
(
$returner
->{peak_indices},
$returner
->{accuracies},
$returner
->{bases},
$returner
->{reserved});
}
Hide Show 12 lines of Pod
sub
_make_trace_string {
my
(
$self
,
$version
) =
@_
;
my
@traces
;
my
@traces_view
;
my
@as
= @{
$self
->{
'text'
}->{
'samples_a'
}};
my
@cs
= @{
$self
->{
'text'
}->{
'samples_c'
}};
my
@gs
= @{
$self
->{
'text'
}->{
'samples_g'
}};
my
@ts
= @{
$self
->{
'text'
}->{
'samples_t'
}};
if
(
$version
== 2) {
for
(
my
$curr
=0;
$curr
<
scalar
(
@as
);
$curr
++) {
$as
[
$curr
] =
$DEFAULT_QUALITY
unless
defined
$as
[
$curr
];
$cs
[
$curr
] =
$DEFAULT_QUALITY
unless
defined
$cs
[
$curr
];
$gs
[
$curr
] =
$DEFAULT_QUALITY
unless
defined
$gs
[
$curr
];
$ts
[
$curr
] =
$DEFAULT_QUALITY
unless
defined
$ts
[
$curr
];
push
@traces
,(
$as
[
$curr
],
$cs
[
$curr
],
$gs
[
$curr
],
$ts
[
$curr
]);
}
}
elsif
(
$version
== 3) {
@traces
= (
@as
,
@cs
,
@gs
,
@ts
);
}
else
{
$self
->throw(
"No idea what version required to make traces here. You gave #$version# Bailing."
);
}
my
$length
=
scalar
(
@traces
);
$self
->{
'text'
}->{
'samples_all'
} = \
@traces
;
}
Hide Show 14 lines of Pod
sub
_get_binary_comments {
my
(
$self
,
$rcomments
) =
@_
;
my
$returner
;
my
$comments_string
=
''
;
my
%comments
=
%$rcomments
;
foreach
my
$key
(
sort
keys
%comments
) {
$comments
{
$key
} ||=
''
;
$comments_string
.=
"$key=$comments{$key}\n"
;
}
$comments_string
.=
"\n\0"
;
my
$length
= CORE::
length
(
$comments_string
);
$returner
->{
length
} =
$length
;
$returner
->{string} =
$comments_string
;
$returner
->{binary} =
pack
"A$length"
,
$comments_string
;
return
$returner
;
}
Hide Show 14 lines of Pod
sub
_delta {
my
(
$self
,
$rsamples
,
$direction
) =
@_
;
my
@samples
=
@$rsamples
;
my
(
$i
,
$num_samples
,
$p_delta
,
$p_sample
,
@samples_converted
,
$p_sample1
,
$p_sample2
);
my
$SLOW_BUT_CLEAR
= 0;
$num_samples
=
scalar
(
@samples
);
if
(
$direction
eq
"forward"
) {
if
(
$SLOW_BUT_CLEAR
){
$p_delta
= 0;
for
(
$i
=0;
$i
<
$num_samples
;
$i
++) {
$p_sample
=
$samples
[
$i
];
$samples
[
$i
] =
$samples
[
$i
] -
$p_delta
;
$p_delta
=
$p_sample
;
}
$p_delta
= 0;
for
(
$i
=0;
$i
<
$num_samples
;
$i
++) {
$p_sample
=
$samples
[
$i
];
$samples
[
$i
] =
$samples
[
$i
] -
$p_delta
;
$p_delta
=
$p_sample
;
}
}
else
{
for
(
$i
=
$num_samples
-1;
$i
> 1;
$i
--){
$samples
[
$i
] =
$samples
[
$i
] - 2
*$samples
[
$i
-1] +
$samples
[
$i
-2];
}
$samples
[1] =
$samples
[1] - 2
*$samples
[0];
}
}
elsif
(
$direction
eq
"backward"
) {
if
(
$SLOW_BUT_CLEAR
){
$p_sample
= 0;
for
(
$i
=0;
$i
<
$num_samples
;
$i
++) {
$samples
[
$i
] =
$samples
[
$i
] +
$p_sample
;
$p_sample
=
$samples
[
$i
];
}
$p_sample
= 0;
for
(
$i
=0;
$i
<
$num_samples
;
$i
++) {
$samples
[
$i
] =
$samples
[
$i
] +
$p_sample
;
$p_sample
=
$samples
[
$i
];
}
}
else
{
$p_sample1
=
$p_sample2
= 0;
for
(
$i
= 0;
$i
<
$num_samples
;
$i
++){
$p_sample1
=
$p_sample1
+
$samples
[
$i
];
$samples
[
$i
] =
$p_sample1
+
$p_sample2
;
$p_sample2
=
$samples
[
$i
];
}
}
}
else
{
$self
->
warn
(
"Bad direction. Use \"forward\" or \"backward\"."
);
}
return
\
@samples
;
}
Hide Show 12 lines of Pod
sub
_unpack_magik {
my
(
$self
,
$buffer
) =
@_
;
my
$length
=
length
(
$buffer
);
my
(
@read
,
$counter
);
foreach
(
qw(c C s S i I l L n N v V)
) {
@read
=
unpack
"$_$length"
,
$buffer
;
for
(
$counter
=0;
$counter
< 20;
$counter
++) {
print
(
"$read[$counter]\n"
);
}
}
}
Hide Show 12 lines of Pod
sub
read_from_buffer {
my
(
$self
,
$fh
,
$buffer
,
$length
,
$start_position
) =
@_
;
if
(
$start_position
) {
}
if
(
$start_position
) {
seek
(
$fh
,
$start_position
,0);
}
else
{
}
read
$fh
,
$buffer
,
$length
;
unless
(
length
(
$buffer
) ==
$length
) {
$self
->
warn
(
"The read was incomplete! Trying harder."
);
my
$missing_length
=
$length
-
length
(
$buffer
);
my
$buffer2
;
read
$fh
,
$buffer2
,
$missing_length
;
$buffer
.=
$buffer2
;
if
(
length
(
$buffer
) !=
$length
) {
$self
->throw(
"Unexpected end of file while reading from SCF file. I should have read $length but instead got "
.
length
(
$buffer
).
"! Current file position is "
.
tell
(
$fh
).
"."
);
}
}
return
$buffer
;
}
Hide Show 11 lines of Pod
sub
_dump_keys {
my
$rhash
=
shift
;
if
(
$rhash
!~ /HASH/) {
print
(
"_dump_keys: that was not a hash.\nIt was #$rhash# which was this reference:"
.
ref
(
$rhash
).
"\n"
);
return
;
}
print
(
"_dump_keys: The keys for $rhash are:\n"
);
foreach
(
sort
keys
%$rhash
) {
print
(
"$_\n"
);
}
}
Hide Show 11 lines of Pod
sub
_dump_base_accuracies {
my
$self
=
shift
;
print
(
"Dumping base accuracies! for v3\n"
);
print
(
"There are this many elements in a,c,g,t:\n"
);
print
(
scalar
(@{
$self
->{
'text'
}->{
'v3_base_accuracy_a'
}}).
","
.
scalar
(@{
$self
->{
'text'
}->{
'v3_base_accuracy_c'
}}).
","
.
scalar
(@{
$self
->{
'text'
}->{
'v3_base_accuracy_g'
}}).
","
.
scalar
(@{
$self
->{
'text'
}->{
'v3_base_accuracy_t'
}}).
"\n"
);
my
$number_traces
=
scalar
(@{
$self
->{
'text'
}->{
'v3_base_accuracy_a'
}});
for
(
my
$counter
=0;
$counter
<
$number_traces
;
$counter
++ ) {
print
(
"$counter\t"
);
print
$self
->{
'text'
}->{
'v3_base_accuracy_a'
}->[
$counter
].
"\t"
;
print
$self
->{
'text'
}->{
'v3_base_accuracy_c'
}->[
$counter
].
"\t"
;
print
$self
->{
'text'
}->{
'v3_base_accuracy_g'
}->[
$counter
].
"\t"
;
print
$self
->{
'text'
}->{
'v3_base_accuracy_t'
}->[
$counter
].
"\t"
;
print
(
"\n"
);
}
}
Hide Show 11 lines of Pod
sub
_dump_peak_indices_incoming {
my
$self
=
shift
;
print
(
"Dump peak indices incoming!\n"
);
my
$length
=
$self
->{
'bases'
};
print
(
"The length is $length\n"
);
for
(
my
$count
=0;
$count
<
$length
;
$count
++) {
print
(
"$count\t$self->{parsed}->{peak_indices}->[$count]\n"
);
}
}
Hide Show 11 lines of Pod
sub
_dump_base_accuracies_incoming {
my
$self
=
shift
;
print
(
"Dumping base accuracies! for v3\n"
);
my
$number_traces
=
$self
->{
'bases'
};
for
(
my
$counter
=0;
$counter
<
$number_traces
;
$counter
++ ) {
print
(
"$counter\t"
);
foreach
(
qw(A T G C)
) {
print
$self
->{
'parsed'
}->{
'base_accuracies'
}->{
$_
}->[
$counter
].
"\t"
;
}
print
(
"\n"
);
}
}
Hide Show 11 lines of Pod
sub
_dump_comments {
my
(
$self
) =
@_
;
warn
(
"SCF comments:\n"
);
foreach
my
$k
(
keys
%{
$self
->{
'comments'
}}) {
warn
(
"\t {$k} ==> "
,
$self
->{
'comments'
}->{
$k
},
"\n"
);
}
}
1;