package Bio::CUA::CodonTable;

=pod

=head1 NAME

Bio::CUA::CodonTable -- A package processing genetic codon table

=head1 SYNOPSIS

This package is provided to improve the portability of
L<http://search.cpan.org/dist/Bio-CUA/>, for users who may not want
to install L<BioPerl|http://www.bioperl.org/>, which contains a huge
number of modules.

The package obtains genetic code tables from NCBI at
L<http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes>

examples:

	# get the standard genetic code
	my $table = Bio::CUA::CodonTable->new(-id => 1);

	# get the table from an input file if the known genetic codes
	# can not satisfy the need.
	my $table = Bio::CUA::CodonTable->new(-map_file =>
	'codon_to_aa.tsv');
	# in 'codon_to_aa.tsv', it looks like this
	# GCU	A
	# AAU	N
	# CAU	H
	# ...   ...

=cut

use 5.006;
use strict;
use warnings;
use parent qw/Bio::CUA/;

# global variables
# name of this package; interpolated into the warning issued by new()
my $pkg = __PACKAGE__;
# the character that marks a stop in the amino-acid column of a table;
# compared against in _is_stop_aa()
my $STOPAA = '*';
# genetic code ids accepted by _is_valid_gc_id()
# NOTE(review): the data section at the end also holds a table with id 15
# which is not accepted here -- confirm whether this is intentional
my %validGCIds = map { $_ => 1 } (1..6,9..14,16,21..25); # in future this can be derived
# from data section at the end

=head2 new

 Title   : new
 Usage   : $obj = Bio::CUA::CodonTable->new(-map_file => 'file');
 Function: create an object for processing genetic codon tables
 Returns : an object of L<Bio::CUA::CodonTable>
 Args    : a hash with following keys:

=over 4

=item -id

 genetic code id. The id follows NCBI's standard, here are
 the list:
  1. The Standard Code
  2. The Vertebrate Mitochondrial Code
  3. The Yeast Mitochondrial Code
  4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and
     the Mycoplasma/Spiroplasma Code
  5. The Invertebrate Mitochondrial Code
  6. The Ciliate, Dasycladacean and Hexamita Nuclear Code
  9. The Echinoderm and Flatworm Mitochondrial Code
  10. The Euplotid Nuclear Code
  11. The Bacterial, Archaeal and Plant Plastid Code
  12. The Alternative Yeast Nuclear Code
  13. The Ascidian Mitochondrial Code
  14. The Alternative Flatworm Mitochondrial Code
  16. Chlorophycean Mitochondrial Code
  21. Trematode Mitochondrial Code
  22. Scenedesmus obliquus Mitochondrial Code
  23. Thraustochytrium Mitochondrial Code
  24. Pterobranchia Mitochondrial Code
  25. Candidate Division SR1 and Gracilibacteria Code
  see
  L<http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=tgencodes#SG1>
  for more details.

=item -map_file

 -map_file = a file containing a mapping between codons to amino
 acids, one codon per line followed by its amino acid, separated by
 tab or space.

=item -debug

 a switch to indicate whether to show more warnings which may
 help to identify sources of errors if any. put 1 to switch
 it on. The default is off.

=back

  Note: argument -map_file has higher priority than -id, and the
  default is -id => 1, i.e., the standard genetic code

=cut

sub new
{
	my ($class, @params) = @_;

	# the parent constructor creates the object; the same argument list
	# is then turned into a hash (via _array_to_hash) for the lookups below
	my $self = $class->SUPER::new(@params);
	my $opts = $self->_array_to_hash(\@params);

	# a mapping file, when given, takes priority over a table id
	if($opts->{'map_file'})
	{
		$self->_build_table_by_file($opts->{'map_file'});
		return $self;
	}

	if($opts->{'id'})
	{
		$self->_build_table_by_id($opts->{'id'});
		return $self;
	}

	# neither option was given: use table 1, and say so only when the
	# debug switch is on
	if($self->debug)
	{
		$self->warn("No arguments -map_file or -id is provided in",
			"$pkg, -id => 1 will be used");
	}
	$self->_build_table_by_id(1);

	return $self;
}

# Build the genetic code table from a user-supplied mapping file.
#
# Args    : path of the mapping file
# Returns : 1
# Effects : fills '_codon_to_aa', '_stop_codons' and '_num_codons' of the
#           object; '_start_codons' is NOT set by this route.
# Entries whose codon is not three of A/T/C/G (after U->T conversion and
# upper-casing) are reported with a warning and left out of the table.
# NOTE(review): $self->_parse_file($file,2) is assumed to return a hash
# reference mapping codon => amino acid -- confirm against Bio::CUA.
sub _build_table_by_file
{
	my ($self, $file) = @_;

	my $codonToAA = $self->_parse_file($file,2);

	# check all the codons and amino acids
	my %validCodons;
	my %stopCodons;
	for my $rawCodon (keys %$codonToAA)
	{
		my $AA    = $codonToAA->{$rawCodon};
		my $codon = _process_codon($rawCodon);
		unless($codon =~ /^[ATCG]{3}$/)
		{
			# skip unconditionally; the skip must not depend on what
			# warn() happens to return
			$self->warn("$codon is Not a valid codon");
			next;
		}
		$validCodons{$codon} = $AA;
		$stopCodons{$codon}++ if($self->_is_stop_aa($AA));
	}

	$self->{'_codon_to_aa'} = \%validCodons;
	$self->{'_stop_codons'} = \%stopCodons;
	my $totalCodonNum = scalar(keys %validCodons);
	$self->{'_num_codons'} = $totalCodonNum;
	if($totalCodonNum < 64)
	{
		$self->warn("Only $totalCodonNum valid codons found in '$file'");
	}

	return 1;
}

# Build the genetic code table for a given NCBI table id.
#
# Args    : a genetic code id; it must be accepted by _is_valid_gc_id()
# Returns : 1
# Effects : sets the tags 'name' and 'id', and fills '_codon_to_aa',
#           '_stop_codons', '_start_codons' and '_num_codons'.
# Throws  : when the id is not valid, when no table with this id exists in
#           the data section, when a table lacks one of its lines, or when
#           the base/amino-acid lines differ in length.
# The tables are read from the text following __END__ in this very file.
sub _build_table_by_id
{
	my ($self, $id) = @_;

	$self->throw("Id '$id' is not a valid genetic code table Id")
	unless($self->_is_valid_gc_id($id));

	my $fh = $self->_open_file(__FILE__);

	# collect the text between __END__ and the '<<GC' terminator, leaving
	# out lines that start with '>' or '--'; the indented '-- BaseN' lines
	# inside the tables do not start at column 0 and are therefore kept
	my $inDataSection = 0;
	my $data = '';
	while(my $line = <$fh>)
	{
		$inDataSection = 1 if($line =~ /^__END__/);
		next unless($inDataSection);
		last if($line =~ /^<<GC/); # end of the section
		next if($line =~ /^>/ or $line =~ /^--/); # comment lines
		$data .= $line;
	}
	close $fh;

	# walk through the '{ name ... }' records and keep the one whose id
	# line matches; a record is only accepted on a match, so an id that
	# is absent from the data can not fall through to another table.
	# The braces are escaped so that the pattern does not depend on how
	# a given perl treats an unescaped '{'.
	my $table;
	while($data =~ /\n *\{ *\n *(name[^}]+)\}/g)
	{
		my $candidate = $1;
		next unless($candidate =~ /^ *id\s+\Q$id\E\s*,/m);
		$table = $candidate;
		last; # found
	}
	$self->throw("Can not find genetic code table '$id' in the data section of $pkg")
	unless(defined $table);

	# now parse this table
	my ($b1) = $table =~ /^ *-- +Base1 +(\w+)/m;
	my ($b2) = $table =~ /^ *-- +Base2 +(\w+)/m;
	my ($b3) = $table =~ /^ *-- +Base3 +(\w+)/m;
	my ($AAs) = $table =~ /^ *ncbieaa +"([^"]+)"/m;
	my ($starts) = $table =~ /^ *sncbieaa +"([^"]+)"/m;
	for my $field ($b1, $b2, $b3, $AAs, $starts)
	{
		$self->throw("Genetic code table '$id' is incomplete in the data section of $pkg")
		unless(defined $field);
	}
	$AAs =~ s/\s+//g;
	$starts =~ s/\s+//g;

	# a table may carry several names; each is kept with its quotes, and
	# a name wrapped over two lines is joined with a space
	my @names;
	while($table =~ /^ *name +("[^"]+")/mg)
	{
		my $name = $1;
		$name =~ s/\n/ /g;
		push @names, $name;
	}

	$self->warn("The length of lines in genetic table $id is not 64")
	unless(length($b1) == 64);
	$self->throw("lines of bases and amino acids are not the same long", 
		"in genetic table $id") 
	unless( length($b1) == length($b2) and
		    length($b1) == length($b3) and
			length($b1) == length($AAs) and
			length($b1) == length($starts));

	$self->set_tag('name', join(' or ', @names));
	$self->set_tag('id', $id);

	# column $i of the five lines describes one codon: its three bases,
	# its amino acid and whether it is marked as a start ('-' means not)
	my %codonToAA;
	my %stopCodons;
	my %startCodons;
	for my $i (0 .. length($b1) - 1)
	{
		my $codon = uc(substr($b1, $i, 1).
			substr($b2, $i, 1).
			substr($b3, $i, 1));
		my $AA    = substr($AAs, $i, 1);
		my $start = substr($starts, $i, 1);
		$codonToAA{$codon} = $AA;
		$stopCodons{$codon}++ if($self->_is_stop_aa($AA));
		$startCodons{$codon}++ unless($start eq '-');
	}

	$self->{'_codon_to_aa'} = \%codonToAA;
	$self->{'_stop_codons'} = \%stopCodons;
	$self->{'_start_codons'} = \%startCodons;
	$self->{'_num_codons'} = scalar(keys %codonToAA);

	return 1;
}

=head2 name

 Title   : name
 Usage   : $name = $self->name();
 Function: the name of genetic code table in use
 Returns : a string for the name
 Args    : None

=cut

sub name
{
	my ($self) = @_;

	# the name is kept as the tag 'name' of the object
	return $self->get_tag('name');
}

=head2 id

 Title   : id
 Usage   : $id = $self->id();
 Function: the id of genetic code table in use
 Returns : an integer for the id
 Args    : None

=cut

sub id
{
	my ($self) = @_;

	# the id is kept as the tag 'id' of the object
	return $self->get_tag('id');
}

=head2 total_num_of_codons

 Title   : total_num_of_codons
 Usage   : $num = $self->total_num_of_codons;
 Function: get total codons of the genetic code table in use
 Returns : an integer
 Args    : None

=cut

sub total_num_of_codons
{
	my ($self) = @_;

	# the count is stored when the table is built
	return $self->{'_num_codons'};
}

# check whether an id is one of the accepted genetic code ids
# Returns 1 if the id is listed in %validGCIds, otherwise 0
sub _is_valid_gc_id
{
	my ($self, $id) = @_;

	return $validGCIds{$id} ? 1 : 0;
}

# tell whether an amino-acid symbol is the stop symbol ($STOPAA)
# Returns 1 if it is, otherwise 0
sub _is_stop_aa
{
	my ($self, $AA) = @_;

	return ($AA eq $STOPAA) ? 1 : 0;
}

=head2 is_valid_codon

 Title   : is_valid_codon
 Usage   : $test = $self->is_valid_codon('ACG');
 Function: test whether a given character string is a valid codon in
 current codon table
 Returns : 1 if true, otherwise 0
 Args    : a codon sequence

=cut
# A codon is valid when, after normalisation by _process_codon, it is
# made of exactly three unambiguous bases AND is a key of the current
# codon table. The third argument is accepted but not used at present.
sub is_valid_codon
{
	my ($self,$codon,$allowAmb) = @_;

	my $normalized = _process_codon($codon);

	# no ambiguous bases at present
	if($normalized !~ /^[ATCGU]{3}$/)
	{
		return 0;
	}

	# the codon must also be present in the codon table
	my $table = $self->{'_codon_to_aa'};
	return exists($table->{$normalized}) ? 1 : 0;
}

=head2 all_sense_codons

 Title   : all_sense_codons
 Usage   : @codons = $self->all_sense_codons;
 Function: get all the sense codons in this genetic code table
 Returns : an array of codons, or its reference in scalar context
 Args    : None

=cut

sub all_sense_codons
{
	my $self = shift;

	my $table = $self->{'_codon_to_aa'};
	my $stops = $self->{'_stop_codons'};

	# a sense codon is any codon of the table that is not a stop codon
	my @sense;
	for my $codon (keys %$table)
	{
		next if(exists $stops->{$codon});
		push @sense, $codon;
	}

	return @sense if(wantarray);
	return \@sense;
}

=head2 all_amino_acids

 Title   : all_amino_acids
 Usage   : @AAs = $self->all_amino_acids
 Function: get all the amino acids in this genetic code table
 Returns : an array of amino acids, or its reference if in scalar
 context
 Args    : None

=cut

sub all_amino_acids
{
	my $self = shift;
	my $codonToAA = $self->{'_codon_to_aa'} or return;

	# collect the distinct amino acids, leaving out the stop symbol.
	# values() always walks the whole hash, whereas an each() loop would
	# resume from wherever an earlier, unfinished each() on the same hash
	# (e.g. on the reference handed out by codon_to_AA_map) had stopped
	# and so could miss entries.
	my %seen;
	for my $AA (values %$codonToAA)
	{
		next if($self->_is_stop_aa($AA));
		$seen{$AA} = 1;
	}

	my @AAs = keys %seen;
	return wantarray? @AAs : \@AAs;
}

=head2 all_start_codons

 Title   : all_start_codons
 Usage   : @startCodons = $self->all_start_codons;
 Function: get all the start codons in the genetic code table in use
 Returns : an array of codons, or its reference if in scalar context
 Args    : None

=cut

sub all_start_codons
{
	my $self = shift;

	# start codons are only recorded for tables built from the built-in
	# data (by id). Without them, warn and return nothing; the return
	# must not depend on the value warn() gives back.
	unless(exists $self->{'_start_codons'})
	{
		$self->warn("No marked start codons in this GC table");
		return;
	}

	my @codons = keys %{$self->{'_start_codons'}};
	return wantarray? @codons : \@codons;
}

=head2 all_stop_codons

 Title   : all_stop_codons
 Usage   : @stopCodons = $self->all_stop_codons;
 Function: get all the stop codons in the genetic code table in use
 Returns : an array of codons, or its reference if in scalar context
 Args    : None

=cut

sub all_stop_codons
{
	my ($self) = @_;

	# the stop codons are the keys of the '_stop_codons' hash
	my @stops = keys %{$self->{'_stop_codons'}};

	return @stops if(wantarray);
	return \@stops;
}

=head2 codons_of_AA

 Title   : codons_of_AA
 Usage   : @codons = $self->codons_of_AA('S');
 Function: get codons encoding the given amino acid
 Returns : an array of codons, or its reference if in scalar context
 Args    : a single amino acid; for stop codons, one can give '*' here

=cut

sub codons_of_AA
{
	my ($self, $AA) = @_;

	# normalise the query: no whitespace, upper case
	$AA =~ s/\s+//g;
	$AA = uc($AA);
	if(length($AA) > 1)
	{
		$self->throw("Can only process one amino acid each time");
	}

	# pick every codon whose amino acid equals the query
	my $table = $self->{'_codon_to_aa'};
	my @matched;
	for my $codon (keys %$table)
	{
		push @matched, $codon if($table->{$codon} eq $AA);
	}

	return @matched if(wantarray);
	return \@matched;
}

=head2 codon_to_AA_map

 Title   : codon_to_AA_map
 Usage   : $hash = $self->codon_to_AA_map
 Function: get the mapping from codon to amino acid in a hash
 Returns : a hash reference in which codons are keys and AAs are
 values
 Args    : None

=cut

sub codon_to_AA_map
{
	my ($self) = @_;

	# hands out the internal hash itself, not a copy
	return $self->{'_codon_to_aa'};
}

=head2 translate

 Title   : translate
 Usage   : $AA_string = $self->translate('ATGGCA');
 Function: get the translation of input nucleotides
 Returns : a string of amino acids, unknown amino acids are
 represented as 'X'.
 Args    : nucleotide sequence.
 Note : if the length of the input sequence is not a multiple of 3,
 the trailing 1 or 2 nucleotides are simply ignored.

=cut

sub translate
{
	my ($self, $seq) = @_;

	# nothing to translate
	return '' unless(defined $seq);

	$seq =~ s/\s+//g;
	$seq = uc($seq);

	my $codonToAA = $self->{'_codon_to_aa'};
	my $AAs = '';

	# walk over the complete triplets; 1 or 2 trailing nucleotides are
	# never reached
	my $lastStart = length($seq) - 3;
	for(my $pos = 0; $pos <= $lastStart; $pos += 3)
	{
		my $codon = substr($seq, $pos, 3);

		# the table is keyed by DNA codons, so RNA input (U) has to be
		# looked up as T; the same key decides both the warning and the
		# translation, so the two can not disagree
		(my $key = $codon) =~ tr/U/T/;
		if(exists $codonToAA->{$key})
		{
			$AAs .= $codonToAA->{$key};
		}else
		{
			$self->warn("'$codon' is not a valid codon");
			$AAs .= 'X'; # X for unknown codons
		}
	}

	return $AAs;
}

=head2 is_stop_codon

 Title   : is_stop_codon
 Usage   : $test = $self->is_stop_codon('UAG');
 Function: check whether this is a stop codon
 Returns : 1 if true, otherwise 0
 Args    : a codon sequence

=cut
# check whether a codon, after normalisation by _process_codon, is
# recorded as a stop codon of the current table
sub is_stop_codon
{
	my ($self, $codon) = @_;

	my $stops = $self->{'_stop_codons'};
	my $key   = _process_codon($codon);

	return $stops->{$key} ? 1 : 0;
}

# normalise a codon before any other action: upper case, no whitespace,
# and U written as T
sub _process_codon
{
	my ($raw) = @_;

	(my $codon = uc($raw)) =~ s/\s+//g;
	$codon =~ tr/U/T/; # U to T
	return $codon;
}

=head2 codon_degeneracy

 Title   : codon_degeneracy
 Usage   : $hash = $self->codon_degeneracy;
 Function: group AAs and codons into codon degeneracy groups
 Returns : reference to a hash in which 1st level key is degeneracy
 (i.e., 1,2,6,etc), 2nd level key is amino acids for that degeneracy
 group, and 3rd level is reference of arrays containing coding codons
 for each amino acid. For example:

 { 2 => { D => [GAU, GAC],
          C => [UGU, UGC],
		  ...  ...
 		},
   4 => { A => [GCU, GCC, GCA, GCG],
          ...  ...
        },
	...  ...  ...
 }

 Args    : None

=cut

# group AAs and codons into redundancy groups
# The result is built on the first call and stored in '_codon_deg';
# later calls return the stored reference.
sub codon_degeneracy
{
	my $self = shift;

	return $self->{'_codon_deg'} if(exists $self->{'_codon_deg'});

	# otherwise construct it if it is the first time.
	# keys() always walks the whole hash, whereas an each() loop would
	# resume from wherever an earlier, unfinished each() on the same hash
	# (e.g. on the reference handed out by codon_to_AA_map) had stopped
	# and so could miss codons.
	my $codonToAA = $self->{'_codon_to_aa'};
	my %aaToCodon;
	for my $codon (keys %$codonToAA)
	{
		# ignore stop codons
		next if($self->is_stop_codon($codon));
		push @{$aaToCodon{$codonToAA->{$codon}}}, $codon;
	}

	# the degeneracy of an amino acid is the number of its codons
	my %redundancy;
	for my $AA (keys %aaToCodon)
	{
		my @codons = sort @{$aaToCodon{$AA}};
		$redundancy{scalar(@codons)}->{$AA} = \@codons;
	}

	$self->{'_codon_deg'} = \%redundancy; # store it first
	return \%redundancy;
}


=head1 AUTHOR

Zhenguo Zhang, C<< <zhangz.sci at gmail.com> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-bio-cua at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Bio-CUA>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.


=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Bio::CUA::CodonTable


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker (report bugs here)

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Bio-CUA>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/Bio-CUA>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/Bio-CUA>

=item * Search CPAN

L<http://search.cpan.org/dist/Bio-CUA/>

=back


=head1 ACKNOWLEDGEMENTS


=head1 LICENSE AND COPYRIGHT

Copyright 2015 Zhenguo Zhang.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see L<http://www.gnu.org/licenses/>.


=cut

1; # End of Bio::CUA::CodonTable

__END__
-- data section
>>GC
-- downloaded from ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
--**************************************************************************
--  This is the NCBI genetic code table
--  Initial base data set from Andrzej Elzanowski while at PIR International
--  Addition of Eubacterial and Alternative Yeast by J.Ostell at NCBI
--  Base 1-3 of each codon have been added as comments to facilitate
--    readability at the suggestion of Peter Rice, EMBL
--  Later additions by Taxonomy Group staff at NCBI
--
--  Version 4.0
--     Updated version to reflect numerous undocumented changes:
--     Corrected start codons for genetic code 25
--     Name of new genetic code is Candidate Division SR1 and Gracilibacteria
--     Added candidate division SR1 nuclear genetic code 25
--     Added GTG as start codon for genetic code 24
--     Corrected Pterobranchia Mitochondrial genetic code (24)
--     Added genetic code 24, Pterobranchia Mitochondrial
--     Genetic code 11 is now Bacterial, Archaeal and Plant Plastid
--     Fixed capitalization of mitochondrial in codes 22 and 23
--     Added GTG, ATA, and TTG as alternative start codons to code 13
--
--  Version 3.9
--     Code 14 differs from code 9 only by translating UAA to Tyr rather than
--     STOP.  A recent study (Telford et al, 2000) has found no evidence that
--     the codon UAA codes for Tyr in the flatworms, but other opinions exist.
--     There are very few GenBank records that are translated with code 14,
--     but a test translation shows that retranslating these records with code
--     9 can cause premature terminations.  Therefore, GenBank will maintain
--     code 14 until further information becomes available.
--
--  Version 3.8
--     Added GTG start to Echinoderm mitochondrial code, code 9
--
--  Version 3.7
--     Added code 23 Thraustochytrium mitochondrial code
--        formerly OGMP code 93
--        submitted by Gertraude Berger, Ph.D.
--
--  Version 3.6
--     Added code 22 TAG-Leu, TCA-stop
--        found in mitochondrial DNA of Scenedesmus obliquus
--        submitted by Gertraude Berger, Ph.D.
--        Organelle Genome Megasequencing Program, Univ Montreal
--
--  Version 3.5
--     Added code 21, Trematode Mitochondrial
--       (as deduced from: Garey & Wolstenholme,1989; Ohama et al, 1990)
--     Added code 16, Chlorophycean Mitochondrial
--       (TAG can translated to Leucine instaed to STOP in chlorophyceans
--        and fungi)
--
--  Version 3.4
--     Added CTG,TTG as allowed alternate start codons in Standard code.
--        Prats et al. 1989, Hann et al. 1992
--
--  Version 3.3 - 10/13/95
--     Added alternate intiation codon ATC to code 5
--        based on complete mitochondrial genome of honeybee
--        Crozier and Crozier (1993)
--
--  Version 3.2 - 6/24/95
--  Code       Comments
--   10        Alternative Ciliate Macronuclear renamed to Euplotid Macro...
--   15        Bleharisma Macro.. code added
--    5        Invertebrate Mito.. GTG allowed as alternate initiator
--   11        Eubacterial renamed to Bacterial as most alternate starts
--               have been found in Achea
--
--
--  Version 3.1 - 1995
--  Updated as per Andrzej Elzanowski at NCBI
--     Complete documentation in NCBI toolkit documentation
--  Note: 2 genetic codes have been deleted
--
--   Old id   Use id     - Notes
--
--   id 7      id 4      - Kinetoplast code now merged in code id 4
--   id 8      id 1      - all plant chloroplast differences due to RNA edit
--
--*************************************************************************

Genetic-code-table ::= {
 {
  name "Standard" ,
  name "SGC0" ,
  id 1 ,
  ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "---M---------------M---------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Vertebrate Mitochondrial" ,
  name "SGC1" ,
  id 2 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
  sncbieaa "--------------------------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Yeast Mitochondrial" ,
  name "SGC2" ,
  id 3 ,
  ncbieaa  "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "----------------------------------MM----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
    name "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate
 Mitochondrial; Mycoplasma; Spiroplasma" ,
  name "SGC3" ,
  id 4 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "--MM---------------M------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Invertebrate Mitochondrial" ,
  name "SGC4" ,
  id 5 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "---M----------------------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear" ,
  name "SGC5" ,
  id 6 ,
  ncbieaa  "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Echinoderm Mitochondrial; Flatworm Mitochondrial" ,
  name "SGC8" ,
  id 9 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Euplotid Nuclear" ,
  name "SGC9" ,
  id 10 ,
  ncbieaa  "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Bacterial, Archaeal and Plant Plastid" ,
  id 11 ,
  ncbieaa  "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "---M---------------M------------MMMM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Alternative Yeast Nuclear" ,
  id 12 ,
  ncbieaa  "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-------------------M---------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Ascidian Mitochondrial" ,
  id 13 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
  sncbieaa "---M------------------------------MM---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 },
 {
  name "Alternative Flatworm Mitochondrial" ,
  id 14 ,
  ncbieaa  "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Blepharisma Macronuclear" ,
  id 15 ,
  ncbieaa  "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Chlorophycean Mitochondrial" ,
  id 16 ,
  ncbieaa  "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Trematode Mitochondrial" ,
  id 21 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Scenedesmus obliquus Mitochondrial" ,
  id 22 ,
  ncbieaa  "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "-----------------------------------M----------------------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Thraustochytrium Mitochondrial" ,
  id 23 ,
  ncbieaa  "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "--------------------------------M--M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Pterobranchia Mitochondrial" ,
  id 24 ,
  ncbieaa  "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
  sncbieaa "---M---------------M---------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 } ,
 {
  name "Candidate Division SR1 and Gracilibacteria" ,
  id 25 ,
  ncbieaa  "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
  sncbieaa "---M-------------------------------M---------------M------------"
  -- Base1  TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
  -- Base2  TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
  -- Base3  TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
 }
}
<<GC # end of genetic code section

<<END_DATA