#
# BioStudio module for comparing normalized SeqFeatures
#

=head1 NAME

Bio::BioStudio::Diff

=head1 VERSION

Version 2.10

=head1 DESCRIPTION

An object that allows the comparison of two Bio::BioStudio::Chromosome objects.

=head1 AUTHOR

Sarah Richardson <smrichardson@lbl.gov>.

=cut

package Bio::BioStudio::Diff;

use Bio::BioStudio::Diff::Difference;
use Bio::BioStudio::ConfigData;
use Bio::GeneDesign::Basic qw(_compare_sequences);
use Text::Diff;

use strict;
use warnings;

use base qw(Bio::Root::Root);

our $VERSION = 2.10;

my %CODES = (
      1  => "deleted feature",
      2  => "added feature",
      3  => "lost subfeature",
      4  => "gained subfeature",
      5  => "lost sequence",
      6  => "gained sequence",
      7  => "change in translation",
      8  => "change in sequence",
      9  => "lost attributes",
      10 => "gained attributes",
      11 => "changed annotation",
      12 => "changed subfeature"
);

=head1 CONSTRUCTOR METHOD

=head2 new

=cut

sub new
{
  my ($class, @args) = @_;
  my $self = $class->SUPER::new(@args);

  my ($oldchr, $newchr, $checktrx, $aligntrx, $checkseq, $alignseq) =
     $self->_rearrange([qw(OLDCHR
                           NEWCHR
                           CHECKTRANSLATION
                           ALIGNTRANSLATION
                           CHECKSEQUENCE
                           ALIGNSEQUENCE)], @args);
                           
   $self->throw("\"Old\" chromosome not supplied") unless ($oldchr);
   $self->oldchr($oldchr);

   $self->throw("\"New\" chromosome not supplied") unless ($newchr);
   $self->newchr($newchr);
   
   if ($aligntrx || $alignseq)
   {
     my $bs = Bio::BioStudio::ConfigData->config('blast_support');
     if ($bs ne 'Y')
     {
       warn "Cannot perform alignments: blast support is not enabled\n";
       $aligntrx = undef;
       $alignseq = undef;
     }
   }
                  
   $self->{checktrx} = $checktrx;
   $self->{aligntrx} = $aligntrx;
   $self->{checkseq} = $checkseq;
   $self->{alignseq} = $alignseq;

   return $self;
}

=head2 DESTROY

=cut

sub DESTROY
{
  my ($self) = @_;
  $self->{BLAST_factory}->cleanup() if (defined $self->{BLAST_factory});
  $self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
  return;
}

=head1 COMPARISON METHODS

=head2 compare_dbs

=cut

sub compare_dbs
{
  my ($self) = @_;

  my $olddb = $self->oldchr->db;
  my $newdb = $self->newchr->db;

  my (%feats1, %feats2) = ((), ());

  #PARENTS ONLY! Subfeatures will be dealt with downstream
  my $iterator1 = $olddb->get_seq_stream();
  while (my $feature = $iterator1->next_seq)
  {
    next if ($feature->has_tag('parent_id'));
    $feats1{$feature->display_name} = $feature->primary_id;
  }

  my $iterator2 = $newdb->get_seq_stream();
  while (my $feature = $iterator2->next_seq)
  {
    next if ($feature->has_tag('parent_id'));
    $feats2{$feature->display_name} = $feature->primary_id;
  }

  my @D;

  #
  # Common Features
  #
  my @commons = grep  { exists $feats2{$_} } keys %feats1;
  foreach my $featid (@commons)
  {
    my $oldfeat = $olddb->fetch($feats1{$featid});
    my $newfeat = $newdb->fetch($feats2{$featid});
    $self->throw("Can't find $featid in olddb!") unless ($oldfeat);
    $self->throw("Can't find $featid in newdb!") unless ($newfeat);
    push @D, $self->compare_features($oldfeat, $newfeat);
  }

  #
  # Deleted Features
  #
  my @onlyolds = grep {! exists $feats2{$_} } keys %feats1;
  foreach my $featid (@onlyolds)
  {
    my $feat = $olddb->fetch($feats1{$featid});
    $self->throw("Can't find $featid in olddb!") unless ($feat);
    my $baseloss = $feat->end - $feat->start + 1;
    push @D, Bio::BioStudio::Diff::Difference->new(
        -oldfeat => $feat,
        -code => 1,
        -baseloss => $baseloss
    );
  }

  #
  # Inserted Features
  #
  my @onlynews = grep {! exists $feats1{$_} } keys %feats2;
  foreach my $featid (@onlynews)
  {
    my $feat = $newdb->fetch($feats2{$featid});
    $self->throw("Can't find $featid in newdb!") unless ($feat);
    my $basechange = 0;
    my $basegain = $feat->end - $feat->start + 1;
    if ($feat->has_tag("newseq"))
    {
      my $cthsh = _compare_sequences($feat->Tag_wtseq, $feat->Tag_newseq);
      $basechange = $cthsh->{D};
    }
    push @D, Bio::BioStudio::Diff::Difference->new(
        -newfeat => $feat,
        -code => 2,
        -basegain => $basegain,
        -basechange => $basechange
    );
  }

  return @D;
}

=head2 compare_features

=cut

sub compare_features
{
  my ($self, $feat1, $feat2) = @_;
  my @Changes = ();

  my $oldchr = $self->{oldchr};
  my $GD = $oldchr->GD;

  ##Check for subfeatures and compare
  if (scalar($feat1->get_SeqFeatures) || scalar($feat2->get_SeqFeatures))
  {
    my @allsubs1 = $oldchr->flatten_subfeats($feat1);
    my @allsubs2 = $oldchr->flatten_subfeats($feat2);
    my %types = map {$_->primary_tag() => 1} @allsubs1, @allsubs2;
    foreach my $type (keys %types)
    {
      my @subtype1s = grep {$_->primary_tag eq $type} @allsubs1;
      my %subs1 = map {$_->Tag_load_id => $_} @subtype1s;
      my @subtype2s = grep {$_->primary_tag eq $type} @allsubs2;
      my %subs2 = map {$_->Tag_load_id => $_} @subtype2s;

      #Compare all sub features of type $type
      foreach my $sub1id (grep {exists $subs2{$_}} keys %subs1)
      {
        my $subfeat1 = delete $subs1{$sub1id};
        my $subfeat2 = delete $subs2{$sub1id};
        foreach my $sobj ($self->compare_features($subfeat1, $subfeat2))
        {
          if ($sobj->code() >= 4)
          {
            push @Changes, Bio::BioStudio::Diff::Difference->new(
                      -oldfeat => $feat1,
                      -newfeat => $feat2,
                      -oldsubfeat => $subfeat1,
                      -newsubfeat => $subfeat2,
                      -code => 12,
                      -subdiff => $sobj);
          }
        }
      }
      #Deleted subfeatures of type $type
      foreach (keys %subs1)
      {
        my $subfeat = $subs1{$_};
        my $baseloss = $subfeat->end - $subfeat->start + 1;

        push @Changes, Bio::BioStudio::Diff::Difference->new(
                  -oldfeat => $feat1,
                  -oldsubfeat => $subfeat,
                  -newfeat => $feat2,
                  -baseloss => $baseloss,
                  -code => 3);
      }
      #Inserted subfeatures of type $type
      foreach (keys %subs2)
      {
        my $subfeat = $subs2{$_};
        my $basechange = 0;
        if ($subfeat->has_tag("newseq"))
        {
          my $ct = _compare_sequences($subfeat->Tag_wtseq, $subfeat->Tag_newseq);
          $basechange = $ct->{D};
        }
        push @Changes, Bio::BioStudio::Diff::Difference->new(
                  -oldfeat => $feat1,
                  -newfeat => $feat2,
                  -newsubfeat => $subfeat,
                  -code => 4,
                  -basechange => $basechange);
      }
    }
  }

  ##Check to see if attributes have changed
  my $attarray = $self->compare_feature_attributes($feat1, $feat2);
  push @Changes, @{$attarray} if ($attarray);

  ##Check to see if annotations have changed
  my $annarray = $self->compare_feature_annotations($feat1, $feat2);
  push @Changes, @{$annarray} if ($annarray);

  ##If the translation switch is on, see if translation has changed (CDS ONLY)
  if ($feat1->primary_tag() eq "CDS" && ($self->checktrx || $self->aligntrx))
  {
    my $ch = $self->compare_feature_translations($feat1, $feat2);
    push @Changes, $ch if ($ch);
  }

  ##If check sequence is on, see if the feature sequence has changed
  if ($self->{checkseq} || $self->{alignseq})
  {
    my $ch = $self->compare_feature_sequences($feat1, $feat2);
    push @Changes, $ch if ($ch);
  }
  return @Changes;
}

=head2 compare_feature_sources

=cut

sub compare_feature_sources
{
  my ($self, $feat1, $feat2) = @_;
  return if ($feat1->source() eq $feat2->source());
  return Bio::BioStudio::Diff::Difference->new(
            -oldfeat => $feat1,
            -newfeat => $feat2,
            -oldatt => $feat1->source_tag(),
            -newatt => $feat2->source_tag(),
            -comment => "source",
            -code => 11
  );
}

=head2 compare_feature_types

=cut

sub compare_feature_types
{
  my ($self, $feat1, $feat2) = @_;
  return if ($feat1->primary_tag() eq $feat2->primary_tag());
  return Bio::BioStudio::Diff::Difference->new(
            -oldfeat => $feat1,
            -newfeat => $feat2,
            -oldatt => $feat1->primary_tag(),
            -newatt => $feat2->primary_tag(),
            -comment => "primary_tag",
            -code => 11
  );
}

=head2 compare_feature_lengths

=cut

sub compare_feature_lengths
{
  my ($self, $feat1, $feat2) = @_;
  my $len1 = $feat1->end - $feat1->start + 1;
  my $len2 = $feat2->end - $feat2->start + 1;
  my $difference = abs($len1 - $len2);
  return if ($difference == 0);
  my ($code, $baseloss, $basegain) = (0, 0, 0);
  if ($len1 > $len2)
  {
    $code = 5;
    $baseloss = $difference;
  }
  else
  {
    $code = 6;
    $basegain = $difference;
  }
  return Bio::BioStudio::Diff::Difference->new(
            -oldfeat => $feat1,
            -newfeat => $feat2,
            -oldatt => $len1,
            -newatt => $len2,
            -code => $code,
            -baseloss => $baseloss,
            -basegain => $basegain
  );
}

=head2 compare_feature_orientations

=cut

sub compare_feature_orientations
{
  my ($self, $feat1, $feat2) = @_;
  return if ($feat1->strand() eq $feat2->strand());
  return Bio::BioStudio::Diff::Difference->new(
            -oldfeat => $feat1,
            -newfeat => $feat2,
            -oldatt => $feat1->strand(),
            -newatt => $feat2->strand(),
            -comment => "strand",
            -code => 11
  );
}

=head2 compare_feature_scores

=cut

sub compare_feature_scores
{
  my ($self, $feat1, $feat2) = @_;
  my $f1score = $feat1->score ? $feat1->score : 0;
  my $f2score = $feat2->score ? $feat2->score : 0;
  return if ($f1score eq $f2score);
  return Bio::BioStudio::Diff::Difference->new(
            -oldfeat => $feat1,
            -newfeat => $feat2,
            -oldatt => $f1score,
            -newatt => $f2score,
            -comment => "score",
            -code => 11
  );
}

=head2 compare_feature_phases

=cut

sub compare_feature_phases
{
  my ($self, $feat1, $feat2) = @_;
  my $f1phase = $feat1->phase ? $feat1->phase : 0;
  my $f2phase = $feat2->phase ? $feat2->phase : 0;
  return if ($f1phase eq $f2phase);
  return Bio::BioStudio::Diff::Difference->new(
            -oldfeat => $feat1,
            -newfeat => $feat2,
            -oldatt => $f1phase,
            -newatt => $f2phase,
            -comment => "phase",
            -code => 11
  );
}

=head2 compare_feature_attributes

=cut

sub compare_feature_attributes
{
  my ($self, $feat1, $feat2) = @_;
  my @attChanges;

  ##Check to see if source has changed
  my $sourcechange = $self->compare_feature_sources($feat1, $feat2);
  push @attChanges, $sourcechange if ($sourcechange);

  ##Check to see if type has changed
  my $typechange = $self->compare_feature_types($feat1, $feat2);
  push @attChanges, $typechange if ($typechange);

  ##Check to see if length has changed
  my $lengthchange = $self->compare_feature_lengths($feat1, $feat2);
  push @attChanges, $lengthchange if ($lengthchange);

  ##Check to see if score has changed
  my $scorechange = $self->compare_feature_scores($feat1, $feat2);
  push @attChanges, $scorechange if ($scorechange);

  ##Check to see if orientation has changed
  my $orientchange = $self->compare_feature_orientations($feat1, $feat2);
  push @attChanges, $orientchange if ($orientchange);

  ##Check to see if phase has changed
  my $phasechange = $self->compare_feature_phases($feat1, $feat2);
  push @attChanges, $phasechange if ($phasechange);

  if (scalar (@attChanges))
  {
    return \@attChanges;
  }
  else
  {
    return;
  }
}

=head2 compare_feature_annotations

=cut

sub compare_feature_annotations
{
  my ($self, $feat1, $feat2) = @_;
  my @annChanges;

  my @tags1 = $feat1->get_all_tags();
  my @tags2 = $feat2->get_all_tags();
  foreach my $tag (grep {$feat2->has_tag($_)} @tags1)
  {
    my %vals1 = map {$_ => 1} $feat1->get_tag_values($tag);
    my %vals2 = map {$_ => 1} $feat2->get_tag_values($tag);
    foreach my $val (grep {! exists $vals2{$_}} keys %vals1)
    {
      push @annChanges, Bio::BioStudio::Diff::Difference->new(
                -oldfeat => $feat1,
                -newfeat => $feat2,
                -oldatt => "$tag = $val",
                -code => 9
      );
    }
    foreach my $val (grep {! exists $vals1{$_}} keys %vals2)
    {
      push @annChanges, Bio::BioStudio::Diff::Difference->new(
                -oldfeat => $feat1,
                -newfeat => $feat2,
                -newatt => "$tag = $val",
                -code => 10
      );
    }
  }
  foreach my $tag (grep {! $feat2->has_tag($_)} @tags1)
  {
    push @annChanges, Bio::BioStudio::Diff::Difference->new(
                -oldfeat => $feat1,
                -newfeat => $feat2,
                -oldatt => "$tag = " . join(", ", $feat1->get_tag_values($tag)),
                -code => 9
    );
  }
  foreach my $tag (grep {! $feat1->has_tag($_)} @tags2)
  {
    push @annChanges, Bio::BioStudio::Diff::Difference->new(
                -oldfeat => $feat1,
                -newfeat => $feat2,
                -oldatt => "$tag = " . join(", ", $feat2->get_tag_values($tag)),
                -code => 10
    );
  }

  if (scalar (@annChanges))
  {
    return \@annChanges;
  }
  else
  {
    return;
  }
}

=head2 compare_feature_sequences

=cut

sub compare_feature_sequences
{
  my ($self, $feat1, $feat2) = @_;

  my ($old, $var) = ($feat1->seq->seq, $feat2->seq->seq);
  return if ($old eq $var);
  my ($aligns, $basechange) = (undef, 0);
  if ($feat1->primary_tag() eq "chromosome")
  {
    ##WHOLE CHROMOSOME ALIGNMENT
    return;
  }
  if ($self->{alignseq})
  {
    my $alnz = $self->BLAST_factory->bl2seq(
      -method         => 'blastn',
      -subject        => $feat1->seq,
      -query          => $feat2->seq,
      -method_args => [
        gapopen       => 11,
        gapextend     => 2
      ]
    );
    if ($alnz)
    {
      $aligns = $alnz;
      my $mismatchcount = 0;
      my $hitlen = 0;
      while (my $hit = $aligns->next_hit())
      {
        if (scalar($hit->hsps()))
        {
          $hitlen += $hit->length();
          $mismatchcount += ($hit->length() - $hit->matches('id'));
        }
      }
      $mismatchcount += (length($var) - $hitlen);
      $basechange = $mismatchcount;
    }
  }
  else
  {
    #my $cthsh = _compare_sequences($feat2->Tag_wtseq, $feat2->Tag_newseq);
    my $cthsh = _compare_sequences($old, $var);
    $basechange = $cthsh->{D};
  }
  my $difference = Bio::BioStudio::Diff::Difference->new(
    -oldfeat    => $feat1,
    -newfeat    => $feat2,
    -oldatt     => $old,
    -newatt     => $var,
    -code       => 8,
    -aligns     => $aligns,
    -basechange => $basechange
  );
  return $difference;
}

=head2 compare_feature_translations

=cut

sub compare_feature_translations
{
  my ($self, $feat1, $feat2) = @_;
  my $oldchr = $self->{oldchr};
  my $GD = $oldchr->GD;

  my $orf1 = $feat1->seq->seq;
  my $f1phase = $feat1->phase ? $feat1->phase : 0;
	my $old = $GD->translate(-sequence => $orf1, -frame => $f1phase + 1);

	my $orf2 = $feat2->seq->seq;
  my $f2phase = $feat2->phase ? $feat2->phase : 0;
	my $var = $GD->translate(-sequence => $orf2, -frame => $f2phase + 1);

  return if ($old eq $var);

  if ((!$old && length($orf1) >=3) || (!$var && length($orf2) >=3))
  {
    print "original $feat1 " if (!$old);
    print "variant $feat2 " if (!$var);
    print "has no translation, strangely!!\n";
    return;
  }
  my $aligns = undef;
  if ($self->{aligntrx})
  {
    my $newfeat1 = Bio::Seq->new(-id => $feat1->id, -seq => $old);
    my $newfeat2 = Bio::Seq->new(-id => $feat2->id, -seq => $var);
    $aligns = $self->BLAST_factory->bl2seq(
      -method  => 'blastp',
      -subject => $newfeat1,
      -query   => $newfeat2
    );
  }
  my $difference = Bio::BioStudio::Diff::Difference->new(
    -oldfeat => $feat1,
    -newfeat => $feat2,
    -oldatt => $old,
    -newatt => $var,
    -code => 7,
    -aligns => $aligns
  );
  return $difference;
}

=head2 compare_comments

=cut

sub compare_comments
{
	my ($self) = @_;
  my $ref1 = join "\n", $self->{oldchr}->comments();
  my $ref2 = join "\n", $self->{newchr}->comments();
	my @textdiff;
  diff \$ref1, \$ref2, {OUTPUT => \@textdiff};
	return @textdiff;
}

=head2 verify_newseq

=cut

sub verify_newseq
{
  my ($self, $feat) = @_;
  return 1 unless ($feat->has_tag("newseq"));
  my $newseq = $feat->Tag_newseq;
  my $featseq = $feat->seq->seq;
  return 1 if ($newseq eq $featseq);
  my $chr = $self->{newchr};
  my $GD = $chr->GD;
  $featseq = $GD->complement(-sequence => $featseq, -reverse => 1);
  return 1 if ($newseq eq $featseq && $feat->strand == -1);
  return 0;
}

=head1 ACCESSOR METHODS

=head2 oldchr

=cut

sub oldchr
{
  my ($self, $value) = @_;
  if (defined $value)
  {
    unless ($value->isa("Bio::BioStudio::Chromosome"))
    {
      $self->throw("oldchr value is not a Bio::BioStudio::Chromosome object");
    }
	  $self->{oldchr} = $value;
  }
  return $self->{oldchr};
}

=head2 newchr

=cut

sub newchr
{
  my ($self, $value) = @_;
  if (defined $value)
  {
    unless ($value->isa("Bio::BioStudio::Chromosome"))
    {
      $self->throw("newchr value is not a Bio::BioStudio::Chromosome object");
    }
	  $self->{newchr} = $value;
  }
  return $self->{newchr};
}

=head2 checktrx

=cut

sub checktrx
{
  my ($self) = @_;
  return $self->{checktrx};
}

=head2 aligntrx

=cut

sub aligntrx
{
  my ($self) = @_;
  return $self->{aligntrx};
}

=head2 alignseq

=cut

sub alignseq
{
  my ($self) = @_;
  return $self->{alignseq};
}

=head2 checkseq

=cut

sub checkseq
{
  my ($self) = @_;
  return $self->{checkseq};
}

=head2 BLAST_factory

=cut

sub BLAST_factory
{
  my ($self) = @_;
  if (! defined $self->{BLAST_factory})
  {
    $self->_make_bl2seq_factory();
  }
  return $self->{BLAST_factory};
}

=head1 PRIVATE

=head2 _make_bl2seq_factory

Returns a L<Bio::Tools::Run::StandAloneBlastPlus> object that can be used to
run BLAST bl2seq queries

=cut

sub _make_bl2seq_factory
{
  my ($self) = @_;
  my $bdir = Bio::BioStudio::ConfigData->config('tmp_path');
  my $factory = Bio::Tools::Run::StandAloneBlastPlus->new(
    -db_dir  => $bdir
  );
  $self->{BLAST_factory} = $factory;
  return $factory;
}

1;

__END__

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2015, BioStudio developers
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.

* The names of Johns Hopkins, the Joint Genome Institute, the Joint BioEnergy 
Institute, the Lawrence Berkeley National Laboratory, the Department of Energy, 
and the BioStudio developers may not be used to endorse or promote products 
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=cut