package Sequence;

use strict;

sub new {
  # Constructor. Takes the class name and a sequence string and returns a
  # hash-based object, blessed into that class, holding the string under
  # the 'seq' key. The sequence is stored as given, without validation.
  my $class = shift;
  my $seq   = shift;
  return bless { seq => $seq }, $class;
}

# remove spaces and digits from the sequence
sub clean {
  # Strips every whitespace and digit character from the stored sequence,
  # editing the object in place. The value of the substitution is handed
  # back to the caller: the number of characters removed, or an empty
  # string when nothing matched.
  my ($self) = @_;
  return $self->{seq} =~ s/[\s\d\n]//g;
}


sub rc {
  # Returns the reverse complement of the stored sequence as a new string;
  # the object itself is not modified. Only A, T, C and G (in either case)
  # are complemented. Any other character keeps its identity and is merely
  # moved to its mirrored position.
  my ($self) = @_;

  # Work on a copy so the transliteration never touches the stored sequence.
  (my $complement = $self->{seq}) =~ tr/AaTtCcGg/TtAaGgCc/;

  # scalar() forces string reversal rather than list reversal.
  return scalar reverse $complement;
}

# Wrap the sequence for display
sub wrap {
  # Formats the stored sequence for display by breaking it into lines of at
  # most $width characters.
  #
  # Arguments:
  #   $width - optional line length; defaults to 50 when omitted, zero,
  #            negative or not a number. Fractional values are truncated.
  # Returns:
  #   The sequence with a newline after every line, including the last one.
  #   An empty sequence yields a single "\n".
  my ($self, $width) = @_;

  # Normalise the width to a positive integer. A non-positive width would
  # never advance through the sequence, so the default is used instead.
  $width = defined $width ? int($width) : 0;
  $width = 50 if $width < 1;

  my $seq    = defined $self->{seq} ? $self->{seq} : '';
  my $seqlen = length $seq;

  # Step through the sequence by start offset and stop as soon as the offset
  # reaches the end. This avoids the empty trailing chunk (and the blank
  # line it caused) when the length is an exact multiple of the width.
  my @lines;
  for (my $start = 0; $start < $seqlen; $start += $width) {
    push @lines, substr($seq, $start, $width);
  }

  return join("\n", @lines) . "\n";
}

# find base composition: percentage of A, T, C and G in the sequence
sub gc_content {
  # Reports the base composition of the stored sequence, case-insensitively.
  #
  # Returns:
  #   A four-element list (A, T, C, G) giving each base as a percentage of
  #   the total sequence length, formatted to one decimal place. Characters
  #   other than A/T/C/G count towards the length but not towards any base.
  #   An empty sequence yields ('0.0', '0.0', '0.0', '0.0').
  my $self = shift;
  my $seq  = uc( defined $self->{seq} ? $self->{seq} : '' );
  my $len  = length $seq;

  # With no characters there is nothing to divide by; report zeros rather
  # than dying with "Illegal division by zero".
  return ('0.0', '0.0', '0.0', '0.0') if $len == 0;

  # tr/// with an empty replacement list counts occurrences without
  # modifying the string.
  my $count_a = ($seq =~ tr/A//);
  my $count_t = ($seq =~ tr/T//);
  my $count_c = ($seq =~ tr/C//);
  my $count_g = ($seq =~ tr/G//);

  # Named $pct_* rather than $a/$b/... so that the special sort variables
  # $a and $b are not shadowed.
  my $pct_a = sprintf("%.1f", $count_a / $len * 100);
  my $pct_t = sprintf("%.1f", $count_t / $len * 100);
  my $pct_c = sprintf("%.1f", $count_c / $len * 100);
  my $pct_g = sprintf("%.1f", $count_g / $len * 100);

  return ($pct_a, $pct_t, $pct_c, $pct_g);
}


# Shuffle sequence
sub shuffle {
  # Returns a randomly permuted copy of the stored sequence. The object
  # itself is not modified.
  #
  # Returns:
  #   A string with the same characters as the sequence, in random order.
  #   Sequences of zero or one character are returned unchanged.
  my $self = shift;
  my $seq  = defined $self->{seq} ? $self->{seq} : '';

  # Nothing to permute. This guard also covers the empty sequence, which
  # previously recursed without end.
  return $seq if length($seq) < 2;

  # Loaded here so this sub is self-contained. The call is fully qualified
  # because this package defines its own shuffle(), so the name must not be
  # imported.
  require List::Util;
  return join '', List::Util::shuffle(split //, $seq);
}

sub translate {
  # Translates the stored DNA sequence into a protein string, reading from
  # the first base in steps of three and converting each codon with
  # codon2aa(). Leftover bases that do not make up a whole codon at the end
  # are ignored.
  my ($self) = @_;
  my $dna = $self->{seq};

  # Number of complete three-base codons available.
  my $codon_count = int(length($dna) / 3);

  my $protein = '';
  for my $n (0 .. $codon_count - 1) {
    $protein .= codon2aa(substr($dna, 3 * $n, 3));
  }

  return $protein;
}

# A subroutine to translate a DNA 3-character codon to an amino acid
# Codon table (DNA alphabet, upper case) mapping each of the 64 codons to a
# one-letter amino acid code; stop codons map to '_'. It is built once when
# the file is loaded rather than on every call, because translate() calls
# codon2aa() once per codon.
my %genetic_code = (

    'TCA' => 'S',    # Serine
    'TCC' => 'S',    # Serine
    'TCG' => 'S',    # Serine
    'TCT' => 'S',    # Serine
    'TTC' => 'F',    # Phenylalanine
    'TTT' => 'F',    # Phenylalanine
    'TTA' => 'L',    # Leucine
    'TTG' => 'L',    # Leucine
    'TAC' => 'Y',    # Tyrosine
    'TAT' => 'Y',    # Tyrosine
    'TAA' => '_',    # Stop
    'TAG' => '_',    # Stop
    'TGC' => 'C',    # Cysteine
    'TGT' => 'C',    # Cysteine
    'TGA' => '_',    # Stop
    'TGG' => 'W',    # Tryptophan
    'CTA' => 'L',    # Leucine
    'CTC' => 'L',    # Leucine
    'CTG' => 'L',    # Leucine
    'CTT' => 'L',    # Leucine
    'CCA' => 'P',    # Proline
    'CCC' => 'P',    # Proline
    'CCG' => 'P',    # Proline
    'CCT' => 'P',    # Proline
    'CAC' => 'H',    # Histidine
    'CAT' => 'H',    # Histidine
    'CAA' => 'Q',    # Glutamine
    'CAG' => 'Q',    # Glutamine
    'CGA' => 'R',    # Arginine
    'CGC' => 'R',    # Arginine
    'CGG' => 'R',    # Arginine
    'CGT' => 'R',    # Arginine
    'ATA' => 'I',    # Isoleucine
    'ATC' => 'I',    # Isoleucine
    'ATT' => 'I',    # Isoleucine
    'ATG' => 'M',    # Methionine
    'ACA' => 'T',    # Threonine
    'ACC' => 'T',    # Threonine
    'ACG' => 'T',    # Threonine
    'ACT' => 'T',    # Threonine
    'AAC' => 'N',    # Asparagine
    'AAT' => 'N',    # Asparagine
    'AAA' => 'K',    # Lysine
    'AAG' => 'K',    # Lysine
    'AGC' => 'S',    # Serine
    'AGT' => 'S',    # Serine
    'AGA' => 'R',    # Arginine
    'AGG' => 'R',    # Arginine
    'GTA' => 'V',    # Valine
    'GTC' => 'V',    # Valine
    'GTG' => 'V',    # Valine
    'GTT' => 'V',    # Valine
    'GCA' => 'A',    # Alanine
    'GCC' => 'A',    # Alanine
    'GCG' => 'A',    # Alanine
    'GCT' => 'A',    # Alanine
    'GAC' => 'D',    # Aspartic Acid
    'GAT' => 'D',    # Aspartic Acid
    'GAA' => 'E',    # Glutamic Acid
    'GAG' => 'E',    # Glutamic Acid
    'GGA' => 'G',    # Glycine
    'GGC' => 'G',    # Glycine
    'GGG' => 'G',    # Glycine
    'GGT' => 'G',    # Glycine
);

sub codon2aa {
    # Looks up the one-letter amino acid code for a three-base DNA codon.
    # This is a plain function, not a method: it takes the codon string as
    # its only argument, in either case.
    #
    # Returns the amino acid letter, or '_' for a stop codon.
    # Dies with "Bad codon: <codon>\n" when the codon is not in the table.
    my ($codon) = @_;
    $codon = uc $codon;

    return $genetic_code{$codon} if exists $genetic_code{$codon};

    # die rather than exit: callers can trap the failure with eval, and an
    # uncaught failure still prints the same message to STDERR but ends the
    # program with a non-zero status instead of a misleading 0.
    die "Bad codon: $codon\n";
}



1;

