NOTE! This is a read-only copy of the ENCODE2 wiki.
Please go to the ENCODE3 wiki for current information.

Proserver example 1

From Encode2 Wiki
Jump to: navigation, search
#
# Example SourceAdaptor for Proserver DAS sources
# complying to GENCODE format.
#
# Example 1: Reading data from GFF file
#
# For questions contact Felix Kokocinski (fsk@sanger.ac.uk)
#

package Bio::Das::ProServer::SourceAdaptor::example;

use strict;
#Proserver module:
use base qw(Bio::Das::ProServer::SourceAdaptor);
#for the datestamp format:
use Date::Format;


# General initialization function
# Set metadata such as the commands supported by this source.
sub init {
  my ($self) = @_;
  $self->{'capabilities'} = { 'features' => '1.0' };
}


# General function for "features" DAS command
# Gather the features annotated in a given segment of sequence.
sub build_features {
  my ($self, $args) = @_;

  my $segment = $args->{'segment'}; # The query segment ID
  my $start   = $args->{'start'};   # The query start position (optional)
  my $end     = $args->{'end'};     # The query end position (optional)

  my @features = ();
  my %group_start;
  my %group_end;
  my %group_count;

  #category controlled vocabulary:
  #id: ECO:00000067; name: inferred from electronic annotation
  my $typecategory = "ECO:00000067";

  #read data from gff file
  open FH, '<', '/Users/fsk/great_data/annotation.gff'
    or die "Unable to open data file";

  while (defined (my $line = <FH>)) {

    chomp $line;
    my ($f_seg, $method, $type, $f_start, $f_end, $score, $strand, $phase, $add) = split /\t/, $line;

    #get extra info from last column
    my ($f_id, $stamp) = split(";", $add);

    #replace unwanted characters
    $f_id =~ s|\"||g;
    $f_seg =~ s/^chr//;

    #create group attributes for new set of features
    if($type eq "mRNA"){
      $group_start{$f_id} = $f_start;
      $group_end{$f_id}   = $f_end;
      $group_count{$f_id};
      next;
    }

    #convert datestamp from machine time format into desired format (2006-04-07T15:15:58+0100)
    my $modstamp = time2str("%Y-%m-%dT%H:%M:%S%z", $stamp);

    #index for unique feature id
    $group_count{$f_id}++;

    #get the features overlapping this genomic region only
    if (($f_seg eq $segment) && ($f_start <= $end && $f_end >= $start) ) {

      #create individual feature
      my $feature = {
		     #unique id
		     'id'           => $f_id."_".$group_count{$f_id},
		     #genomic start
		     'start'        => $f_start,
		     #genomic end
		     'end'          => $f_end,
		     #strand: +/-/0
		     'ori'          => $strand,
		     #name of this method/annotation
		     'method'       => $method,
		     #type must be exon, intron, etc.
		     'type'         => $type,
		     #category type: ECO id
		     'typecategory' => $typecategory,
		     #phase: 0/1/2/-
		     'phase'        => '-',
		     #score, 0 if n.a.
		     'score'        => 0,
		     #note for various fields, key=value pairs
		     'note'         => [
				       'lastmod='.$modstamp,
				       ],
		     #group of features
		     'group_id'     => $f_id,
		     'grouptype'    => $method."_prediction",
		     'groupnote'    => 'Note='.$group_start{$f_id}."-".$group_end{$f_id},
		    };

      #store in feature array
      push @features, $feature;
    }

  }
  close FH or warn "Problem closing data file";

  #return entire features array
  return @features;
}


1;