Changeset 22809


Ignore:
Timestamp:
08/09/10 21:40:39 (17 months ago)
Author:
alex
Message:

Edited version of old kegg script - with less obfuscation (such as helper methods) and a clear construction syntax

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/bio/scripts/kegg_to_items

    r17402 r22809  
    33use warnings; 
    44use strict; 
     5use lib '/../intermine/perl/lib'; 
    56 
    6 BEGIN { 
    7   push (@INC, ($0 =~ m:(.*)/.*:)[0] . '/../../intermine/perl/lib'); 
     7use XML::Writer; 
     8use InterMine::Model; 
     9use InterMine::ItemFactory; 
     10 
     11if (@ARGV != 4) { 
     12  die "usage: $0 model_file kegg_map_title_file kegg_gene_map out_file\n"; 
    813} 
    914 
    10 use XML::Writer; 
    11 use InterMine::Item; 
    12 use InterMine::ItemFactory; 
    13 use InterMine::Model; 
    14  
    15 if (@ARGV != 3) { 
    16   die "usage: $0 model_file kegg_map_title_file kegg_gene_map\n"; 
    17 } 
    18  
    19 my ($model_file, $map_title_filename, $gene_map_filename) = @ARGV; 
     15my ($model_file, $map_title_filename, $gene_map_filename, $outfile) = @ARGV; 
    2016 
    2117my $data_source = 'Kegg'; 
    22 my $taxon_id = 36329; 
    23  
    24 my @items = (); 
    25  
     18my $taxon_id    = 36329; 
    2619 
    2720# The item factory needs the model so that it can check that new objects have 
    2821# valid classnames and fields 
    29 my $model = new InterMine::Model(file => $model_file); 
    30 my $item_factory = new InterMine::ItemFactory(model => $model); 
     22my $model        = InterMine::Model->new(file => $model_file); 
     23my $item_factory = InterMine::ItemFactory->new(model => $model); 
    3124 
     25my (@items, %pathway_with); 
    3226 
    33 # objects that the new Pathway and Gene objects will refer to 
    34 my $data_source_item; 
    35 my $data_set_item; 
    36 my $org_item; 
    37  
    38  
    39 # helper method that makes a new object of a particular class and saves it in  
    40 # the @items array 
    41 sub make_item 
    42 { 
    43   my $implements = shift; 
    44   my $item = $item_factory->make_item(implements => $implements); 
    45   push @items, $item; 
    46   if ($item->valid_field('organism')) { 
    47     $item->set('organism', $org_item); 
    48   } 
    49   if ($item->valid_field('dataSets') && $implements ne 'DataSource') { 
    50     $item->set('dataSets', [$data_set_item]); 
    51   } 
    52   if ($item->valid_field('dataSource')) { 
    53     $item->set('dataSource', $data_source_item); 
    54   } 
    55   return $item; 
    56 } 
    57  
    58 # make the objects that the new Pathway and Gene objects will refer to 
    59 $data_source_item = make_item("DataSource"); 
    60 $data_source_item->set('name', $data_source); 
    61  
    62 $data_set_item = make_item("DataSet"); 
    63 $data_set_item->set('title', "$data_source data set for taxon id: $taxon_id"); 
    64  
    65 $org_item = make_item("Organism"); 
    66 $org_item->set("taxonId", $taxon_id); 
    67  
    68  
    69 # a map from pathway id to pathway item 
    70 my %pathways = (); 
    71  
     27my $datasource_item = $item_factory->make_item( 
     28    'DataSource', 
     29    name => $data_source, 
     30); 
     31my $dataset_item = $item_factory->make_item( 
     32    'DataSet', 
     33    name       => $data_source . ' data set for taxon id: ' . $taxon_id, 
     34    dataSource => $datasource_item, 
     35); 
     36my $org_item = $item_factory->make_item( 
     37    'Organism', 
     38    taxonId  => $taxon_id, 
     39    dataSets => [$dataset_item], 
     40); 
     41push @items, $datasource_item, $org_item, $dataset_item; 
    7242 
    7343# read the map title file into %pathway_with (pathway id => Pathway item)
    7444 
    75 open my $map_title_file, '<', $map_title_filename 
    76   or die "can't open $map_title_filename for reading: $!\n"; 
     45open(my $input, '<', $map_title_filename)  
     46    or die "Could not open $map_title_filename for reading, $!"; 
    7747 
    78 while (my $line = <$map_title_file>) { 
    79   chomp $line; 
    80  
    81   my $pathway_item = make_item("Pathway"); 
    82  
    83   my ($pathway_identifier, $pathway_title) = split /\t/, $line; 
    84  
    85   $pathway_item->set('identifier', $pathway_identifier); 
    86   $pathway_item->set('name', $pathway_title); 
    87  
    88   $pathways{$pathway_identifier} = $pathway_item; 
    89 } 
    90  
    91 close $map_title_file or die "can't close $map_title_filename\n"; 
    92  
     48while (<$input>) { 
     49    chomp; 
     50    my ($id, $title) = split(/\t/); 
     51    my $item = $item_factory->make_item( 
     52       'Pathway', 
     53       identifier => $id, 
     54       name       => $title, 
     55       dataSets   => [$dataset_item], 
     56    ); 
     57    push @items, $item; 
     58    $pathway_with{$id} = $item; 
     59}    
     60close($input) or die "Could not close $map_title_filename, $!"; 
    9361 
    9462# read the gene map file and create item objects 
    9563 
    96 open my $gene_map_file, '<', $gene_map_filename 
    97   or die "can't open $gene_map_filename for reading: $!\n"; 
     64open(my $input, '<', $gene_map_filename)  
     65    or die "Could not open $gene_map_filename for reading, $!"; 
    9866 
    99 while (my $line = <$gene_map_file>) { 
    100   chomp $line; 
     67while (<$input>) { 
     68    chomp; 
     69    my ($id, $pathways) = split(/\t/); 
    10170 
    102   # make_item() automatically adds the new item to the @items array and sets  
    103   # the organism reference if appropriate 
    104   my $gene_item = make_item("Gene"); 
    105  
    106   my ($gene_name, $pathways_string) = split /\t/, $line; 
    107  
    108   $gene_item->set('primaryIdentifier', $gene_name); 
    109  
    110   my @pathway_identifiers = split / /, $pathways_string; 
    111  
    112   $gene_item->set('pathways', [map {$pathways{$_}} @pathway_identifiers]); 
    113 } 
    114  
     71    my @pathway_items = @pathway_with{split(/\s/, $pathways)}; 
     72     
     73    my $item = $item_factory->make_item( 
     74       'Gene', 
     75       primaryIdentifier => $id, 
     76       organism          => $org_item, 
     77       pathways          => [@pathway_items], 
     78       dataSets          => [$dataset_item], 
     79    ); 
     80    push @items, $item; 
     81}    
     82close($input) or die "Could not close $gene_map_filename, $!"; 
    11583 
    11684# write the items 
    117 my $writer = new XML::Writer(DATA_MODE => 1, DATA_INDENT => 3); 
     85open(my $output, '>', $out_file)  
     86    or die "Cannot open $outfile for writing, $!"; 
     87 
     88my $writer = new XML::Writer( 
     89    DATA_MODE   => 1,  
     90    DATA_INDENT => 3,  
     91    OUTPUT      => $output, 
     92); 
    11893 
    11994$writer->startTag("items"); 
    120 for my $item (@items) { 
    121   $item->as_xml($writer); 
    122 } 
     95$_->as_xml($writer) for @items; 
    12396$writer->endTag("items"); 
     97 
     98close($output) or die "Cannot close $outfile, $!"; 
     99exit; 
Note: See TracChangeset for help on using the changeset viewer.