Changeset 22809
- Timestamp:
- 08/09/10 21:40:39 (17 months ago)
- File:
-
- 1 edited
-
trunk/bio/scripts/kegg_to_items (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/bio/scripts/kegg_to_items
r17402 r22809 3 3 use warnings; 4 4 use strict; 5 use lib '/../intermine/perl/lib'; 5 6 6 BEGIN { 7 push (@INC, ($0 =~ m:(.*)/.*:)[0] . '/../../intermine/perl/lib'); 7 use XML::Writer; 8 use InterMine::Model; 9 use InterMine::ItemFactory; 10 11 if (@ARGV != 4) { 12 die "usage: $0 model_file kegg_map_title_file kegg_gene_map out_file\n"; 8 13 } 9 14 10 use XML::Writer; 11 use InterMine::Item; 12 use InterMine::ItemFactory; 13 use InterMine::Model; 14 15 if (@ARGV != 3) { 16 die "usage: $0 model_file kegg_map_title_file kegg_gene_map\n"; 17 } 18 19 my ($model_file, $map_title_filename, $gene_map_filename) = @ARGV; 15 my ($model_file, $map_title_filename, $gene_map_filename, $outfile) = @ARGV; 20 16 21 17 my $data_source = 'Kegg'; 22 my $taxon_id = 36329; 23 24 my @items = (); 25 18 my $taxon_id = 36329; 26 19 27 20 # The item factory needs the model so that it can check that new objects have 28 21 # valid classnames and fields 29 my $model = new InterMine::Model(file => $model_file);30 my $item_factory = new InterMine::ItemFactory(model => $model);22 my $model = InterMine::Model->new(file => $model_file); 23 my $item_factory = InterMine::ItemFactory->new(model => $model); 31 24 25 my (@items, %pathway_with); 32 26 33 # objects that the new Pathway and Gene objects will refer to 34 my $data_source_item; 35 my $data_set_item; 36 my $org_item; 37 38 39 # helper method that makes a new object of a particular class and saves it in 40 # the @items array 41 sub make_item 42 { 43 my $implements = shift; 44 my $item = $item_factory->make_item(implements => $implements); 45 push @items, $item; 46 if ($item->valid_field('organism')) { 47 $item->set('organism', $org_item); 48 } 49 if ($item->valid_field('dataSets') && $implements ne 'DataSource') { 50 $item->set('dataSets', [$data_set_item]); 51 } 52 if ($item->valid_field('dataSource')) { 53 $item->set('dataSource', $data_source_item); 54 } 55 return $item; 56 } 57 58 # make the objects that the new Pathway and Gene objects will refer to 59 $data_source_item = make_item("DataSource"); 60 $data_source_item->set('name', $data_source); 61 62 $data_set_item = make_item("DataSet"); 63 $data_set_item->set('title', "$data_source data set for taxon id: $taxon_id"); 64 65 $org_item = make_item("Organism"); 66 $org_item->set("taxonId", $taxon_id); 67 68 69 # a map from pathway id to pathway item 70 my %pathways = (); 71 27 my $datasource_item = $item_factory->make_item( 28 'DataSource', 29 name => $data_source, 30 ); 31 my $dataset_item = $item_factory->make_item( 32 'DataSet', 33 name => $data_source . ' data set for taxon id: ' . $taxon_id, 34 dataSource => $datasource_item, 35 ); 36 my $org_item = $item_factory->make_item( 37 'Organism', 38 taxonId => $taxon_id, 39 dataSets => [$dataset_item], 40 ); 41 push @items, $datasource_item, $org_item, $dataset_item; 72 42 73 43 # read the map title file into %pathways 74 44 75 open my $map_title_file, '<', $map_title_filename76 or die "can't open $map_title_filename for reading: $!\n";45 open(my $input, '<', $map_title_filename) 46 or die "Could not open $map_title_filename for reading, $!"; 77 47 78 while (my $line = <$map_title_file>) { 79 chomp $line; 80 81 my $pathway_item = make_item("Pathway"); 82 83 my ($pathway_identifier, $pathway_title) = split /\t/, $line; 84 85 $pathway_item->set('identifier', $pathway_identifier); 86 $pathway_item->set('name', $pathway_title); 87 88 $pathways{$pathway_identifier} = $pathway_item; 89 } 90 91 close $map_title_file or die "can't close $map_title_filename\n"; 92 48 while (<$input>) { 49 chomp; 50 my ($id, $title) = split(/\t/); 51 my $item = $item_factory->make_item( 52 'Pathway', 53 identifier => $id, 54 name => $title, 55 dataSets => [$dataset_item], 56 ); 57 push @items, $item; 58 $pathway_with{$id} = $item; 59 } 60 close($input) or die "Could not close $map_title_filename, $!"; 93 61 94 62 # read the gene map file and create item objects 95 63 96 open my $gene_map_file, '<', $gene_map_filename97 or die "can't open $gene_map_filename for reading: $!\n";64 open(my $input, '<', $gene_map_filename) 65 or die "Could not open $gene_map_filename for reading, $!"; 98 66 99 while (my $line = <$gene_map_file>) { 100 chomp $line; 67 while (<$input>) { 68 chomp; 69 my ($id, $pathways) = split(/\t/); 101 70 102 # make_item() automatically adds the new item to the @items array and sets 103 # the organism reference if appropriate 104 my $gene_item = make_item("Gene"); 105 106 my ($gene_name, $pathways_string) = split /\t/, $line; 107 108 $gene_item->set('primaryIdentifier', $gene_name); 109 110 my @pathway_identifiers = split / /, $pathways_string; 111 112 $gene_item->set('pathways', [map {$pathways{$_}} @pathway_identifiers]); 113 } 114 71 my @pathway_items = @pathway_with{split(/\s/, $pathways)}; 72 73 my $item = $item_factory->make_item( 74 'Gene', 75 primaryIdentifier => $id, 76 organism => $org_item, 77 pathways => [@pathway_items], 78 dataSets => [$dataset_item], 79 ); 80 push @items, $item; 81 } 82 close($input) or die "Could not close $gene_map_filename, $!"; 115 83 116 84 # write the items 117 my $writer = new XML::Writer(DATA_MODE => 1, DATA_INDENT => 3); 85 open(my $output, '>', $out_file) 86 or die "Cannot open $outfile for writing, $!"; 87 88 my $writer = new XML::Writer( 89 DATA_MODE => 1, 90 DATA_INDENT => 3, 91 OUTPUT => $output, 92 ); 118 93 119 94 $writer->startTag("items"); 120 for my $item (@items) { 121 $item->as_xml($writer); 122 } 95 $_->as_xml($writer) for @items; 123 96 $writer->endTag("items"); 97 98 close($output) or die "Cannot close $outfile, $!"; 99 exit;
Note: See TracChangeset
for help on using the changeset viewer.
