| 1 | package org.intermine.bio.dataconversion; |
|---|
| 2 | |
|---|
| 3 | /* |
|---|
| 4 | * Copyright (C) 2002-2011 FlyMine |
|---|
| 5 | * |
|---|
| 6 | * This code may be freely distributed and modified under the |
|---|
| 7 | * terms of the GNU Lesser General Public Licence. This should |
|---|
| 8 | * be distributed with the code. See the LICENSE file for more |
|---|
| 9 | * information or http://www.gnu.org/copyleft/lesser.html. |
|---|
| 10 | * |
|---|
| 11 | */ |
|---|
| 12 | |
|---|
| 13 | import java.util.ArrayList; |
|---|
| 14 | import java.util.HashMap; |
|---|
| 15 | import java.util.List; |
|---|
| 16 | import java.util.Map; |
|---|
| 17 | |
|---|
| 18 | import org.intermine.bio.util.OrganismData; |
|---|
| 19 | import org.intermine.objectstore.ObjectStoreException; |
|---|
| 20 | import org.intermine.xml.full.Item; |
|---|
| 21 | import org.intermine.xml.full.ReferenceList; |
|---|
| 22 | |
|---|
| 23 | import java.sql.Connection; |
|---|
| 24 | import java.sql.ResultSet; |
|---|
| 25 | import java.sql.SQLException; |
|---|
| 26 | import java.sql.Statement; |
|---|
| 27 | |
|---|
| 28 | import org.apache.log4j.Logger; |
|---|
| 29 | import org.apache.commons.lang.StringUtils; |
|---|
| 30 | |
|---|
| 31 | /** |
|---|
| 32 | * A ChadoProcessor for the chado stock module. |
|---|
| 33 | * @author Kim Rutherford |
|---|
| 34 | */ |
|---|
| 35 | public class StockProcessor extends ChadoProcessor |
|---|
| 36 | { |
|---|
| 37 | private static final Logger LOG = Logger.getLogger(SequenceProcessor.class); |
|---|
| 38 | private Map<String, Item> stockItems = new HashMap<String, Item>(); |
|---|
| 39 | |
|---|
| 40 | /** |
|---|
| 41 | * Create a new ChadoProcessor |
|---|
| 42 | * @param chadoDBConverter the Parent ChadoDBConverter |
|---|
| 43 | */ |
|---|
| 44 | public StockProcessor(ChadoDBConverter chadoDBConverter) { |
|---|
| 45 | super(chadoDBConverter); |
|---|
| 46 | } |
|---|
| 47 | |
|---|
| 48 | /** |
|---|
| 49 | * {@inheritDoc} |
|---|
| 50 | */ |
|---|
| 51 | @Override |
|---|
| 52 | public void process(Connection connection) throws Exception { |
|---|
| 53 | processSocks(connection); |
|---|
| 54 | } |
|---|
| 55 | |
|---|
| 56 | /** |
|---|
| 57 | * Process the stocks and genotypes tables in a chado database |
|---|
| 58 | * @param connection |
|---|
| 59 | */ |
|---|
| 60 | private void processSocks(Connection connection) |
|---|
| 61 | throws SQLException, ObjectStoreException { |
|---|
| 62 | Map<Integer, FeatureData> features = getFeatures(); |
|---|
| 63 | |
|---|
| 64 | ResultSet res = getStocksResultSet(connection); |
|---|
| 65 | int count = 0; |
|---|
| 66 | Integer lastFeatureId = null; |
|---|
| 67 | List<Item> stocks = new ArrayList<Item>(); |
|---|
| 68 | while (res.next()) { |
|---|
| 69 | Integer featureId = new Integer(res.getInt("feature_id")); |
|---|
| 70 | if (lastFeatureId != null && !featureId.equals(lastFeatureId)) { |
|---|
| 71 | storeStocks(features, lastFeatureId, stocks); |
|---|
| 72 | stocks = new ArrayList<Item>(); |
|---|
| 73 | } |
|---|
| 74 | if (!features.containsKey(featureId)) { |
|---|
| 75 | // probably an allele of an unlocated genes |
|---|
| 76 | continue; |
|---|
| 77 | } |
|---|
| 78 | |
|---|
| 79 | String stockUniqueName = res.getString("stock_uniquename"); |
|---|
| 80 | String stockDescription = res.getString("stock_description"); |
|---|
| 81 | String stockCenterUniquename = res.getString("stock_center_uniquename"); |
|---|
| 82 | String stockType = res.getString("stock_type_name"); |
|---|
| 83 | Integer organismId = new Integer(res.getInt("stock_organism_id")); |
|---|
| 84 | OrganismData organismData = |
|---|
| 85 | getChadoDBConverter().getChadoIdToOrgDataMap().get(organismId); |
|---|
| 86 | if (organismData == null) { |
|---|
| 87 | throw new RuntimeException("can't get OrganismData for: " + organismId); |
|---|
| 88 | } |
|---|
| 89 | Item organismItem = getChadoDBConverter().getOrganismItem(organismData.getTaxonId()); |
|---|
| 90 | Item stock = makeStock(stockUniqueName, stockDescription, stockType, |
|---|
| 91 | stockCenterUniquename, organismItem); |
|---|
| 92 | stocks.add(stock); |
|---|
| 93 | lastFeatureId = featureId; |
|---|
| 94 | } |
|---|
| 95 | if (lastFeatureId != null) { |
|---|
| 96 | storeStocks(features, lastFeatureId, stocks); |
|---|
| 97 | } |
|---|
| 98 | LOG.info("created " + count + " stocks"); |
|---|
| 99 | res.close(); |
|---|
| 100 | } |
|---|
| 101 | |
|---|
| 102 | private Map<Integer, FeatureData> getFeatures() { |
|---|
| 103 | Class<SequenceProcessor> seqProcessorClass = SequenceProcessor.class; |
|---|
| 104 | SequenceProcessor sequenceProcessor = |
|---|
| 105 | (SequenceProcessor) getChadoDBConverter().findProcessor(seqProcessorClass); |
|---|
| 106 | |
|---|
| 107 | Map<Integer, FeatureData> features = sequenceProcessor.getFeatureMap(); |
|---|
| 108 | return features; |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | private Item makeStock(String uniqueName, String description, String stockType, |
|---|
| 112 | String stockCenterUniqueName, Item organismItem) throws ObjectStoreException { |
|---|
| 113 | if (stockItems.containsKey(uniqueName)) { |
|---|
| 114 | return stockItems.get(uniqueName); |
|---|
| 115 | } |
|---|
| 116 | Item stock = getChadoDBConverter().createItem("Stock"); |
|---|
| 117 | stock.setAttribute("primaryIdentifier", uniqueName); |
|---|
| 118 | stock.setAttribute("secondaryIdentifier", description); |
|---|
| 119 | stock.setAttribute("type", stockType); |
|---|
| 120 | stock.setAttribute("stockCenter", stockCenterUniqueName); |
|---|
| 121 | stock.setReference("organism", organismItem); |
|---|
| 122 | stockItems.put(uniqueName, stock); |
|---|
| 123 | getChadoDBConverter().store(stock); |
|---|
| 124 | return stock; |
|---|
| 125 | } |
|---|
| 126 | |
|---|
| 127 | private void storeStocks(Map<Integer, FeatureData> features, Integer lastFeatureId, |
|---|
| 128 | List<Item> stocks) throws ObjectStoreException { |
|---|
| 129 | FeatureData featureData = features.get(lastFeatureId); |
|---|
| 130 | if (featureData == null) { |
|---|
| 131 | throw new RuntimeException("can't find feature data for: " + lastFeatureId); |
|---|
| 132 | } |
|---|
| 133 | Integer intermineObjectId = featureData.getIntermineObjectId(); |
|---|
| 134 | ReferenceList referenceList = new ReferenceList(); |
|---|
| 135 | referenceList.setName("stocks"); |
|---|
| 136 | for (Item stock: stocks) { |
|---|
| 137 | referenceList.addRefId(stock.getIdentifier()); |
|---|
| 138 | } |
|---|
| 139 | getChadoDBConverter().store(referenceList, intermineObjectId); |
|---|
| 140 | } |
|---|
| 141 | |
|---|
| 142 | /** |
|---|
| 143 | * Return the interesting rows from the features table. |
|---|
| 144 | * This is a protected method so that it can be overriden for testing |
|---|
| 145 | * @param connection the db connection |
|---|
| 146 | * @return the SQL result set |
|---|
| 147 | * @throws SQLException if a database problem occurs |
|---|
| 148 | */ |
|---|
| 149 | protected ResultSet getStocksResultSet(Connection connection) |
|---|
| 150 | throws SQLException { |
|---|
| 151 | String organismConstraint = getOrganismConstraint(); |
|---|
| 152 | String orgConstraintForQuery = ""; |
|---|
| 153 | if (!StringUtils.isEmpty(organismConstraint)) { |
|---|
| 154 | orgConstraintForQuery = " AND " + organismConstraint; |
|---|
| 155 | } |
|---|
| 156 | |
|---|
| 157 | String query = |
|---|
| 158 | "SELECT feature.feature_id, stock.uniquename AS stock_uniquename, " |
|---|
| 159 | + " stock.description AS stock_description, type_cvterm.name AS stock_type_name, " |
|---|
| 160 | + " stock.organism_id AS stock_organism_id, " |
|---|
| 161 | + " (SELECT stockcollection.uniquename " |
|---|
| 162 | + " FROM stockcollection, stockcollection_stock join_table " |
|---|
| 163 | + " WHERE stockcollection.stockcollection_id = join_table.stockcollection_id " |
|---|
| 164 | + " AND join_table.stock_id = stock.stock_id) " |
|---|
| 165 | + " AS stock_center_uniquename " |
|---|
| 166 | + " FROM stock_genotype, feature, stock, feature_genotype, cvterm type_cvterm " |
|---|
| 167 | + "WHERE stock.stock_id = stock_genotype.stock_id " |
|---|
| 168 | + "AND feature_genotype.feature_id = feature.feature_id " |
|---|
| 169 | + "AND feature_genotype.genotype_id = stock_genotype.genotype_id " |
|---|
| 170 | + "AND feature.uniquename LIKE 'FBal%' " |
|---|
| 171 | + "AND stock.type_id = type_cvterm.cvterm_id " |
|---|
| 172 | + orgConstraintForQuery + " " |
|---|
| 173 | + "AND stock.organism_id = feature.organism_id " |
|---|
| 174 | + "ORDER BY feature.feature_id"; |
|---|
| 175 | LOG.info("executing: " + query); |
|---|
| 176 | Statement stmt = connection.createStatement(); |
|---|
| 177 | ResultSet res = stmt.executeQuery(query); |
|---|
| 178 | return res; |
|---|
| 179 | } |
|---|
| 180 | |
|---|
| 181 | /** |
|---|
| 182 | * Return a comma separated string containing the organism_ids that with with to query from |
|---|
| 183 | * chado. |
|---|
| 184 | */ |
|---|
| 185 | private String getOrganismIdsString() { |
|---|
| 186 | return StringUtils.join(getChadoDBConverter().getChadoIdToOrgDataMap().keySet(), ", "); |
|---|
| 187 | } |
|---|
| 188 | |
|---|
| 189 | /** |
|---|
| 190 | * Return some SQL that can be included in the WHERE part of query that restricts features |
|---|
| 191 | * by organism. "organism_id" must be selected. |
|---|
| 192 | * @return the SQL |
|---|
| 193 | */ |
|---|
| 194 | protected String getOrganismConstraint() { |
|---|
| 195 | String organismIdsString = getOrganismIdsString(); |
|---|
| 196 | if (StringUtils.isEmpty(organismIdsString)) { |
|---|
| 197 | return ""; |
|---|
| 198 | } |
|---|
| 199 | return "feature.organism_id IN (" + organismIdsString + ")"; |
|---|
| 200 | } |
|---|
| 201 | } |
|---|