| 1 | package org.intermine.bio.util; |
|---|
| 2 | |
|---|
| 3 | /* |
|---|
| 4 | * Copyright (C) 2002-2011 FlyMine |
|---|
| 5 | * |
|---|
| 6 | * This code may be freely distributed and modified under the |
|---|
| 7 | * terms of the GNU Lesser General Public Licence. This should |
|---|
| 8 | * be distributed with the code. See the LICENSE file for more |
|---|
| 9 | * information or http://www.gnu.org/copyleft/lesser.html. |
|---|
| 10 | * |
|---|
| 11 | */ |
|---|
| 12 | |
|---|
| 13 | |
|---|
| 14 | import java.util.ArrayList; |
|---|
| 15 | import java.util.Collection; |
|---|
| 16 | import java.util.Iterator; |
|---|
| 17 | |
|---|
| 18 | import org.intermine.api.profile.InterMineBag; |
|---|
| 19 | import org.intermine.metadata.Model; |
|---|
| 20 | import org.intermine.objectstore.ObjectStore; |
|---|
| 21 | import org.intermine.objectstore.query.BagConstraint; |
|---|
| 22 | import org.intermine.objectstore.query.ConstraintOp; |
|---|
| 23 | import org.intermine.objectstore.query.ConstraintSet; |
|---|
| 24 | import org.intermine.objectstore.query.ContainsConstraint; |
|---|
| 25 | import org.intermine.objectstore.query.Query; |
|---|
| 26 | import org.intermine.objectstore.query.QueryClass; |
|---|
| 27 | import org.intermine.objectstore.query.QueryExpression; |
|---|
| 28 | import org.intermine.objectstore.query.QueryField; |
|---|
| 29 | import org.intermine.objectstore.query.QueryObjectReference; |
|---|
| 30 | import org.intermine.objectstore.query.Results; |
|---|
| 31 | import org.intermine.objectstore.query.ResultsRow; |
|---|
| 32 | |
|---|
| 33 | /** |
|---|
| 34 | * Utility methods for the flymine package. |
|---|
| 35 | * @author Julie Sullivan |
|---|
| 36 | */ |
|---|
| 37 | public final class BioUtil |
|---|
| 38 | { |
|---|
| 39 | |
|---|
| 40 | private static final OrganismRepository OR = OrganismRepository.getOrganismRepository(); |
|---|
| 41 | |
|---|
| 42 | private BioUtil() { |
|---|
| 43 | // don't |
|---|
| 44 | } |
|---|
| 45 | |
|---|
| 46 | /** |
|---|
| 47 | * For a bag of objects, returns a list of organisms |
|---|
| 48 | * @param os ObjectStore |
|---|
| 49 | * @param lowercase if true, the organism names will be returned in lowercase |
|---|
| 50 | * @param bag InterMineBag |
|---|
| 51 | * @return collection of organism names |
|---|
| 52 | */ |
|---|
| 53 | public static Collection<String> getOrganisms(ObjectStore os, InterMineBag bag, |
|---|
| 54 | boolean lowercase) { |
|---|
| 55 | return getOrganisms(os, bag, lowercase, "name"); |
|---|
| 56 | } |
|---|
| 57 | |
|---|
| 58 | /** |
|---|
| 59 | * For a bag of objects, returns a list of organisms. |
|---|
| 60 | * @param os ObjectStore |
|---|
| 61 | * @param lowercase if true, the organism names will be returned in lowercase |
|---|
| 62 | * @param bag InterMineBag |
|---|
| 63 | * @param organismFieldName eg. name, shortName or taxonId |
|---|
| 64 | * @return collection of organism names |
|---|
| 65 | */ |
|---|
| 66 | public static Collection<String> getOrganisms(ObjectStore os, InterMineBag bag, |
|---|
| 67 | boolean lowercase, String organismFieldName) { |
|---|
| 68 | |
|---|
| 69 | Query q = new Query(); |
|---|
| 70 | Model model = os.getModel(); |
|---|
| 71 | QueryClass qcObject = null; |
|---|
| 72 | try { |
|---|
| 73 | String className = model.getPackageName() + "." + bag.getType(); |
|---|
| 74 | qcObject = new QueryClass(Class.forName(className)); |
|---|
| 75 | } catch (ClassNotFoundException e) { |
|---|
| 76 | return null; |
|---|
| 77 | } |
|---|
| 78 | QueryClass qcOrganism |
|---|
| 79 | = new QueryClass(model.getClassDescriptorByName("Organism").getType()); |
|---|
| 80 | |
|---|
| 81 | QueryField qfOrganismName = new QueryField(qcOrganism, "name"); |
|---|
| 82 | |
|---|
| 83 | QueryField qfGeneId = new QueryField(qcObject, "id"); |
|---|
| 84 | |
|---|
| 85 | q.addFrom(qcObject); |
|---|
| 86 | q.addFrom(qcOrganism); |
|---|
| 87 | |
|---|
| 88 | if ("name".equals(organismFieldName)) { |
|---|
| 89 | q.addToSelect(qfOrganismName); |
|---|
| 90 | q.addToOrderBy(qfOrganismName); |
|---|
| 91 | } else if ("taxonId".equals(organismFieldName) || "shortName".equals(organismFieldName)) { |
|---|
| 92 | // will either be taxonId or shortname |
|---|
| 93 | QueryField qfOrganism = new QueryField(qcOrganism, organismFieldName); |
|---|
| 94 | q.addToSelect(qfOrganism); |
|---|
| 95 | q.addToOrderBy(qfOrganism); |
|---|
| 96 | } else { |
|---|
| 97 | throw new RuntimeException(organismFieldName + " is not a valid field for Organism"); |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | ConstraintSet cs = new ConstraintSet(ConstraintOp.AND); |
|---|
| 101 | BagConstraint bc = new BagConstraint(qfGeneId, ConstraintOp.IN, bag.getOsb()); |
|---|
| 102 | cs.addConstraint(bc); |
|---|
| 103 | |
|---|
| 104 | QueryObjectReference qr = new QueryObjectReference(qcObject, "organism"); |
|---|
| 105 | ContainsConstraint cc = new ContainsConstraint(qr, ConstraintOp.CONTAINS, qcOrganism); |
|---|
| 106 | cs.addConstraint(cc); |
|---|
| 107 | |
|---|
| 108 | q.setConstraint(cs); |
|---|
| 109 | |
|---|
| 110 | Results r = os.execute(q); |
|---|
| 111 | Iterator<ResultsRow> it = (Iterator) r.iterator(); |
|---|
| 112 | Collection<String> orgs = new ArrayList(); |
|---|
| 113 | |
|---|
| 114 | while (it.hasNext()) { |
|---|
| 115 | ResultsRow rr = it.next(); |
|---|
| 116 | Object org = rr.get(0); |
|---|
| 117 | if (org != null) { |
|---|
| 118 | if (lowercase) { |
|---|
| 119 | orgs.add(org.toString().toLowerCase()); |
|---|
| 120 | } else { |
|---|
| 121 | orgs.add(org.toString()); |
|---|
| 122 | } |
|---|
| 123 | } |
|---|
| 124 | } |
|---|
| 125 | return orgs; |
|---|
| 126 | } |
|---|
| 127 | |
|---|
| 128 | |
|---|
| 129 | /** |
|---|
| 130 | * Return a list of chromosomes for specified organism |
|---|
| 131 | * @param os ObjectStore |
|---|
| 132 | * @param organisms Organism names. Assumes they are lowercase. |
|---|
| 133 | * @param lowercase if true returns lowercase chromosome names. the precomputed tables indexes |
|---|
| 134 | * are all lowercase, so the chromosome names need to be lowercase when used in queries |
|---|
| 135 | * @return collection of chromosome names |
|---|
| 136 | */ |
|---|
| 137 | @SuppressWarnings("unchecked") |
|---|
| 138 | public static Collection<String> getChromosomes(ObjectStore os, Collection<String> organisms, |
|---|
| 139 | boolean lowercase) { |
|---|
| 140 | Model model = os.getModel(); |
|---|
| 141 | |
|---|
| 142 | final String dmel = "drosophila melanogaster"; |
|---|
| 143 | ArrayList<String> chromosomes = new ArrayList(); |
|---|
| 144 | |
|---|
| 145 | if (organisms.contains("homo sapiens")) { |
|---|
| 146 | chromosomes.add("1"); |
|---|
| 147 | chromosomes.add("2"); |
|---|
| 148 | chromosomes.add("3"); |
|---|
| 149 | chromosomes.add("4"); |
|---|
| 150 | chromosomes.add("5"); |
|---|
| 151 | chromosomes.add("6"); |
|---|
| 152 | chromosomes.add("7"); |
|---|
| 153 | chromosomes.add("8"); |
|---|
| 154 | chromosomes.add("9"); |
|---|
| 155 | chromosomes.add("10"); |
|---|
| 156 | chromosomes.add("11"); |
|---|
| 157 | chromosomes.add("12"); |
|---|
| 158 | chromosomes.add("13"); |
|---|
| 159 | chromosomes.add("14"); |
|---|
| 160 | chromosomes.add("15"); |
|---|
| 161 | chromosomes.add("16"); |
|---|
| 162 | chromosomes.add("17"); |
|---|
| 163 | chromosomes.add("18"); |
|---|
| 164 | chromosomes.add("19"); |
|---|
| 165 | chromosomes.add("20"); |
|---|
| 166 | chromosomes.add("21"); |
|---|
| 167 | chromosomes.add("22"); |
|---|
| 168 | if (lowercase) { |
|---|
| 169 | chromosomes.add("x"); |
|---|
| 170 | chromosomes.add("y"); |
|---|
| 171 | } else { |
|---|
| 172 | chromosomes.add("X"); |
|---|
| 173 | chromosomes.add("Y"); |
|---|
| 174 | } |
|---|
| 175 | if (organisms.size() == 1) { |
|---|
| 176 | return chromosomes; |
|---|
| 177 | } |
|---|
| 178 | organisms.remove("homo sapiens"); |
|---|
| 179 | } |
|---|
| 180 | |
|---|
| 181 | // TODO this may well go away once chromosomes sorted out in #1186 |
|---|
| 182 | if (organisms.contains(dmel)) { |
|---|
| 183 | if (lowercase) { |
|---|
| 184 | chromosomes.add("2l"); |
|---|
| 185 | chromosomes.add("2r"); |
|---|
| 186 | chromosomes.add("3l"); |
|---|
| 187 | chromosomes.add("3r"); |
|---|
| 188 | chromosomes.add("4"); |
|---|
| 189 | chromosomes.add("u"); |
|---|
| 190 | chromosomes.add("x"); |
|---|
| 191 | } else { |
|---|
| 192 | chromosomes.add("2L"); |
|---|
| 193 | chromosomes.add("2R"); |
|---|
| 194 | chromosomes.add("3L"); |
|---|
| 195 | chromosomes.add("3R"); |
|---|
| 196 | chromosomes.add("4"); |
|---|
| 197 | chromosomes.add("U"); |
|---|
| 198 | chromosomes.add("X"); |
|---|
| 199 | } |
|---|
| 200 | if (organisms.size() == 1) { |
|---|
| 201 | return chromosomes; |
|---|
| 202 | } |
|---|
| 203 | organisms.remove(dmel); |
|---|
| 204 | } |
|---|
| 205 | |
|---|
| 206 | Query q = new Query(); |
|---|
| 207 | |
|---|
| 208 | QueryClass qcChromosome |
|---|
| 209 | = new QueryClass(model.getClassDescriptorByName("Chromosome").getType()); |
|---|
| 210 | QueryClass qcOrganism |
|---|
| 211 | = new QueryClass(model.getClassDescriptorByName("Organism").getType()); |
|---|
| 212 | QueryField qfChromosome = new QueryField(qcChromosome, "primaryIdentifier"); |
|---|
| 213 | QueryField qfOrganismName = new QueryField(qcOrganism, "name"); |
|---|
| 214 | q.addFrom(qcChromosome); |
|---|
| 215 | q.addFrom(qcOrganism); |
|---|
| 216 | |
|---|
| 217 | q.addToSelect(qfChromosome); |
|---|
| 218 | |
|---|
| 219 | ConstraintSet cs = new ConstraintSet(ConstraintOp.AND); |
|---|
| 220 | |
|---|
| 221 | QueryObjectReference qr = new QueryObjectReference(qcChromosome, "organism"); |
|---|
| 222 | ContainsConstraint cc = new ContainsConstraint(qr, ConstraintOp.CONTAINS, qcOrganism); |
|---|
| 223 | cs.addConstraint(cc); |
|---|
| 224 | |
|---|
| 225 | QueryExpression qf = new QueryExpression(QueryExpression.LOWER, qfOrganismName); |
|---|
| 226 | BagConstraint bc = new BagConstraint(qf, ConstraintOp.IN, organisms); |
|---|
| 227 | cs.addConstraint(bc); |
|---|
| 228 | |
|---|
| 229 | q.setConstraint(cs); |
|---|
| 230 | |
|---|
| 231 | q.addToOrderBy(qfChromosome); |
|---|
| 232 | |
|---|
| 233 | Results r = os.execute(q); |
|---|
| 234 | Iterator it = r.iterator(); |
|---|
| 235 | |
|---|
| 236 | while (it.hasNext()) { |
|---|
| 237 | ResultsRow rr = (ResultsRow) it.next(); |
|---|
| 238 | String chromosome = (String) rr.get(0); |
|---|
| 239 | if (lowercase) { |
|---|
| 240 | chromosome.toLowerCase(); |
|---|
| 241 | } |
|---|
| 242 | chromosomes.add(chromosome); |
|---|
| 243 | } |
|---|
| 244 | return chromosomes; |
|---|
| 245 | } |
|---|
| 246 | |
|---|
| 247 | /** |
|---|
| 248 | * Get the extra attributes needed for the DataSetLoader |
|---|
| 249 | * @param os the objectstore |
|---|
| 250 | * @param bag the bag |
|---|
| 251 | * @return a collection of strings to pass to the datasetloader |
|---|
| 252 | */ |
|---|
| 253 | public static Collection<String> getExtraAttributes(ObjectStore os, InterMineBag bag) { |
|---|
| 254 | return getOrganisms(os, bag, false); |
|---|
| 255 | } |
|---|
| 256 | |
|---|
| 257 | /** |
|---|
| 258 | * Looks in the organism repo for the taxon ID provided. If the taxon ID is not there, it looks |
|---|
| 259 | * for strains that use that ID. Will return NULL if there is no strain and no taxon ID in |
|---|
| 260 | * the organism data. |
|---|
| 261 | * |
|---|
| 262 | * @param taxonId original taxon ID |
|---|
| 263 | * @return taxonId for organism, not the strain |
|---|
| 264 | */ |
|---|
| 265 | public static Integer replaceStrain(Integer taxonId) { |
|---|
| 266 | OrganismData od = OR.getOrganismDataByTaxon(taxonId); |
|---|
| 267 | if (od == null) { |
|---|
| 268 | return taxonId; |
|---|
| 269 | } |
|---|
| 270 | return new Integer(od.getTaxonId()); |
|---|
| 271 | } |
|---|
| 272 | } |
|---|