| 1 | package org.intermine.bio.dataconversion; |
|---|
| 2 | |
|---|
| 3 | /* |
|---|
| 4 | * Copyright (C) 2002-2011 FlyMine |
|---|
| 5 | * |
|---|
| 6 | * This code may be freely distributed and modified under the |
|---|
| 7 | * terms of the GNU Lesser General Public Licence. This should |
|---|
| 8 | * be distributed with the code. See the LICENSE file for more |
|---|
| 9 | * information or http://www.gnu.org/copyleft/lesser.html. |
|---|
| 10 | * |
|---|
| 11 | */ |
|---|
| 12 | |
|---|
| 13 | import java.util.HashMap; |
|---|
| 14 | import java.util.List; |
|---|
| 15 | import java.util.Map; |
|---|
| 16 | import java.util.regex.Matcher; |
|---|
| 17 | import java.util.regex.Pattern; |
|---|
| 18 | |
|---|
| 19 | import org.apache.log4j.Logger; |
|---|
| 20 | import org.intermine.bio.io.gff3.GFF3Record; |
|---|
| 21 | import org.intermine.metadata.Model; |
|---|
| 22 | import org.intermine.xml.full.Item; |
|---|
| 23 | |
|---|
| 24 | /** |
|---|
| 25 | * A converter/retriever for flyreg GFF3 files. |
|---|
| 26 | * |
|---|
| 27 | * @author Kim Rutherford |
|---|
| 28 | */ |
|---|
| 29 | |
|---|
| 30 | public class FlyRegGFF3RecordHandler extends GFF3RecordHandler |
|---|
| 31 | { |
|---|
| 32 | private final Map<String, Item> pubmedIdMap = new HashMap<String, Item>(); |
|---|
| 33 | private final Map<String, Item> geneIdMap = new HashMap<String, Item>(); |
|---|
| 34 | protected IdResolverFactory resolverFactory; |
|---|
| 35 | private static final String TAXON_ID = "7227"; |
|---|
| 36 | |
|---|
| 37 | protected static final Logger LOG = Logger.getLogger(FlyRegGFF3RecordHandler.class); |
|---|
| 38 | |
|---|
| 39 | /** |
|---|
| 40 | * Create a new FlyRegGFF3RecordHandler for the given target model. |
|---|
| 41 | * @param tgtModel the model for which items will be created |
|---|
| 42 | */ |
|---|
| 43 | public FlyRegGFF3RecordHandler(Model tgtModel) { |
|---|
| 44 | super(tgtModel); |
|---|
| 45 | // only construct factory here so can be replaced by mock factory in tests |
|---|
| 46 | resolverFactory = new FlyBaseIdResolverFactory("gene"); |
|---|
| 47 | } |
|---|
| 48 | |
|---|
| 49 | /** |
|---|
| 50 | * {@inheritDoc} |
|---|
| 51 | */ |
|---|
| 52 | @Override |
|---|
| 53 | public void process(GFF3Record record) { |
|---|
| 54 | getFeature().setClassName("TFBindingSite"); |
|---|
| 55 | |
|---|
| 56 | Item bindingSite = getFeature(); |
|---|
| 57 | |
|---|
| 58 | String name = record.getId(); |
|---|
| 59 | |
|---|
| 60 | Pattern p = Pattern.compile(".*:REDFLY:(.*)"); |
|---|
| 61 | Matcher m = p.matcher(name); |
|---|
| 62 | |
|---|
| 63 | if (!m.matches()) { |
|---|
| 64 | LOG.warn("Binding site identifier didn't match pattern: " + name); |
|---|
| 65 | bindingSite.setAttribute("primaryIdentifier", name); |
|---|
| 66 | } else { |
|---|
| 67 | bindingSite.setAttribute("primaryIdentifier", m.group(1)); |
|---|
| 68 | } |
|---|
| 69 | bindingSite.setAttribute("name", name); |
|---|
| 70 | |
|---|
| 71 | if (record.getAttributes().containsKey("Evidence")) { |
|---|
| 72 | List<String> evidenceList = record.getAttributes().get("Evidence"); |
|---|
| 73 | String elementEvidence = evidenceList.get(0); |
|---|
| 74 | bindingSite.setAttribute("evidenceMethod", elementEvidence); |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | List<String> dbxrefs = record.getAttributes().get("Dbxref"); |
|---|
| 78 | |
|---|
| 79 | String redflyID = null; |
|---|
| 80 | String pmid = null; |
|---|
| 81 | |
|---|
| 82 | for (String dbxref: dbxrefs) { |
|---|
| 83 | if (dbxref.startsWith("PMID:")) { |
|---|
| 84 | pmid = dbxref.substring(5); |
|---|
| 85 | } else { |
|---|
| 86 | if (dbxref.startsWith("REDfly:")) { |
|---|
| 87 | redflyID = dbxref.substring(7); |
|---|
| 88 | } |
|---|
| 89 | } |
|---|
| 90 | } |
|---|
| 91 | |
|---|
| 92 | if (pmid == null) { |
|---|
| 93 | throw new RuntimeException("no pubmed id for: " + bindingSite); |
|---|
| 94 | } |
|---|
| 95 | |
|---|
| 96 | if (redflyID == null) { |
|---|
| 97 | throw new RuntimeException("no REDfly: id for: " + bindingSite); |
|---|
| 98 | } |
|---|
| 99 | |
|---|
| 100 | bindingSite.setAttribute("secondaryIdentifier", redflyID); |
|---|
| 101 | Item pubmedItem; |
|---|
| 102 | if (pubmedIdMap.containsKey(pmid)) { |
|---|
| 103 | pubmedItem = pubmedIdMap.get(pmid); |
|---|
| 104 | } else { |
|---|
| 105 | pubmedItem = converter.createItem("Publication"); |
|---|
| 106 | pubmedIdMap.put(pmid, pubmedItem); |
|---|
| 107 | pubmedItem.setAttribute("pubMedId", pmid); |
|---|
| 108 | addItem(pubmedItem); |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | addPublication(pubmedItem); |
|---|
| 112 | |
|---|
| 113 | String factorGeneName = record.getAttributes().get("Factor").get(0); |
|---|
| 114 | if (!("unknown").equals(factorGeneName.toLowerCase()) |
|---|
| 115 | && !("unspecified").equals(factorGeneName.toLowerCase())) { |
|---|
| 116 | Item gene = getGene(factorGeneName); |
|---|
| 117 | if (gene != null) { |
|---|
| 118 | bindingSite.setReference("factor", gene.getIdentifier()); |
|---|
| 119 | } |
|---|
| 120 | } |
|---|
| 121 | |
|---|
| 122 | String targetGeneName = record.getAttributes().get("Target").get(0); |
|---|
| 123 | |
|---|
| 124 | if (!("unknown").equals(targetGeneName.toLowerCase()) |
|---|
| 125 | && !("unspecified").equals(targetGeneName.toLowerCase())) { |
|---|
| 126 | Item gene = getGene(targetGeneName); |
|---|
| 127 | if (gene != null) { |
|---|
| 128 | bindingSite.setReference("gene", gene.getIdentifier()); |
|---|
| 129 | } |
|---|
| 130 | } |
|---|
| 131 | } |
|---|
| 132 | |
|---|
| 133 | private Item getGene(String symbol) { |
|---|
| 134 | IdResolver resolver = resolverFactory.getIdResolver(); |
|---|
| 135 | int resCount = resolver.countResolutions(TAXON_ID, symbol); |
|---|
| 136 | if (resCount != 1) { |
|---|
| 137 | LOG.info("RESOLVER: failed to resolve gene to one identifier, ignoring gene: " |
|---|
| 138 | + symbol + " count: " + resCount + " FBgn: " |
|---|
| 139 | + resolver.resolveId(TAXON_ID, symbol)); |
|---|
| 140 | return null; |
|---|
| 141 | } |
|---|
| 142 | String primaryIdentifier = resolver.resolveId(TAXON_ID, symbol).iterator().next(); |
|---|
| 143 | Item gene = geneIdMap.get(primaryIdentifier); |
|---|
| 144 | if (gene == null) { |
|---|
| 145 | gene = converter.createItem("Gene"); |
|---|
| 146 | geneIdMap.put(primaryIdentifier, gene); |
|---|
| 147 | gene.setAttribute("primaryIdentifier", primaryIdentifier); |
|---|
| 148 | gene.setReference("organism", getOrganism()); |
|---|
| 149 | addItem(gene); |
|---|
| 150 | } |
|---|
| 151 | return gene; |
|---|
| 152 | } |
|---|
| 153 | } |
|---|