diff --git a/orthopairs/cov1_cov2_mapping.tsv b/orthopairs/cov1_cov2_mapping.tsv new file mode 100644 index 00000000..c9d0a36d --- /dev/null +++ b/orthopairs/cov1_cov2_mapping.tsv @@ -0,0 +1,59 @@ +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P59594 P0DTC2 +P59632 P0DTC3 +P59596 P0DTC5 +P59637 P0DTC4 +P59634 P0DTC6 +P59635 P0DTC7 +Q7TFA1 P0DTD8 +Q7TFA0 P0DTC8 +P59595 P0DTC9 +P59636 P0DTD2 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 diff --git a/orthopairs/cov2_coordinate_mapping.tsv b/orthopairs/cov2_coordinate_mapping.tsv new file mode 100644 index 00000000..2431a3e6 --- /dev/null +++ b/orthopairs/cov2_coordinate_mapping.tsv @@ -0,0 +1,59 @@ +full genome NC_004718.3 1 29751 MN908947.3 1 29903 +mRNA1 NC_004718.3 1 21485 MN908947.3 1 21555 +mRNA2 NC_004718.3 21492 29751 MN908947.3 21563 29903 +mRNA3 NC_004718.3 25268 29751 MN908947.3 25393 29903 +mRNA4 NC_004718.3 26117 29751 MN908947.3 26245 29903 +mRNA5 NC_004718.3 26398 29751 MN908947.3 26523 29903 +mRNA6 NC_004718.3 26913 29751 MN908947.3 27202 29903 +mRNA7 NC_004718.3 27273 29751 MN908947.3 27394 29903 +mRNA8 NC_004718.3 27779 29751 MN908947.3 27894 29903 +mRNA9 NC_004718.3 28120 29751 MN908947.3 28274 29903 
+full minus strand NC_004718.3 -29751 -1 MN908947.3 -29903 -1 +minus mRNA2 NC_004718.3 -29751 -21492 MN908947.3 -29903 -21563 +minus mRNA3 NC_004718.3 -29751 -25268 MN908947.3 -29903 -25393 +minus mRNA4 NC_004718.3 -29751 -26117 MN908947.3 -29903 -26245 +minus mRNA5 NC_004718.3 -29751 -26398 MN908947.3 -29903 -26523 +minus mRNA6 NC_004718.3 -29751 -26913 MN908947.3 -29903 -27202 +minus mRNA7 NC_004718.3 -29751 -27273 MN908947.3 -29903 -27394 +minus mRNA8 NC_004718.3 -29751 -27779 MN908947.3 -29903 -27894 +minus mRNA9 NC_004718.3 -29751 -28120 MN908947.3 -29903 -28274 +pp1a P0C6U8 P0DTC1 +pp1a-nsp1 P0C6U8 1 180 P0DTC1 1 180 +pp1a-nsp2 P0C6U8 181 818 P0DTC1 181 818 +pp1a-nsp3 P0C6U8 819 2740 P0DTC1 819 2763 +pp1a-nsp4 P0C6U8 2741 3240 P0DTC1 2764 3263 +pp1a-nsp5 P0C6U8 3241 3546 P0DTC1 3264 3569 +pp1a-nsp6 P0C6U8 3547 3836 P0DTC1 3570 3859 +pp1a-nsp7 P0C6U8 3837 3919 P0DTC1 3860 3942 +pp1a-nsp8 P0C6U8 3920 4117 P0DTC1 3943 4140 +pp1a-nsp9 P0C6U8 4118 4230 P0DTC1 4141 4253 +pp1a-nsp10 P0C6U8 4231 4369 P0DTC1 4254 4392 +pp1a-nsp11 P0C6U8 4370 4382 P0DTC1 4393 4405 +pp1a-nsp3-4 P0C6U8 819 3240 P0DTC1 819 3263 +pp1a-nsp6-11 P0C6U8 3547 4382 P0DTC1 3570 4405 +rep P0C6X7 P0DTD1 +pp1ab-nsp1 P0C6X7 1 180 P0DTD1 1 180 +pp1ab-nsp2 P0C6X7 181 818 P0DTD1 181 818 +pp1ab-nsp3 P0C6X7 819 2740 P0DTD1 819 2763 +pp1ab-nsp4 P0C6X7 2741 3240 P0DTD1 2764 3263 +pp1ab-nsp5 P0C6X7 3241 3546 P0DTD1 3264 3569 +pp1ab-nsp6 P0C6X7 3547 3836 P0DTD1 3570 3859 +pp1ab-nsp7 P0C6X7 3837 3919 P0DTD1 3860 3942 +pp1ab-nsp8 P0C6X7 3920 4117 P0DTD1 3943 4140 +pp1ab-nsp9 P0C6X7 4118 4230 P0DTD1 4141 4253 +pp1ab-nsp10 P0C6X7 4231 4369 P0DTD1 4254 4392 +pp1ab-nsp12 P0C6X7 4370 5301 P0DTD1 4393 5324 +pp1ab-nsp13 P0C6X7 5302 5902 P0DTD1 5325 5925 +pp1ab-nsp14 P0C6X7 5903 6429 P0DTD1 5926 6452 +pp1ab-nsp15 P0C6X7 6430 6775 P0DTD1 6453 6798 +pp1ab-nsp16 P0C6X7 6776 7073 P0DTD1 6799 7096 +S P59594 P0DTC2 +3a P59632 P0DTC3 +M P59596 P0DTC5 +E P59637 P0DTC4 +6 P59634 P0DTC6 +7a P59635 P0DTC7 +7b Q7TFA1 P0DTD8 +8a 
Q7TFA0 P0DTC8 +N P59595 P0DTC9 +9b P59636 P0DTD2 diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 685a2b9f..8011f417 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -30,6 +30,7 @@ public class EWASInferrer { private static GKInstance enspDbInst; private static GKInstance alternateDbInst; private static GKInstance uniprotDbInst; + private static GKInstance ncbiNucleotideInst; private static GKInstance speciesInst; private static Map homologueMappings = new HashMap<>(); private static Map> ensgMappings = new HashMap<>(); @@ -38,6 +39,8 @@ public class EWASInferrer { private static Map residueIdenticals = new HashMap<>(); private static Map> wormbaseMappings = new HashMap<>(); private static Map geneNameMappings = new HashMap<>(); + private static Map> coordinateMappings = new HashMap<>(); + private static Map> modifiedResidueMappings = new HashMap<>(); // Creates an array of inferred EWAS instances from the homologue mappings file (hsap_species_mapping.txt). @SuppressWarnings("unchecked") @@ -55,47 +58,67 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr String homologueSource = homologue.contains(":") ? homologue.split(":")[0] : ""; String homologueId = homologue.contains(":") ? 
homologue.split(":")[1] : homologue; - if (checkValidSpeciesProtein(homologueId)) { +// if (checkValidSpeciesProtein(homologueId)) { GKInstance infReferenceGeneProductInst; - if (referenceGeneProductIdenticals.get(homologueId) == null) { +// if (referenceGeneProductIdenticals.get(homologueId) == null) { logger.info("Creating ReferenceGeneProduct for " + homologue); - infReferenceGeneProductInst = InstanceUtilities.createNewInferredGKInstance((GKInstance) ewasInst.getAttributeValue(referenceEntity)); + GKInstance referenceEntityInst = (GKInstance) ewasInst.getAttributeValue(referenceEntity); + infReferenceGeneProductInst = InstanceUtilities.createNewInferredGKInstance(referenceEntityInst); infReferenceGeneProductInst.addAttributeValue(identifier, homologueId); // Reference DB can differ between homologue mappings, but can be differentiated by the 'homologueSource' found in each mapping. // With PANTHER data, the Protein IDs are exclusively UniProt - GKInstance referenceDatabaseInst = homologueSource.equals("ENSP") ? enspDbInst : uniprotDbInst; + GKInstance rgpInst = (GKInstance) ewasInst.getAttributeValue(referenceEntity); + GKInstance refDBInst = (GKInstance) rgpInst.getAttributeValue(referenceDatabase); + String refDbName = refDBInst.getAttributeValue(name).toString(); + GKInstance referenceDatabaseInst = refDbName.contains("NCBI") ? 
ncbiNucleotideInst : uniprotDbInst; infReferenceGeneProductInst.addAttributeValue(referenceDatabase, referenceDatabaseInst); // Creates ReferenceDNASequence instance from ReferenceEntity - List inferredReferenceDNAInstances = createReferenceDNASequence(homologueId); - infReferenceGeneProductInst.addAttributeValue(referenceGene, inferredReferenceDNAInstances); +// List inferredReferenceDNAInstances = createReferenceDNASequence(homologueId); +// infReferenceGeneProductInst.addAttributeValue(referenceGene, inferredReferenceDNAInstances); infReferenceGeneProductInst.addAttributeValue(species, speciesInst); - String referenceGeneProductSource = homologueSource.equals("ENSP") ? "ENSEMBL:" : "UniProt:"; - infReferenceGeneProductInst.setAttributeValue(_displayName, referenceGeneProductSource + homologueId); + String referenceGeneProductSource = refDbName.contains("NCBI") ? "NCBI Nucleotide:" : "UniProt:"; + infReferenceGeneProductInst.setAttributeValue(_displayName, referenceGeneProductSource + homologueId + " " + referenceEntityInst.getAttributeValue(name)); + infReferenceGeneProductInst.setAttributeValue(name, referenceEntityInst.getAttributeValue(name)); + infReferenceGeneProductInst.setAttributeValue(geneName, referenceEntityInst.getAttributeValue(geneName)); + if (referenceEntityInst.getAttributeValue(keyword) != null) { + infReferenceGeneProductInst.setAttributeValue(keyword, referenceEntityInst.getAttributeValuesList(keyword)); + } // GeneName value comes from UniProt's identifier mapping service. 
if (geneNameMappings.containsKey(homologueId)) { infReferenceGeneProductInst.addAttributeValue(geneName, geneNameMappings.get(homologueId)); } - + if (referenceEntityInst.getAttributeValue(comment) != null) { + infReferenceGeneProductInst.setAttributeValue(comment, referenceEntityInst.getAttributeValuesList(comment)); + } logger.info("ReferenceGeneProduct instance created"); infReferenceGeneProductInst = InstanceUtilities.checkForIdenticalInstances(infReferenceGeneProductInst, null); referenceGeneProductIdenticals.put(homologueId, infReferenceGeneProductInst); - } else { - logger.info("Orthologous ReferenceGeneProduct already exists"); - infReferenceGeneProductInst = referenceGeneProductIdenticals.get(homologueId); - } +// } else { +// logger.info("Orthologous ReferenceGeneProduct already exists"); +// infReferenceGeneProductInst = referenceGeneProductIdenticals.get(homologueId); +// } // Creating inferred EWAS GKInstance infEWASInst = InstanceUtilities.createNewInferredGKInstance(ewasInst); infEWASInst.addAttributeValue(referenceEntity, infReferenceGeneProductInst); +// infEWASInst.addAttributeValue(name, ewasInst.getAttributeValue(name)); // Method for adding start/end coordinates. It is convoluted due to a quirk with assigning the name differently based on coordinate value (see infer_events.pl lines 1190-1192). // The name of the entity needs to be at the front of the 'name' array if the coordinate is over 1, and rearranging arrays in Java for this was a bit tricky. 
+ + String coordKey = getCoordKey(ewasInst); for (int startCoord : (Collection) ewasInst.getAttributeValuesList(startCoordinate)) { + if (coordinateMappings.get(coordKey) != null) { + startCoord = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); + } infEWASInst.addAttributeValue(startCoordinate, startCoord); } for (int endCoord : (Collection) ewasInst.getAttributeValuesList(endCoordinate)) { + if (coordinateMappings.get(coordKey) != null) { + endCoord = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); + } infEWASInst.addAttributeValue(endCoordinate, endCoord); } if (infEWASInst.getAttributeValue(startCoordinate) != null && (int) infEWASInst.getAttributeValue(startCoordinate) > 1 || infEWASInst.getAttributeValue(endCoordinate) != null && (int) infEWASInst.getAttributeValue(endCoordinate) > 1) { @@ -103,6 +126,9 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infEWASInst.addAttributeValue(name, infEWASInstNames.get(0)); infEWASInst.addAttributeValue(name, homologueId); } else { + // Added for COV-1-to-COV-2 projections + infEWASInst.addAttributeValue(name, ewasInst.getAttributeValue(name)); + // infEWASInst.addAttributeValue(name, homologueId); } @@ -125,11 +151,31 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr boolean phosFlag = true; for (GKInstance modifiedResidueInst : (Collection) ewasInst.getAttributeValuesList(hasModifiedResidue)) { logger.info("Inferring ModifiedResidue: " + modifiedResidueInst); + + if (modifiedResidueMappings.get(ewasInst) != null) { + modifiedResidueMappings.get(ewasInst).add(modifiedResidueInst); + } else { + List singleList = new ArrayList<>(); + singleList.add(modifiedResidueInst); + modifiedResidueMappings.put(ewasInst, singleList); + } + + String infModifiedResidueDisplayName = ""; GKInstance infModifiedResidueInst = InstanceUtilities.createNewInferredGKInstance(modifiedResidueInst); infModifiedResidueInst.addAttributeValue(referenceSequence, 
infReferenceGeneProductInst); infModifiedResidueDisplayName += infReferenceGeneProductInst.getDisplayName(); for (int coordinateValue : (Collection) modifiedResidueInst.getAttributeValuesList(coordinate)) { + if (coordinateMappings.get(coordKey) != null) { + String ewasStartCoord = ewasInst.getAttributeValue(startCoordinate).toString(); + String ewasEndCoord = ewasInst.getAttributeValue(endCoordinate).toString(); + if (ewasStartCoord.equals(String.valueOf(coordinateValue))) { + coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); + } + if (ewasEndCoord.equals(String.valueOf(coordinateValue))) { + coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); + } + } infModifiedResidueInst.addAttributeValue(coordinate, coordinateValue); } if (infModifiedResidueInst.getSchemClass().isValidAttribute(modification)) { @@ -140,34 +186,42 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(modification)).getDisplayName(); } } - // Update name depending on the presence of 'phospho' in the Psimod's name attribute - GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); - if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { - String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); - List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); - String originalName = ewasNames.remove(0); - infEWASInst.setAttributeValue(name, phosphoName); - // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. - // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. 
- // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. (Justin Cook 2018) - infEWASInst.addAttributeValue(name, ewasNames); - String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; - infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); - // This flag ensures the 'phospho-' is only prepended once. - logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". Updated: " + phosphoName); - phosFlag = false; - } - for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { - infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); + if (modifiedResidueInst.getSchemClass().isValidAttribute(psiMod)) { + // Update name depending on the presence of 'phospho' in the Psimod's name attribute + GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); + if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { + String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); + List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); + String originalName = ewasNames.remove(0); + infEWASInst.setAttributeValue(name, phosphoName); + // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. + // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. + // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. 
(Justin Cook 2018) + infEWASInst.addAttributeValue(name, ewasNames); + String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; + infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); + // This flag ensures the 'phospho-' is only prepended once. + logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". Updated: " + phosphoName); + phosFlag = false; + } + for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { + infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); + } + if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { + infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); + } } - if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { - infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); + + if (infModifiedResidueInst.getSchemClass().isa("ModifiedNucleotide")) { + infModifiedResidueDisplayName = createModifiedNucleotideDisplayName(modifiedResidueInst, infModifiedResidueInst); } - infModifiedResidueInst.setAttributeValue(_displayName, modifiedResidueInst.getAttributeValue(_displayName)); + infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. 
if (modifiedResidueInst.getAttributeValue(coordinate) != null) { - String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString() + " (in Homo sapiens)"; - infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); + // Commented out during COV-1 to COV-2 projection +// String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString(); // + " (in Homo sapiens)"; +// infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); + // } else { if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { @@ -188,14 +242,16 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr } } } + String modifiedResidueDisplayName = "[INFERRED] " + infModifiedResidueInst.getDisplayName(); + infModifiedResidueInst.setDisplayName(modifiedResidueDisplayName); // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. 
String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infModifiedResidueInst.getSchemClass(), infModifiedResidueInst); - if (residueIdenticals.get(cacheKey) != null) { - infModifiedResidueInst = residueIdenticals.get(cacheKey); - } else { +// if (residueIdenticals.get(cacheKey) != null) { +// infModifiedResidueInst = residueIdenticals.get(cacheKey); +// } else { infModifiedResidueInst = InstanceUtilities.checkForIdenticalInstances(infModifiedResidueInst, null); - residueIdenticals.put(cacheKey, infModifiedResidueInst); - } +// residueIdenticals.put(cacheKey, infModifiedResidueInst); +// } infModifiedResidueInstances.add(infModifiedResidueInst); logger.info("Successfully inferred ModifiedResidue"); } @@ -215,9 +271,9 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr dba.updateInstanceAttribute(ewasInst, inferredTo); logger.info("Successfully inferred EWAS instance for " + homologue + " homologue"); infEWASInstances.add(infEWASInst); - } else { - logger.info("Gene ID corresponding to " + homologue + " not found in gene_protein_mapping file -- skipping EWAS inference"); - } +// } else { +// logger.info("Gene ID corresponding to " + homologue + " not found in gene_protein_mapping file -- skipping EWAS inference"); +// } } } else { logger.info("Could not infer EWAS, unable to find homologue for " + referenceEntityId); @@ -226,6 +282,24 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr return infEWASInstances; } + private static String createModifiedNucleotideDisplayName(GKInstance modifiedResidueInst, GKInstance infModifiedResidueInst) throws Exception { + String coordinateString = infModifiedResidueInst.getAttributeValue(coordinate).toString() + " "; + GKInstance modificationInst = (GKInstance) infModifiedResidueInst.getAttributeValue(modification); + String modificationName = modificationInst.getAttributeValue(name).toString() + " "; + GKInstance refSeqInst = (GKInstance) 
infModifiedResidueInst.getAttributeValue(referenceSequence); + String refSeqIdentifier = refSeqInst.getAttributeValue(identifier).toString() + " "; + String refSeqName = refSeqInst.getAttributeValue(name).toString(); + return coordinateString + modificationName + refSeqIdentifier + refSeqName; + } + + private static String getCoordKey(GKInstance ewasInst) throws Exception { + GKInstance rgpInst = (GKInstance) ewasInst.getAttributeValue(referenceEntity); + String rgpIdentifier = rgpInst.getAttributeValue(identifier).toString(); + String startCoord = ewasInst.getAttributeValue(startCoordinate).toString(); + String endCoord = ewasInst.getAttributeValue(endCoordinate).toString(); + return rgpIdentifier + startCoord + endCoord; + } + /** * Retrieve all Wormbase gene names that match the homologue Id. * @param homologueId -- String homologue ID value from Orthopair file. @@ -339,10 +413,12 @@ public static void readENSGMappingFile(String toSpecies, String pathToOrthopairs // Fetches Uniprot DB instance @SuppressWarnings("unchecked") - public static void fetchAndSetUniprotDbInstance() throws Exception + public static void fetchAndSetDbInstances() throws Exception { Collection uniprotDbInstances = (Collection) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "UniProt"); + Collection ncbiNucleotideInstances = (Collection) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "NCBI Nucleotide"); uniprotDbInst = uniprotDbInstances.iterator().next(); + ncbiNucleotideInst = ncbiNucleotideInstances.iterator().next(); } // Creates instance pertaining to the species Ensembl Protein DB @@ -412,4 +488,41 @@ public static void setWormbaseMappings(Map> wormbaseMapping public static void setGeneNameMappingFile(Map geneNameMappingsCopy) { geneNameMappings = geneNameMappingsCopy; } + + public static void readAndSetCoordinateMappingFile(String targetSpecies) throws IOException { + String mappingFileName = targetSpecies + "_coordinate_mapping.tsv"; + String mappingFilePath = 
Paths.get("orthopairs", mappingFileName).toString(); + logger.info("Reading in " + mappingFilePath); + FileReader fr = new FileReader(mappingFilePath); + BufferedReader br = new BufferedReader(fr); + + String currentLine; + Set coords = new HashSet<>(); + while ((currentLine = br.readLine()) != null) + { + String[] tabSplit = currentLine.split("\t"); + String name = tabSplit[0]; + String cov1Identifier = tabSplit[1]; + String startCoordCov1 = tabSplit[2] != null ? tabSplit[2] : ""; + String endCoordCov1 = tabSplit[3] != null ? tabSplit[3] : ""; + if (!startCoordCov1.isEmpty() && !endCoordCov1.isEmpty()) { + + String cov1Joined = cov1Identifier + startCoordCov1 + endCoordCov1; + + String startCoordCov2 = tabSplit[5] != null ? tabSplit[5] : ""; + String endCoordCov2 = tabSplit[6] != null ? tabSplit[6] : ""; + + Map coordMap = new HashMap<>(); + coordMap.put("start", startCoordCov2); + coordMap.put("end", endCoordCov2); + coordinateMappings.put(cov1Joined, coordMap); + } + } + br.close(); + fr.close(); + } + + public static Map> getModifiedResiduesMapping() { + return modifiedResidueMappings; + } } diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index a2c25ad0..47748614 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -51,9 +51,10 @@ public class EventsInferrer private static List manualHumanEvents = new ArrayList<>(); private static StableIdentifierGenerator stableIdentifierGenerator; private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator; + private static Long sarsCOV1InfectionsPathwayDbId = 9678108L; @SuppressWarnings("unchecked") - public static void inferEvents(Properties props, String species) throws Exception + public static void inferEvents(Properties props, String referenceSpecies, String targetSpecies) throws Exception { logger.info("Preparing 
DB Adaptor and setting project variables"); // Set up DB adaptor using config.properties file @@ -65,6 +66,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio int port = Integer.valueOf(props.getProperty("release.database.port")); dbAdaptor = new MySQLAdaptor(host, database, username, password, port); + System.out.println(dbAdaptor.fetchMaxDbId()); dbAdaptorPrev = new MySQLAdaptor(host, prevDatabase, username, password, port); if (dbAdaptor == null || dbAdaptorPrev == null) { logger.fatal("Null MySQLAdaptor, terminating orthoinference"); @@ -91,68 +93,89 @@ public static void inferEvents(Properties props, String species) throws Exceptio JSONObject jsonObject = (JSONObject) obj; // Parse Species information (found in Species.json config file) - JSONObject speciesObject = (JSONObject) jsonObject.get(species); - JSONArray speciesNames = (JSONArray) speciesObject.get("name"); - String speciesName = (String) speciesNames.get(0); - logger.info("Beginning orthoinference of " + speciesName); + JSONObject targetSpeciesObject = (JSONObject) jsonObject.get(targetSpecies); + JSONArray targetSpeciesNames = (JSONArray) targetSpeciesObject.get("name"); + String targetSpeciesName = (String) targetSpeciesNames.get(0); - JSONObject refDb = (JSONObject) speciesObject.get("refdb"); - String refDbUrl = (String) refDb.get("url"); - String refDbProteinUrl = (String) refDb.get("access"); - String refDbGeneUrl = (String) refDb.get("ensg_access"); + JSONObject referenceSpeciesObject = (JSONObject) jsonObject.get(referenceSpecies); + JSONArray referenceSpeciesNames = (JSONArray) referenceSpeciesObject.get("name"); + String referenceSpeciesName = (String) referenceSpeciesNames.get(0); + + logger.info("Beginning orthoinference of " + targetSpeciesName); + JSONObject targetSpeciesRefDb = (JSONObject) targetSpeciesObject.get("refdb"); +// String refDbUrl = (String) refDb.get("url"); +// String refDbProteinUrl = (String) refDb.get("access"); +// String 
refDbGeneUrl = (String) refDb.get("ensg_access"); // Creates two files that a) list reactions that are eligible for inference and b) those that are successfully inferred - String eligibleFilename = "eligible_" + species + "_75.txt"; - String inferredFilename = "inferred_" + species + "_75.txt"; + String eligibleFilename = "eligible_" + targetSpecies + "_75.txt"; + String inferredFilename = "inferred_" + targetSpecies + "_75.txt"; createNewFile(eligibleFilename); createNewFile(inferredFilename); ReactionInferrer.setEligibleFilename(eligibleFilename); ReactionInferrer.setInferredFilename(inferredFilename); - stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) speciesObject.get("abbreviation")); + stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) targetSpeciesObject.get("abbreviation")); // Set static variables (DB/Species Instances, mapping files) that will be repeatedly used setInstanceEdits(personId); try { - readAndSetHomologueMappingFile(species, "hsap", pathToOrthopairs); - readAndSetGeneNameMappingFile(species, pathToOrthopairs); + readAndSetHomologueMappingFile(targetSpecies, referenceSpecies, pathToOrthopairs); +// readAndSetGeneNameMappingFile(targetSpecies, pathToOrthopairs); } catch (Exception e) { - logger.fatal("Unable to locate " + speciesName +" mapping file: hsap_" + species + "_mapping.tsv. Orthology prediction not possible."); + logger.fatal("Unable to locate " + targetSpeciesName +" mapping file: hsap_" + targetSpecies + "_mapping.tsv. 
Orthology prediction not possible."); e.printStackTrace(); System.exit(1); } - EWASInferrer.readENSGMappingFile(species, pathToOrthopairs); - EWASInferrer.fetchAndSetUniprotDbInstance(); - EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); - EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); + try { + EWASInferrer.readAndSetCoordinateMappingFile(targetSpecies); + } catch (Exception e) { + logger.fatal("Unable to locate coordinate mapping file"); + e.printStackTrace(); + System.exit(1); + } +// EWASInferrer.readENSGMappingFile(targetSpecies, pathToOrthopairs); + EWASInferrer.fetchAndSetDbInstances(); +// EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); +// EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); - JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb"); + JSONObject altRefDbJSON = (JSONObject) targetSpeciesObject.get("alt_refdb"); if (altRefDbJSON != null) { - logger.info("Alternate DB exists for " + speciesName); + logger.info("Alternate DB exists for " + targetSpeciesName); EWASInferrer.createAlternateReferenceDBInstance(altRefDbJSON); } else { EWASInferrer.setAltRefDbToFalse(); } - createAndSetSpeciesInstance(speciesName); - setSummationInstance(); + createAndSetSpeciesInstance(targetSpeciesName); +// setSummationInstance(); setEvidenceTypeInstance(); - OrthologousEntityGenerator.setComplexSummationInstance(); + InstanceUtilities.setDiseaseInstance(dbAdaptor.fetchInstance(9683915L)); +// OrthologousEntityGenerator.setComplexSummationInstance(); /** - * Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the species. + * Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the targetSpecies. 
*/ - // Gets DB instance of source species (human) - Collection sourceSpeciesInst = (Collection) dbAdaptor.fetchInstanceByAttribute("Species", "name", "=", "Homo sapiens"); - if (sourceSpeciesInst.isEmpty()) + // Gets DB instance of source targetSpecies (human) + Collection referenceSpeciesInst = (Collection) dbAdaptor.fetchInstanceByAttribute("Species", "name", "=", referenceSpeciesName); + if (referenceSpeciesInst.isEmpty()) { - logger.fatal("Could not find Species instance for Homo sapiens"); + logger.fatal("Could not find Species instance for " + referenceSpeciesName); System.exit(1); } - long humanInstanceDbId = sourceSpeciesInst.iterator().next().getDBID(); - orthologousPathwayDiagramGenerator = new OrthologousPathwayDiagramGenerator(dbAdaptor, dbAdaptorPrev, speciesInst, personId, humanInstanceDbId); - // Gets Reaction instances of source species (human) - Collection reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute("ReactionlikeEvent", "species", "=", humanInstanceDbId); - + long referenceSpeciesInstanceDbId = referenceSpeciesInst.iterator().next().getDBID(); + orthologousPathwayDiagramGenerator = new OrthologousPathwayDiagramGenerator(dbAdaptor, dbAdaptorPrev, speciesInst, personId, referenceSpeciesInstanceDbId); + // Gets Reaction instances of source targetSpecies (human) + Collection reactionInstances = new ArrayList<>(); // + if (referenceSpeciesName.equals("Human SARS coronavirus")) { + GKInstance covPathwayInst = dbAdaptor.fetchInstance(sarsCOV1InfectionsPathwayDbId); + Set uniqueReactionInstances = new HashSet<>(); + for (GKInstance hasEventInst : (Collection) covPathwayInst.getAttributeValuesList(hasEvent)) { + uniqueReactionInstances.addAll(getReactionsInEventHierarchy(hasEventInst)); + } + reactionInstances.addAll(uniqueReactionInstances); + } else { + reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute(ReactionlikeEvent, species, "=", referenceSpeciesInstanceDbId); + } List dbids = new ArrayList<>(); Map 
reactionMap = new HashMap<>(); for (GKInstance reactionInst : reactionInstances) { @@ -160,13 +183,12 @@ public static void inferEvents(Properties props, String species) throws Exceptio reactionMap.put(reactionInst.getDBID(), reactionInst); } Collections.sort(dbids); - - logger.info(sourceSpeciesInst.iterator().next().getDisplayName() + " ReactionlikeEvent instances: " + dbids.size()); + logger.info(referenceSpeciesInst.iterator().next().getDisplayName() + " ReactionlikeEvent instances: " + dbids.size()); for (Long dbid : dbids) { GKInstance reactionInst = reactionMap.get(dbid); logger.info("Attempting RlE inference: " + reactionInst); - // Check if the current Reaction already exists for this species, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute. + // Check if the current Reaction already exists for this targetSpecies, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute. // Adds to manualHumanEvents array if it passes conditions. This code block allows you to re-run the code without re-inferring instances. List previouslyInferredInstances = new ArrayList(); previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, orthologousEvent, previouslyInferredInstances); @@ -185,7 +207,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio continue; } - // An inferred ReactionlikeEvent doesn't already exist for this species, and an orthologous inference will be attempted. + // An inferred ReactionlikeEvent doesn't already exist for this targetSpecies, and an orthologous inference will be attempted. try { ReactionInferrer.inferReaction(reactionInst); logger.info("Successfully inferred " + reactionInst); @@ -194,11 +216,58 @@ public static void inferEvents(Properties props, String species) throws Exceptio System.exit(1); } } + + // Outputs a file that contains all EWAS' that were inferred and their contained hasModifiedResidues. 
+// Map> modifiedResiduesMapping = EWASInferrer.getModifiedResiduesMapping(); +// String header = "COV-1 EWAS\tCOV-1 ModifiedResidues\n"; +// Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), header.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); +// for (GKInstance ewasInst : modifiedResiduesMapping.keySet()) { +// String outputLine = ewasInst + "\t"; +// List modifiedResidues = modifiedResiduesMapping.get(ewasInst); +// int count = 0; +// for (GKInstance modifiedResidueInst : modifiedResidues) { +// if (count != 0) { +// outputLine += "|" + modifiedResidueInst; +// } else { +// outputLine += modifiedResidueInst; +// count++; +// } +// } +// outputLine += "\n"; +// Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), outputLine.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); +// } + PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); - outputReport(species); - logger.info("Finished orthoinference of " + speciesName); + outputReport(targetSpecies); + logger.info("Finished orthoinference of " + targetSpeciesName); + + System.out.println(dbAdaptor.fetchMaxDbId()); + + // Find inferred instances that are referred to by multiple curated instances + // Likely due to lack of distinguishing 'defining' attributes. 
+// Collection inferredInstances = dbAdaptor.fetchInstancesByClass(Event); +// inferredInstances.addAll(dbAdaptor.fetchInstancesByClass(PhysicalEntity)); +// for (GKInstance inferredInst : inferredInstances) { +// GKInstance createdInst = (GKInstance) inferredInst.getAttributeValue(created); +// Collection inferredFromInstances = inferredInst.getAttributeValuesList(inferredFrom); +// if (createdInst != null && createdInst.getDisplayName().contains("Justin") && inferredFromInstances.size() > 1) { +// System.out.println(inferredInst); +// } +// } + } + + private static Set getReactionsInEventHierarchy(GKInstance eventInst) throws Exception { + Set reactionInstances = new HashSet<>(); + if (eventInst.getSchemClass().isa(ReactionlikeEvent)) { + reactionInstances.add(eventInst); + } else { + for (GKInstance hasEventInst : (Collection) eventInst.getAttributeValuesList(hasEvent)) { + reactionInstances.addAll(getReactionsInEventHierarchy(hasEventInst)); + } + } + return reactionInstances; } /** diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 2774322d..f3b2280c 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -1,19 +1,14 @@ package org.reactome.orthoinference; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; + import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.GKSchemaAttribute; -import org.gk.schema.GKSchemaClass; -import org.gk.schema.SchemaClass; +import org.gk.schema.*; // GenerateInstance is meant to act as a catch-all for functions that are instance-oriented, such as creating, mocking, or 
identical-checking. @@ -23,8 +18,10 @@ public class InstanceUtilities { private static MySQLAdaptor dba; private static GKInstance speciesInst; private static GKInstance instanceEditInst; + private static GKInstance diseaseInst; private static Map mockedIdenticals = new HashMap<>(); - + private static String inferredEventsReactomeURL = "https://reactome.org/documentation/inferred-events"; + // Creates new instance that will be inferred based on the incoming instances class public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInferred) throws Exception { @@ -43,7 +40,7 @@ public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInfe for (Object compartmentInst : instanceToBeInferred.getAttributeValuesList(compartment)) { GKInstance compartmentInstGk = (GKInstance) compartmentInst; - if (compartmentInstGk.getSchemClass().isa(Compartment)) + if (compartmentInstGk.getSchemClass().isa(Compartment)) { inferredInst.addAttributeValue(compartment, compartmentInstGk); } else { @@ -54,7 +51,18 @@ public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInfe } if (instanceToBeInferred.getSchemClass().isValidAttribute(species) && instanceToBeInferred.getAttributeValue(species) != null) { - inferredInst.addAttributeValue(species, speciesInst); + GKInstance originalSpeciesInst = (GKInstance) instanceToBeInferred.getAttributeValue(species); + if (originalSpeciesInst.getDBID().equals(48887L)) { + inferredInst.addAttributeValue(species, instanceToBeInferred.getAttributeValue(species)); + } else { + inferredInst.addAttributeValue(species, speciesInst); + } + } + if (instanceToBeInferred.getSchemClass().isValidAttribute(relatedSpecies) && instanceToBeInferred.getAttributeValue(relatedSpecies) != null) { + List relatedSpeciesList = instanceToBeInferred.getAttributeValuesList(relatedSpecies); +// if (relatedSpeciesList.contains(speciesInst)) { + inferredInst.addAttributeValue(relatedSpecies, speciesInst); +// } } return 
inferredInst; } @@ -129,14 +137,93 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI return identicalInstances.iterator().next(); } } else { + if (inferredInst.getSchemClass().isa(PhysicalEntity)) { - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); - inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); +// inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + } + + // COV-1-to-COV-2 Projection additions. + if (originalInst != null) { + if (inferredInst.getSchemClass().isValidAttribute(literatureReference) && originalInst.getAttributeValue(literatureReference) != null) { + inferredInst.setAttributeValue(literatureReference, originalInst.getAttributeValuesList(literatureReference)); + } + if (inferredInst.getSchemClass().isValidAttribute(crossReference) && originalInst.getAttributeValue(crossReference) != null) { + inferredInst.setAttributeValue(crossReference, originalInst.getAttributeValuesList(crossReference)); + } + if (inferredInst.getSchemClass().isValidAttribute(disease) && originalInst.getAttributeValue(disease) != null) { + inferredInst.setAttributeValue(disease, diseaseInst); + } + if (inferredInst.getSchemClass().isValidAttribute(isChimeric) && originalInst.getAttributeValue(isChimeric) != null) { + inferredInst.setAttributeValue(isChimeric, originalInst.getAttributeValue(isChimeric)); + } + if (inferredInst.getSchemClass().isValidAttribute(includedLocation) && originalInst.getAttributeValuesList(includedLocation) != null) { + inferredInst.setAttributeValue(includedLocation, originalInst.getAttributeValuesList(includedLocation)); + } + + if (inferredInst.getSchemClass().isValidAttribute(summation)) { + 
createCOVSummationInstances(inferredInst, originalInst); + } + } + + // Inferred Summations should keep the normal displayName + if (!inferredInst.getSchemClass().isa(Summation)) { + String updatedDisplayName = inferredInst.getDisplayName().replace("CoV-1", "CoV-2"); + inferredInst.setDisplayName(updatedDisplayName); + } + if (inferredInst.getSchemClass().isValidAttribute(name)) { + List names = inferredInst.getAttributeValuesList(name); + List newNames = new ArrayList<>(); + for (String name : names) { + String newName = name.replace("CoV-1", "CoV-2"); + newNames.add(newName); + } + inferredInst.setAttributeValue(name, newNames); } + // + dba.storeInstance(inferredInst); + +// if (inferredInst.getSchemClass().isa(PhysicalEntity)) { +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); +// inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// dba.updateInstanceAttribute(inferredInst, stableIdentifier); +// } + return inferredInst; } } + + public static void createCOVSummationInstances(GKInstance inferredInst, GKInstance originalInst) throws Exception { + + List originalSummationInstances = originalInst.getAttributeValuesList(summation); + String summationText = "This COVID-19 " + originalInst.getSchemClass().getName() + " instance was generated via electronic inference from a curated CoV-1 (Human SARS coronavirus) Reactome instance. 
In Reactome, inference is the process used to automatically create orthologous Pathways, Reactions and PhysicalEntities from our expertly curated data (" + inferredEventsReactomeURL + ")."; + if (originalSummationInstances.size() > 0) { + for (GKInstance summationInst : originalSummationInstances) { + inferredInst.addAttributeValue(summation, createCOVSummationInst(summationInst, summationText)); + } + } else { + inferredInst.addAttributeValue(summation, createCOVSummationInst(null, summationText)); + } + } + + private static GKInstance createCOVSummationInst(GKInstance summationInst, String summationText) throws Exception { + + GKInstance infSummationInst = new GKInstance(dba.getSchema().getClassByName(Summation)); + infSummationInst.setDbAdaptor(dba); + infSummationInst.setAttributeValue(created, instanceEditInst); +// String summationDisplayName = summationInst != null ? summationInst.getDisplayName() : summationText; +// infSummationInst.setDisplayName(summationDisplayName); + String updatedSummationText = summationInst != null ? 
summationText + "\n\n" + summationInst.getAttributeValue(text).toString() : summationText; + infSummationInst.setAttributeValue(text, updatedSummationText); + infSummationInst.setDisplayName(updatedSummationText); + if (summationInst != null) { + infSummationInst.setAttributeValue(literatureReference, summationInst.getAttributeValuesList(literatureReference)); + } + infSummationInst = checkForIdenticalInstances(infSummationInst, summationInst); + return infSummationInst; + } + // Checks if the instanceToCheck already contains the instanceToUse in the multi-value attribute @SuppressWarnings("unchecked") public static GKInstance addAttributeValueIfNecessary(GKInstance instanceToBeCheckedForExistingAttribute, GKInstance instanceContainingAttributeToBeChecked, String attribute) throws Exception @@ -219,4 +306,12 @@ public static void setInstanceEdit(GKInstance instanceEditCopy) { instanceEditInst = instanceEditCopy; } + + public static void setDiseaseInstance(GKInstance diseaseInstanceCopy) { + diseaseInst = diseaseInstanceCopy; + } + + public static GKInstance getDiseaseInst() { + return diseaseInst; + } } diff --git a/src/main/java/org/reactome/orthoinference/Main.java b/src/main/java/org/reactome/orthoinference/Main.java index a82d8ca0..bdeb4c9c 100644 --- a/src/main/java/org/reactome/orthoinference/Main.java +++ b/src/main/java/org/reactome/orthoinference/Main.java @@ -24,12 +24,14 @@ public static void main(String[] args) throws Exception { speciesCode = args[0]; } else { logger.fatal("Please include a 4-letter species code as the first argument (eg: mmus)"); - System.exit(0); +// System.exit(0); } + String referenceSpeciesCode = "cov1"; + speciesCode="cov2"; Properties props = new Properties(); props.load(new FileInputStream(pathToConfig)); - EventsInferrer.inferEvents(props, speciesCode); + EventsInferrer.inferEvents(props, referenceSpeciesCode, speciesCode); } } diff --git a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java 
b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 92533995..66ccce56 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -1,22 +1,15 @@ package org.reactome.orthoinference; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; + +import org.gk.model.ReactomeJavaConstants; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.GKSchemaClass; -import org.gk.schema.InvalidAttributeException; -import org.gk.schema.InvalidAttributeValueException; -import org.gk.schema.SchemaClass; +import org.gk.schema.*; public class OrthologousEntityGenerator { @@ -33,8 +26,11 @@ public class OrthologousEntityGenerator { private static Map definedSetIdenticals = new HashMap<>(); private static Map complexIdenticals = new HashMap<>(); private static Map entitySetIdenticals = new HashMap<>(); + private static Map> nonHumanParticpants = new HashMap<>(); + private static Map inferredSARSIdenticals = new HashMap<>(); + private static Map humanComplexIdenticals = new HashMap<>(); -/** The heart of the OrthoInference process. This function takes PhysicalEntity (PE) instances and will infer those that are EWAS', Complexes/Polymers, or EntitySets. + /** The heart of the OrthoInference process. This function takes PhysicalEntity (PE) instances and will infer those that are EWAS', Complexes/Polymers, or EntitySets. The function's arguments are an incoming PE instance and an override attribute. 
Instances that are comprised of PE's will often recursively call this createOrthoEntity function on constituent PE's with the override attribute set to 'true'. This ensures that these PE's are inferred, despite the fact that they might not pass some filter criteria. This is often handled using 'mock' instances (i.e. 'ghost instances' from Perl script), which allow a PE to be inferred without having to commit a 'real' instance to the DB. @@ -56,6 +52,12 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri return orthologousEntityIdenticals.get(entityInst); } + GKInstance entitySpeciesInst = (GKInstance) entityInst.getAttributeValue(species); + if (entitySpeciesInst != null && entitySpeciesInst.getDBID().equals(48887L)) { + inferSARSParticipants(entityInst); + return entityInst; + } + // Checks that a species attribute exists in either the current instance or in constituent instances. if (!SpeciesCheckUtility.checkForSpeciesAttribute(entityInst)) { @@ -108,7 +110,161 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri logger.info("PE inference completed: " + entityInst); return infEntityInst; } - + + private static GKInstance inferSARSParticipants(GKInstance entityInst) throws Exception { + + if (humanComplexIdenticals.get(entityInst) == null) { + Set containedInstances = getComplexEntitySetContainedInstances(entityInst); + + boolean hasContainedSARSInstance = false; + for (GKInstance containedInst : containedInstances) { + if (hasSARSSpecies(containedInst)) { + hasContainedSARSInstance = true; + if (inferredSARSIdenticals.get(containedInst) == null) { + GKInstance inferredSARSEntityInst = createOrthoEntity(containedInst, false); + inferredSARSIdenticals.put(containedInst, inferredSARSEntityInst); + } + } + } + + if (hasContainedSARSInstance) { + // Outputs Human Complexes/EntitySets that contain CoV-1 instances. 
+// System.out.println(entityInst); + GKInstance copiedHumanComplex = InstanceUtilities.createNewInferredGKInstance(entityInst); + for (SchemaAttribute complexAttr : (Collection) entityInst.getSchemClass().getAttributes()) { + if (!complexAttr.getName().equals(authored) + && !complexAttr.getName().equals(created) + && !complexAttr.getName().equals(modified) + && !complexAttr.getName().equals(relatedSpecies) + && !complexAttr.getName().equals(disease) + && !complexAttr.getName().equals(reviewed) + && !complexAttr.getName().equals(inferredFrom) + && !complexAttr.getName().equals(inferredTo) + && !complexAttr.getName().equals(DB_ID) + && !complexAttr.getName().equals(stableIdentifier) + && !complexAttr.getName().equals(revised) + && !complexAttr.getName().equals(edited) + && !complexAttr.getName().equals(compartment) + && !complexAttr.getName().equals(species)) { + + if (entityInst.getAttributeValuesList(complexAttr).size() > 0) { + for (Object attrValue : entityInst.getAttributeValuesList(complexAttr)) { + copiedHumanComplex.addAttributeValue(complexAttr, attrValue); + } + } + } + } + + List components = (List) copiedHumanComplex.getAttributeValuesList(hasComponent); + List updatedComponents = new ArrayList<>(); + for (GKInstance component : components) { + if (hasSARSSpecies(component)) { + updatedComponents.add(inferredSARSIdenticals.get(component)); + } else { + updatedComponents.add(component); + } + } + copiedHumanComplex.setAttributeValue(hasComponent, updatedComponents); + + copiedHumanComplex = InstanceUtilities.checkForIdenticalInstances(copiedHumanComplex, entityInst); + + copiedHumanComplex = InstanceUtilities.addAttributeValueIfNecessary(copiedHumanComplex, entityInst, inferredFrom); + dba.updateInstanceAttribute(copiedHumanComplex, inferredFrom); + entityInst = InstanceUtilities.addAttributeValueIfNecessary(entityInst, copiedHumanComplex, inferredTo); + dba.updateInstanceAttribute(entityInst, inferredTo); + + humanComplexIdenticals.put(entityInst, 
copiedHumanComplex); + + + /////// This code was used for troubleshooting and to see how far down 'multi-species' instances went in the Complex/EntitySet hierarchy +// for (String attr : complexAttrs) { +// System.out.println(attr); +// } +// for (GKInstance containedInst : containedInstances) { + +// if (hasSARSSpecies(containedInst)) { +////// System.out.println("\t" + containedInst); +//// Set subContainedInstances = getComplexEntitySetContainedInstances(containedInst); +//// for (GKInstance subContainedInst : subContainedInstances) { +//// if (hasSARSSpecies(subContainedInst)) { +////// System.out.println("\t\t" + subContainedInst); +////// Set subSubContainedInstances = getComplexEntitySetContainedInstances(subContainedInst); +////// for (GKInstance subSubContainedInst : subSubContainedInstances) { +////// if (hasSARSSpecies(subSubContainedInst)) { +//////// System.out.println("\t\t\t" + subSubContainedInst); +////// Set subSubSubContainedInstances = getComplexEntitySetContainedInstances(subSubContainedInst); +////// for (GKInstance subSubSubContainedInst : subSubSubContainedInstances) { +////// if (hasSARSSpecies(subSubSubContainedInst)) { +//////// System.out.println("\t\t\t\t" + subSubSubContainedInst); +////// } else if (hasContainedSARSInstance(subSubSubContainedInst)) { +////// +////// } else { +////// System.out.println(subSubSubContainedInst.getAttributeValue(species) + "\t\t" + subSubSubContainedInst); +////// } +////// } +////// } else if (hasContainedSARSInstance(subSubContainedInst)) { +////// +////// } +////// } +//// } else if (hasContainedSARSInstance(subContainedInst)) { +//// +//// } else { +//// System.out.println(subContainedInst.getAttributeValue(species) + "\t\t" + subContainedInst); +//// } +//// } +// } else if (hasContainedSARSInstance(containedInst)) { +//// Set subContainedInstances = getComplexEntitySetContainedInstances(containedInst); +//// System.out.println(subContainedInstances.size()); +//// for (GKInstance subContainedInst : 
subContainedInstances) { +//// System.out.println(subContainedInst); +//// if (hasSARSSpecies(subContainedInst)) { +//// System.out.println("\t\tTWOO: " + subContainedInst); +//// } else if (hasContainedSARSInstance(subContainedInst)) { +//// System.out.println("\t\t\tTEE: " + subContainedInst); +//// } else { +//// System.out.println("\t\t\t\t\t\t\tDUDDD: " + subContainedInst); +//// } +//// } +// } else { +//// System.out.println("\t\t\t\t\tDUD: " + containedInst); +//// System.out.println(containedInst.getAttributeValue(species) + "\t\t" + containedInst); +// } +// } + ///////////// + + } + + } + return humanComplexIdenticals.get(entityInst); + } + + public static boolean hasSARSSpecies(GKInstance entityInst) throws Exception { + if (entityInst.getSchemClass().isValidAttribute(species)) { + GKInstance speciesInst = (GKInstance) entityInst.getAttributeValue(species); + return speciesInst != null && speciesInst.getDBID().equals(9678119L); + } + return false; + } + + private static boolean hasContainedSARSInstance(GKInstance subEntityInst) throws Exception { + boolean hasContainedSARSInstance = false; + for (GKInstance subContainedInst : getComplexEntitySetContainedInstances(subEntityInst)) { + if (hasSARSSpecies(subContainedInst)) { + hasContainedSARSInstance = true; + } + } + return hasContainedSARSInstance; + } + + private static Set getComplexEntitySetContainedInstances(GKInstance entityInst) throws Exception { + return org.gk.model.InstanceUtilities.getContainedInstances(entityInst, + ReactomeJavaConstants.hasMember, + ReactomeJavaConstants.hasCandidate, + ReactomeJavaConstants.hasComponent, + ReactomeJavaConstants.repeatedUnit + ); + } + // Function that first tries to infer any EWAS' associated with the instance. For those that have more than 1 returned EWAS instance, // it's re-structured to a DefinedSet instance. If there is no EWAS instances inferred, it will either return null or, if override is set, return a mock instance. 
private static GKInstance createInfEWAS(GKInstance ewasInst, boolean override) throws InvalidAttributeException, Exception @@ -199,7 +355,7 @@ private static GKInstance createInfComplexPolymer(GKInstance complexInst, boolea } logger.info("Complex protein counts. Total: " + complexTotalProteinCounts + " Inferrable: " + complexInferrableProteinCounts); GKInstance infComplexInst = InstanceUtilities.createNewInferredGKInstance(complexInst); - infComplexInst.addAttributeValue(summation, complexSummationInst); +// infComplexInst.addAttributeValue(summation, complexSummationInst); infComplexInst.addAttributeValue(name, complexInst.getAttributeValue(name)); List infComponentInstances = new ArrayList<>(); // Inference handling is different depending on if it is a Complex or a Polymer. Complexes will infer all 'components' while Polymers will infer all 'repeatedUnits'. @@ -438,4 +594,8 @@ public static void setComplexSummationInstance() throws Exception complexSummationInst.setAttributeValue(_displayName, complexSummationText); complexSummationInst = InstanceUtilities.checkForIdenticalInstances(complexSummationInst, null); } + + public static Map> getNonHumanParticipants() { + return nonHumanParticpants; + } } diff --git a/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java index e3ec4e90..eb50fa1d 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java @@ -45,7 +45,7 @@ public void generateOrthologousPathwayDiagrams() throws Exception { // Iterate through each PathwayDiagram instance looking for those associated with the reference species. 
for (GKInstance diagramInst: (Collection) dba.fetchInstancesByClass(ReactomeJavaConstants.PathwayDiagram)) { GKInstance pathwayInst = (GKInstance) diagramInst.getAttributeValue(ReactomeJavaConstants.representedPathway); - if (isSameSpecies(pathwayInst, referenceSpeciesInst)) { + if (pathwayInst.getAttributeValue(ReactomeJavaConstants.disease) != null) { // When a PathwayDiagram instance associated with the reference species is found, iterate through all of it's OrthologousEvent instances. for (GKInstance orthoPathwayInst : (Collection) pathwayInst.getAttributeValuesList(ReactomeJavaConstants.orthologousEvent)) { // Look for OrthologousEvent instances that match the current target species and that are electronically inferred. @@ -60,7 +60,7 @@ public void generateOrthologousPathwayDiagrams() throws Exception { public GKInstance generateOrthologousPathwayDiagram(GKInstance orthoPathwayInst, GKInstance pathwayInst, GKInstance diagramInst, PredictedPathwayDiagramGeneratorFromDB diagramGenerator) throws Exception { GKInstance orthoDiagram = null; - if (isSameSpecies(orthoPathwayInst, targetSpeciesInst) && isElectronicallyInferred(orthoPathwayInst)) { + if (isElectronicallyInferred(orthoPathwayInst)) { // Generate Orthologous PathwayDiagram instance using generatePredictedDiagram method from PredictedPathwayDiagramGeneratorFromDB. // This method is the one needed to build PathwayDiagrams for species-specific Pathway instances. 
logger.info("Building inferred Pathway diagram for " + orthoPathwayInst); @@ -72,6 +72,9 @@ public GKInstance generateOrthologousPathwayDiagram(GKInstance orthoPathwayInst, // Compare the species attribute in a Pathway with another species instance for equality public boolean isSameSpecies(GKInstance pathwayInst, GKInstance speciesInst) throws Exception { GKInstance pathwaySpeciesInst = (GKInstance) pathwayInst.getAttributeValue(ReactomeJavaConstants.species); + if (pathwayInst.getAttributeValue(ReactomeJavaConstants.relatedSpecies) != null) { + pathwaySpeciesInst = (GKInstance) pathwayInst.getAttributeValue(ReactomeJavaConstants.relatedSpecies); + } return pathwaySpeciesInst.equals(speciesInst); } diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index db4ca991..1462b707 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -74,7 +74,7 @@ private static void createInferredPathwayHierarchy(GKInstance sourceEventInst) t for (GKInstance sourcePathwayReferralInst : sourcePathwayReferralInstances) { logger.info("Generating inferred Pathway: " + sourcePathwayReferralInst); - if (inferredEventIdenticals.get(sourcePathwayReferralInst) == null) + if (inferredEventIdenticals.get(sourcePathwayReferralInst) == null && !sourcePathwayReferralInst.getDBID().equals(9679191L)) { inferPathway(sourcePathwayReferralInst); } else { @@ -91,7 +91,8 @@ private static List safeList(Collection collection) { private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Exception { GKInstance infPathwayInst = InstanceUtilities.createNewInferredGKInstance(sourcePathwayReferralInst); infPathwayInst.addAttributeValue(name, sourcePathwayReferralInst.getAttributeValuesList(name)); - infPathwayInst.addAttributeValue(summation, summationInst); +// infPathwayInst.addAttributeValue(summation, 
summationInst); + InstanceUtilities.createCOVSummationInstances(infPathwayInst, sourcePathwayReferralInst); if (infPathwayInst.getSchemClass().isValidAttribute(releaseDate)) { infPathwayInst.addAttributeValue(releaseDate, dateOfRelease); @@ -109,11 +110,42 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex logger.warn(sourcePathwayReferralInst + " is a ReactionLikeEvent, which is unexpected -- refer to infer_events.pl"); } infPathwayInst.setDisplayName(sourcePathwayReferralInst.getDisplayName()); + + // COV-1-to-COV-2 Projection code + if (sourcePathwayReferralInst.getAttributeValue(disease) != null) { + infPathwayInst.setAttributeValue(disease, InstanceUtilities.getDiseaseInst()); + } + + if (sourcePathwayReferralInst.getAttributeValuesList(literatureReference) != null) { + infPathwayInst.setAttributeValue(literatureReference, sourcePathwayReferralInst.getAttributeValuesList(literatureReference)); + } + + if (sourcePathwayReferralInst.getAttributeValuesList(definition) != null) { + for (String definitionString : (Collection) sourcePathwayReferralInst.getAttributeValuesList(definition)) { + infPathwayInst.addAttributeValue(definition, definitionString); + } + } + String updatedDisplayName = infPathwayInst.getDisplayName().replace("CoV-1", "CoV-2"); + infPathwayInst.setDisplayName(updatedDisplayName); + List names = infPathwayInst.getAttributeValuesList(name); + List newNames = new ArrayList<>(); + for (String name : names) { + String newName = name.replace("CoV-1", "CoV-2"); + newNames.add(newName); + } + infPathwayInst.setAttributeValue(name, newNames); + inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); - infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + +// GKInstance orthoStableIdentifierInst = 
EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); +// infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + // dba.storeInstance(infPathwayInst); +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); +// infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// dba.updateInstanceAttribute(infPathwayInst, stableIdentifier); + // This was replaced with addAttributeValueIfNecessary due to a bug where a Pathway instance's 'OrthologousEvent' attribute was being replaced, // instead of being added to the existing array when the script was executed from a jar (rather than from Eclipse) (Justin Cook 2018) sourcePathwayReferralInst = InstanceUtilities.addAttributeValueIfNecessary(sourcePathwayReferralInst, infPathwayInst, orthologousEvent); diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index b422e1e5..152c4649 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -3,18 +3,13 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.InvalidAttributeException; public class ReactionInferrer { @@ -45,10 +40,39 @@ public static void inferReaction(GKInstance reactionInst) throws Exception { ///// The beginning of an inference process: // Creates 
inferred instance of reaction. + + // This code screens Reactions that will not need to be inferred. + Collection reactionComponents = org.gk.model.InstanceUtilities.getReactionParticipants(reactionInst); + Set containedComponents = new HashSet<>(); + boolean hasContainedSARSInstance = false; + for (GKInstance reactionComponent : reactionComponents) { + containedComponents.add(reactionComponent); + containedComponents.addAll(org.gk.model.InstanceUtilities.getContainedInstances(reactionComponent, + hasComponent, + hasCandidate, + hasMember, + repeatedUnit)); + } + + for (GKInstance containedComponent : containedComponents) { + if (OrthologousEntityGenerator.hasSARSSpecies(containedComponent)) { + hasContainedSARSInstance = true; + } + } + + if (!hasContainedSARSInstance) { + inferredEvent.put(reactionInst, reactionInst); + inferrableHumanEvents.add(reactionInst); + return; + } + // End of screening code. + + GKInstance infReactionInst = InstanceUtilities.createNewInferredGKInstance(reactionInst); infReactionInst.addAttributeValue(name, reactionInst.getAttributeValuesList(name)); infReactionInst.addAttributeValue(goBiologicalProcess, reactionInst.getAttributeValue(goBiologicalProcess)); - infReactionInst.addAttributeValue(summation, summationInst); +// infReactionInst.addAttributeValue(summation, summationInst); + InstanceUtilities.createCOVSummationInstances(infReactionInst, reactionInst); infReactionInst.addAttributeValue(evidenceType, evidenceTypeInst); infReactionInst.addAttributeValue(_displayName, reactionInst.getAttributeValue(_displayName)); @@ -57,10 +81,10 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // Reactions with no proteins/EWAS (Total = 0) are not inferred. 
List reactionProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionInst); int reactionTotalProteinCounts = reactionProteinCounts.get(0); - if (reactionTotalProteinCounts > 0) - { +// if (reactionTotalProteinCounts > 0) +// { logger.info("Total protein count for RlE: " + reactionTotalProteinCounts); - String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; + String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; // Having passed all tests/filters until now, the reaction is recorded in the 'eligible reactions' file, meaning inference is continued. eligibleCount++; Files.write(Paths.get(eligibleFilehandle), eligibleEventName.getBytes(), StandardOpenOption.APPEND); @@ -75,7 +99,7 @@ public static void inferReaction(GKInstance reactionInst) throws Exception logger.info("Inferring catalysts..."); if (inferReactionCatalysts(reactionInst, infReactionInst)) { - // Many reactions are not regulated at all, meaning inference is attempted but will not end the process if there is nothing to infer. + // Many reactions are not regulated at all, meaning inference is attempted but will not end the process if there is nothing to infer. // The inference process will end though if inferRegulations returns an invalid value. 
logger.info("Inferring regulations..."); List inferredRegulations = inferReactionRegulations(reactionInst); @@ -83,15 +107,47 @@ public static void inferReaction(GKInstance reactionInst) throws Exception { return; } - if (infReactionInst.getSchemClass().isValidAttribute(releaseDate)) + if (infReactionInst.getSchemClass().isValidAttribute(releaseDate)) { infReactionInst.addAttributeValue(releaseDate, dateOfRelease); } + +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); +// infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // FetchIdenticalInstances would just return the instance being inferred. Since this step is meant to always // add a new inferred instance, the storeInstance method is just called here. - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); - infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + + // COV-1-to-COV-2 Projection additions. 
+ if (reactionInst.getAttributeValuesList(literatureReference) != null) { + infReactionInst.setAttributeValue(literatureReference, reactionInst.getAttributeValuesList(literatureReference)); + } + if (reactionInst.getAttributeValue(disease) != null) { + infReactionInst.setAttributeValue(disease, InstanceUtilities.getDiseaseInst()); + } + if (reactionInst.getAttributeValue(isChimeric) != null) { + infReactionInst.setAttributeValue(isChimeric, reactionInst.getAttributeValue(isChimeric)); + } + if (reactionInst.getAttributeValuesList(definition) != null) { + for (String definitionString : (Collection) reactionInst.getAttributeValuesList(definition)) { + infReactionInst.addAttributeValue(definition, definitionString); + } + } + // + String updatedDisplayName = infReactionInst.getDisplayName().replace("CoV-1", "CoV-2"); + infReactionInst.setDisplayName(updatedDisplayName); + List names = infReactionInst.getAttributeValuesList(name); + List newNames = new ArrayList<>(); + for (String name : names) { + String newName = name.replace("CoV-1", "CoV-2"); + newNames.add(newName); + } + infReactionInst.setAttributeValue(name, newNames); dba.storeInstance(infReactionInst); + +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); +// infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// dba.updateInstanceAttribute(infReactionInst, stableIdentifier); + logger.info("Inferred RlE instance: " + infReactionInst); if (infReactionInst.getSchemClass().isValidAttribute(inferredFrom)) @@ -103,9 +159,9 @@ public static void inferReaction(GKInstance reactionInst) throws Exception dba.updateInstanceAttribute(infReactionInst, orthologousEvent); reactionInst.addAttributeValue(orthologousEvent, infReactionInst); dba.updateInstanceAttribute(reactionInst, orthologousEvent); - + inferredEvent.put(reactionInst, infReactionInst); - + // Regulations instances require the DB to 
contain the inferred ReactionlikeEvent, so Regulations inference happens post-inference if (inferredRegulations.size() > 0) { @@ -120,7 +176,7 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // After successfully adding a new inferred instance to the DB, it is recorded in the 'inferred reactions' file inferredCount++; inferrableHumanEvents.add(reactionInst); - String inferredEvent = infReactionInst.getAttributeValue(DB_ID).toString() + "\t" + infReactionInst.getDisplayName() + "\n"; + String inferredEvent = infReactionInst.getAttributeValue(DB_ID).toString() + "\t" + infReactionInst.getDisplayName() + "\n"; Files.write(Paths.get(inferredFilehandle), inferredEvent.getBytes(), StandardOpenOption.APPEND); } else { logger.info("Catalyst inference unsuccessful -- terminating inference for " + reactionInst); @@ -131,9 +187,9 @@ public static void inferReaction(GKInstance reactionInst) throws Exception } else { logger.info("Input inference unsuccessful -- terminating inference for " + reactionInst); } - } else { - logger.info("No distinct proteins found in instance -- terminating inference for " + reactionInst); - } +// } else { +// logger.info("No distinct proteins found in instance -- terminating inference for " + reactionInst); +// } } } @@ -147,6 +203,7 @@ private static boolean inferReactionInputsOrOutputs(GKInstance reactionInst, GKI logger.info(attribute.substring(0,1).toUpperCase() + attribute.substring(1) + " instances: " + attributeInstances); for (GKInstance attributeInst : attributeInstances) { +// System.out.println("\t" + attribute + "\t" + attributeInst); GKInstance infAttributeInst = OrthologousEntityGenerator.createOrthoEntity(attributeInst, false); if (infAttributeInst == null) { @@ -172,6 +229,7 @@ private static boolean inferReactionCatalysts(GKInstance reactionInst, GKInstanc for (GKInstance catalystInst : catalystInstances) { logger.info("Attempting catalyst inference: " + catalystInst); +// 
System.out.println("\tcatalyst\t" + catalystInst); if (inferredCatalyst.get(catalystInst) == null) { GKInstance infCatalystInst = InstanceUtilities.createNewInferredGKInstance(catalystInst); @@ -227,6 +285,7 @@ private static List inferReactionRegulations(GKInstance reactionInst logger.info("Regulation instances: " + regulationInstances); for (GKInstance regulationInst : regulationInstances) { logger.info("Attempting Regulation inference: " + regulationInst); +// System.out.println("\tregulation\t" + regulationInst); GKInstance regulatorInst = (GKInstance) regulationInst.getAttributeValue(regulator); logger.info("Regulator: " + regulatorInst); GKInstance infRegulatorInst = null; diff --git a/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java b/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java index e21016b8..b87e3180 100644 --- a/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java +++ b/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java @@ -80,13 +80,13 @@ public static boolean checkIfInstanceShouldBeSkipped(GKInstance reactionInst) th if (reactionInst.getAttributeValue("relatedSpecies") != null) { logger.info(reactionInst + " has related species -- skipping"); - return true; +// return true; } // it is a disease reaction if (reactionInst.getAttributeValue(disease) != null) { logger.info(reactionInst + " is a disease reaction -- skipping"); - return true; +// return true; } // it is manually inferred if (reactionInst.getAttributeValue(inferredFrom) != null) @@ -99,7 +99,7 @@ public static boolean checkIfInstanceShouldBeSkipped(GKInstance reactionInst) th if (speciesInstances.size() > 1) { logger.info(reactionInst + " has multiple species -- skipping"); - return true; +// return true; } return false; } diff --git a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java index a052c992..b63ff5cf 100644 --- 
a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java +++ b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java @@ -43,14 +43,30 @@ public GKInstance generateOrthologousStableId(GKInstance inferredInst, GKInstanc // For now, Human is hard-coded as the source species, so we replace the stableIdentifier source species based on that assumption String sourceIdentifier = (String) stableIdentifierInst.getAttributeValue(identifier); - String targetIdentifier = sourceIdentifier.replace("HSA", speciesAbbreviation); + + // COV-1-to-COV-2 Projection Code + String sourceAbbreviation = "HSA"; + if (sourceIdentifier.contains("COV") && !originalInst.getSchemClass().isa(ReactionlikeEvent)) { + sourceAbbreviation = "COV"; + } + String targetIdentifier = sourceIdentifier.replace(sourceAbbreviation, speciesAbbreviation); + if (originalInst.getSchemClass().isa(Event)) { + targetIdentifier = targetIdentifier.replace("COV", "HSA"); + } + // Paralogs will have the same base stable identifier, but we want to denote when that happens. 
// We pull the value from `seenOrthoIds`, increment it and then add it to the stable identifier name (eg: R-MMU-123456-2) - int paralogCount = Optional.ofNullable(seenOrthoIds.get(targetIdentifier)).orElse(0) + 1; - seenOrthoIds.put(targetIdentifier, paralogCount); - if (paralogCount > 1) { - targetIdentifier += "-" + paralogCount; - } +// int paralogCount = Optional.ofNullable(seenOrthoIds.get(targetIdentifier)).orElse(0) + 1; +// seenOrthoIds.put(targetIdentifier, paralogCount); +// if (paralogCount > 1) { +// targetIdentifier += "-" + paralogCount; +// } + + + targetIdentifier += "-2"; +// String sourceDBID = originalInst.getDBID().toString(); +// targetIdentifier = targetIdentifier.replace(sourceDBID, inferredInst.getDBID().toString()); + // // Check that the stable identifier instance does not already exist in DB Collection existingStableIdentifier = (Collection) dba.fetchInstanceByAttribute("StableIdentifier", "identifier", "=", targetIdentifier); diff --git a/src/main/resources/Species.json b/src/main/resources/Species.json index 4020dc72..9ad1c0f6 100644 --- a/src/main/resources/Species.json +++ b/src/main/resources/Species.json @@ -328,5 +328,17 @@ "Rattus norvegicus" ], "abbreviation": "RNO" + }, + "cov1": { + "name" : [ + "Human SARS coronavirus" + ], + "abbreviation": "COV" + }, + "cov2": { + "name": [ + "Severe acute respiratory syndrome coronavirus 2" + ], + "abbreviation": "COV" } } \ No newline at end of file