From f1cd37ca1d247533d26cd5c27b3b322043f85c7a Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Wed, 24 Jun 2020 10:13:29 -0400 Subject: [PATCH 01/21] Initial cov1 to cov2 projection code --- .../reactome/orthoinference/EWASInferrer.java | 12 +-- .../orthoinference/EventsInferrer.java | 81 ++++++++++--------- .../org/reactome/orthoinference/Main.java | 6 +- .../orthoinference/SkipInstanceChecker.java | 6 +- .../StableIdentifierGenerator.java | 6 +- src/main/resources/Species.json | 12 +++ 6 files changed, 75 insertions(+), 48 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 685a2b9f..795c2ff9 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -55,7 +55,7 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr String homologueSource = homologue.contains(":") ? homologue.split(":")[0] : ""; String homologueId = homologue.contains(":") ? 
homologue.split(":")[1] : homologue; - if (checkValidSpeciesProtein(homologueId)) { +// if (checkValidSpeciesProtein(homologueId)) { GKInstance infReferenceGeneProductInst; if (referenceGeneProductIdenticals.get(homologueId) == null) { logger.info("Creating ReferenceGeneProduct for " + homologue); @@ -67,8 +67,8 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infReferenceGeneProductInst.addAttributeValue(referenceDatabase, referenceDatabaseInst); // Creates ReferenceDNASequence instance from ReferenceEntity - List inferredReferenceDNAInstances = createReferenceDNASequence(homologueId); - infReferenceGeneProductInst.addAttributeValue(referenceGene, inferredReferenceDNAInstances); +// List inferredReferenceDNAInstances = createReferenceDNASequence(homologueId); +// infReferenceGeneProductInst.addAttributeValue(referenceGene, inferredReferenceDNAInstances); infReferenceGeneProductInst.addAttributeValue(species, speciesInst); String referenceGeneProductSource = homologueSource.equals("ENSP") ? 
"ENSEMBL:" : "UniProt:"; @@ -215,9 +215,9 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr dba.updateInstanceAttribute(ewasInst, inferredTo); logger.info("Successfully inferred EWAS instance for " + homologue + " homologue"); infEWASInstances.add(infEWASInst); - } else { - logger.info("Gene ID corresponding to " + homologue + " not found in gene_protein_mapping file -- skipping EWAS inference"); - } +// } else { +// logger.info("Gene ID corresponding to " + homologue + " not found in gene_protein_mapping file -- skipping EWAS inference"); +// } } } else { logger.info("Could not infer EWAS, unable to find homologue for " + referenceEntityId); diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index a2c25ad0..845538af 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -11,6 +11,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; +import java.sql.SQLOutput; import java.util.*; import java.util.zip.GZIPInputStream; @@ -53,7 +54,7 @@ public class EventsInferrer private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator; @SuppressWarnings("unchecked") - public static void inferEvents(Properties props, String species) throws Exception + public static void inferEvents(Properties props, String referenceSpecies, String targetSpecies) throws Exception { logger.info("Preparing DB Adaptor and setting project variables"); // Set up DB adaptor using config.properties file @@ -91,67 +92,76 @@ public static void inferEvents(Properties props, String species) throws Exceptio JSONObject jsonObject = (JSONObject) obj; // Parse Species information (found in Species.json config file) - JSONObject speciesObject = (JSONObject) jsonObject.get(species); - JSONArray speciesNames = (JSONArray) 
speciesObject.get("name"); - String speciesName = (String) speciesNames.get(0); - logger.info("Beginning orthoinference of " + speciesName); + JSONObject targetSpeciesObject = (JSONObject) jsonObject.get(targetSpecies); + JSONArray targetSpeciesNames = (JSONArray) targetSpeciesObject.get("name"); + String targetSpeciesName = (String) targetSpeciesNames.get(0); - JSONObject refDb = (JSONObject) speciesObject.get("refdb"); - String refDbUrl = (String) refDb.get("url"); - String refDbProteinUrl = (String) refDb.get("access"); - String refDbGeneUrl = (String) refDb.get("ensg_access"); + JSONObject referenceSpeciesObject = (JSONObject) jsonObject.get(referenceSpecies); + JSONArray referenceSpeciesNames = (JSONArray) referenceSpeciesObject.get("name"); + String referenceSpeciesName = (String) referenceSpeciesNames.get(0); + + logger.info("Beginning orthoinference of " + targetSpeciesName); + JSONObject targetSpeciesRefDb = (JSONObject) targetSpeciesObject.get("refdb"); +// String refDbUrl = (String) refDb.get("url"); +// String refDbProteinUrl = (String) refDb.get("access"); +// String refDbGeneUrl = (String) refDb.get("ensg_access"); // Creates two files that a) list reactions that are eligible for inference and b) those that are successfully inferred - String eligibleFilename = "eligible_" + species + "_75.txt"; - String inferredFilename = "inferred_" + species + "_75.txt"; + String eligibleFilename = "eligible_" + targetSpecies + "_75.txt"; + String inferredFilename = "inferred_" + targetSpecies + "_75.txt"; createNewFile(eligibleFilename); createNewFile(inferredFilename); ReactionInferrer.setEligibleFilename(eligibleFilename); ReactionInferrer.setInferredFilename(inferredFilename); - stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) speciesObject.get("abbreviation")); + stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) targetSpeciesObject.get("abbreviation")); // Set static variables (DB/Species Instances, 
mapping files) that will be repeatedly used setInstanceEdits(personId); try { - readAndSetHomologueMappingFile(species, "hsap", pathToOrthopairs); - readAndSetGeneNameMappingFile(species, pathToOrthopairs); + readAndSetHomologueMappingFile(targetSpecies, referenceSpecies, pathToOrthopairs); +// readAndSetGeneNameMappingFile(targetSpecies, pathToOrthopairs); } catch (Exception e) { - logger.fatal("Unable to locate " + speciesName +" mapping file: hsap_" + species + "_mapping.tsv. Orthology prediction not possible."); + logger.fatal("Unable to locate " + targetSpeciesName +" mapping file: hsap_" + targetSpecies + "_mapping.tsv. Orthology prediction not possible."); e.printStackTrace(); System.exit(1); } - EWASInferrer.readENSGMappingFile(species, pathToOrthopairs); +// EWASInferrer.readENSGMappingFile(targetSpecies, pathToOrthopairs); EWASInferrer.fetchAndSetUniprotDbInstance(); - EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); - EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); +// EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); +// EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); - JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb"); + JSONObject altRefDbJSON = (JSONObject) targetSpeciesObject.get("alt_refdb"); if (altRefDbJSON != null) { - logger.info("Alternate DB exists for " + speciesName); + logger.info("Alternate DB exists for " + targetSpeciesName); EWASInferrer.createAlternateReferenceDBInstance(altRefDbJSON); } else { EWASInferrer.setAltRefDbToFalse(); } - createAndSetSpeciesInstance(speciesName); + createAndSetSpeciesInstance(targetSpeciesName); setSummationInstance(); setEvidenceTypeInstance(); OrthologousEntityGenerator.setComplexSummationInstance(); /** - * Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the species. 
+ * Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the targetSpecies. */ - // Gets DB instance of source species (human) - Collection sourceSpeciesInst = (Collection) dbAdaptor.fetchInstanceByAttribute("Species", "name", "=", "Homo sapiens"); - if (sourceSpeciesInst.isEmpty()) + // Gets DB instance of source targetSpecies (human) + Collection referenceSpeciesInst = (Collection) dbAdaptor.fetchInstanceByAttribute("Species", "name", "=", referenceSpeciesName); + if (referenceSpeciesInst.isEmpty()) { - logger.fatal("Could not find Species instance for Homo sapiens"); + logger.fatal("Could not find Species instance for " + referenceSpeciesName); System.exit(1); } - long humanInstanceDbId = sourceSpeciesInst.iterator().next().getDBID(); - orthologousPathwayDiagramGenerator = new OrthologousPathwayDiagramGenerator(dbAdaptor, dbAdaptorPrev, speciesInst, personId, humanInstanceDbId); - // Gets Reaction instances of source species (human) - Collection reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute("ReactionlikeEvent", "species", "=", humanInstanceDbId); + long referenceSpeciesInstanceDbId = referenceSpeciesInst.iterator().next().getDBID(); + orthologousPathwayDiagramGenerator = new OrthologousPathwayDiagramGenerator(dbAdaptor, dbAdaptorPrev, speciesInst, personId, referenceSpeciesInstanceDbId); + // Gets Reaction instances of source targetSpecies (human) + Collection reactionInstances = new ArrayList<>(); // + if (referenceSpeciesName.equals("Human SARS coronavirus")) { + reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute(ReactionlikeEvent, relatedSpecies, "=", referenceSpeciesInst); + } else { + reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute(ReactionlikeEvent, species, "=", referenceSpeciesInstanceDbId); + } List dbids = new ArrayList<>(); Map reactionMap = new HashMap<>(); @@ -160,13 +170,12 @@ public static void inferEvents(Properties props, String 
species) throws Exceptio reactionMap.put(reactionInst.getDBID(), reactionInst); } Collections.sort(dbids); - - logger.info(sourceSpeciesInst.iterator().next().getDisplayName() + " ReactionlikeEvent instances: " + dbids.size()); + logger.info(referenceSpeciesInst.iterator().next().getDisplayName() + " ReactionlikeEvent instances: " + dbids.size()); for (Long dbid : dbids) { GKInstance reactionInst = reactionMap.get(dbid); logger.info("Attempting RlE inference: " + reactionInst); - // Check if the current Reaction already exists for this species, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute. + // Check if the current Reaction already exists for this targetSpecies, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute. // Adds to manualHumanEvents array if it passes conditions. This code block allows you to re-run the code without re-inferring instances. List previouslyInferredInstances = new ArrayList(); previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, orthologousEvent, previouslyInferredInstances); @@ -185,7 +194,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio continue; } - // An inferred ReactionlikeEvent doesn't already exist for this species, and an orthologous inference will be attempted. + // An inferred ReactionlikeEvent doesn't already exist for this targetSpecies, and an orthologous inference will be attempted. 
try { ReactionInferrer.inferReaction(reactionInst); logger.info("Successfully inferred " + reactionInst); @@ -197,8 +206,8 @@ public static void inferEvents(Properties props, String species) throws Exceptio PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); - outputReport(species); - logger.info("Finished orthoinference of " + speciesName); + outputReport(targetSpecies); + logger.info("Finished orthoinference of " + targetSpeciesName); } /** diff --git a/src/main/java/org/reactome/orthoinference/Main.java b/src/main/java/org/reactome/orthoinference/Main.java index a82d8ca0..bdeb4c9c 100644 --- a/src/main/java/org/reactome/orthoinference/Main.java +++ b/src/main/java/org/reactome/orthoinference/Main.java @@ -24,12 +24,14 @@ public static void main(String[] args) throws Exception { speciesCode = args[0]; } else { logger.fatal("Please include a 4-letter species code as the first argument (eg: mmus)"); - System.exit(0); +// System.exit(0); } + String referenceSpeciesCode = "cov1"; + speciesCode="cov2"; Properties props = new Properties(); props.load(new FileInputStream(pathToConfig)); - EventsInferrer.inferEvents(props, speciesCode); + EventsInferrer.inferEvents(props, referenceSpeciesCode, speciesCode); } } diff --git a/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java b/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java index e21016b8..b87e3180 100644 --- a/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java +++ b/src/main/java/org/reactome/orthoinference/SkipInstanceChecker.java @@ -80,13 +80,13 @@ public static boolean checkIfInstanceShouldBeSkipped(GKInstance reactionInst) th if (reactionInst.getAttributeValue("relatedSpecies") != null) { logger.info(reactionInst + " has related species -- skipping"); - return true; +// return true; } // it is a 
disease reaction if (reactionInst.getAttributeValue(disease) != null) { logger.info(reactionInst + " is a disease reaction -- skipping"); - return true; +// return true; } // it is manually inferred if (reactionInst.getAttributeValue(inferredFrom) != null) @@ -99,7 +99,7 @@ public static boolean checkIfInstanceShouldBeSkipped(GKInstance reactionInst) th if (speciesInstances.size() > 1) { logger.info(reactionInst + " has multiple species -- skipping"); - return true; +// return true; } return false; } diff --git a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java index a052c992..4f0f0455 100644 --- a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java +++ b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java @@ -43,7 +43,11 @@ public GKInstance generateOrthologousStableId(GKInstance inferredInst, GKInstanc // For now, Human is hard-coded as the source species, so we replace the stableIdentifier source species based on that assumption String sourceIdentifier = (String) stableIdentifierInst.getAttributeValue(identifier); - String targetIdentifier = sourceIdentifier.replace("HSA", speciesAbbreviation); + String sourceAbbreviation = "HSA"; + if (sourceIdentifier.contains("COV")) { + sourceAbbreviation = "COV"; + } + String targetIdentifier = sourceIdentifier.replace(sourceAbbreviation, speciesAbbreviation); // Paralogs will have the same base stable identifier, but we want to denote when that happens. 
// We pull the value from `seenOrthoIds`, increment it and then add it to the stable identifier name (eg: R-MMU-123456-2) int paralogCount = Optional.ofNullable(seenOrthoIds.get(targetIdentifier)).orElse(0) + 1; diff --git a/src/main/resources/Species.json b/src/main/resources/Species.json index 4020dc72..9ad1c0f6 100644 --- a/src/main/resources/Species.json +++ b/src/main/resources/Species.json @@ -328,5 +328,17 @@ "Rattus norvegicus" ], "abbreviation": "RNO" + }, + "cov1": { + "name" : [ + "Human SARS coronavirus" + ], + "abbreviation": "COV" + }, + "cov2": { + "name": [ + "Severe acute respiratory syndrome coronavirus 2" + ], + "abbreviation": "COV" } } \ No newline at end of file From b6becf114abd342382d1731f6c65b68f860b15b0 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Wed, 24 Jun 2020 12:38:27 -0400 Subject: [PATCH 02/21] PE & RlE projections rounded out; StableIdentifier notation adjusted --- .../orthoinference/EventsInferrer.java | 1 + .../orthoinference/InstanceUtilities.java | 45 ++++++++++++++++--- .../orthoinference/ReactionInferrer.java | 33 +++++++++++--- .../StableIdentifierGenerator.java | 20 ++++++--- 4 files changed, 80 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 845538af..07afe30c 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -141,6 +141,7 @@ public static void inferEvents(Properties props, String referenceSpecies, String createAndSetSpeciesInstance(targetSpeciesName); setSummationInstance(); setEvidenceTypeInstance(); + InstanceUtilities.setDiseaseInstance(dbAdaptor.fetchInstance(9683915L)); OrthologousEntityGenerator.setComplexSummationInstance(); /** diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 
2774322d..44b90173 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -1,10 +1,6 @@ package org.reactome.orthoinference; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -23,6 +19,7 @@ public class InstanceUtilities { private static MySQLAdaptor dba; private static GKInstance speciesInst; private static GKInstance instanceEditInst; + private static GKInstance diseaseInst; private static Map mockedIdenticals = new HashMap<>(); // Creates new instance that will be inferred based on the incoming instances class @@ -54,7 +51,18 @@ public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInfe } if (instanceToBeInferred.getSchemClass().isValidAttribute(species) && instanceToBeInferred.getAttributeValue(species) != null) { - inferredInst.addAttributeValue(species, speciesInst); + GKInstance originalSpeciesInst = (GKInstance) instanceToBeInferred.getAttributeValue(species); + if (originalSpeciesInst.getDBID().equals(48887L)) { + inferredInst.addAttributeValue(species, instanceToBeInferred.getAttributeValue(species)); + } else { + inferredInst.addAttributeValue(species, speciesInst); + } + } + if (instanceToBeInferred.getSchemClass().isValidAttribute(relatedSpecies) && instanceToBeInferred.getAttributeValue(relatedSpecies) != null) { + List relatedSpeciesList = instanceToBeInferred.getAttributeValuesList(relatedSpecies); + if (relatedSpeciesList.contains(speciesInst)) { + inferredInst.addAttributeValue(relatedSpecies, speciesInst); + } } return inferredInst; } @@ -133,6 +141,23 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI GKInstance orthoStableIdentifierInst = 
EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); } + // COV-1-to-COV-2 Projection additions. + if (originalInst != null) { + if (inferredInst.getSchemClass().isValidAttribute(literatureReference) && originalInst.getAttributeValue(literatureReference) != null) { + inferredInst.setAttributeValue(literatureReference, originalInst.getAttributeValuesList(literatureReference)); + System.out.println("LIT REF\t\t:" + originalInst); + } + if (inferredInst.getSchemClass().isValidAttribute(disease) && originalInst.getAttributeValue(disease) != null) { + inferredInst.setAttributeValue(disease, diseaseInst); + } + if (inferredInst.getSchemClass().isValidAttribute(isChimeric) && originalInst.getAttributeValue(isChimeric) != null) { + inferredInst.setAttributeValue(isChimeric, originalInst.getAttributeValue(isChimeric)); + } + if (inferredInst.getSchemClass().isValidAttribute(includedLocation) && originalInst.getAttributeValuesList(includedLocation) != null) { + inferredInst.setAttributeValue(includedLocation, originalInst.getAttributeValuesList(includedLocation)); + } + } + // dba.storeInstance(inferredInst); return inferredInst; } @@ -219,4 +244,12 @@ public static void setInstanceEdit(GKInstance instanceEditCopy) { instanceEditInst = instanceEditCopy; } + + public static void setDiseaseInstance(GKInstance diseaseInstanceCopy) { + diseaseInst = diseaseInstanceCopy; + } + + public static GKInstance getDiseaseInst() { + return diseaseInst; + } } diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index b422e1e5..0cd2e051 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -56,11 +56,12 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // 
Total proteins are stored in reactionProteinCounts[0], inferrable proteins in [1], and the maximum number of homologues for any entity involved in index [2]. // Reactions with no proteins/EWAS (Total = 0) are not inferred. List reactionProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionInst); + System.out.println(reactionProteinCounts); int reactionTotalProteinCounts = reactionProteinCounts.get(0); - if (reactionTotalProteinCounts > 0) + if (reactionTotalProteinCounts > 0) { logger.info("Total protein count for RlE: " + reactionTotalProteinCounts); - String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; + String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; // Having passed all tests/filters until now, the reaction is recorded in the 'eligible reactions' file, meaning inference is continued. eligibleCount++; Files.write(Paths.get(eligibleFilehandle), eligibleEventName.getBytes(), StandardOpenOption.APPEND); @@ -75,7 +76,7 @@ public static void inferReaction(GKInstance reactionInst) throws Exception logger.info("Inferring catalysts..."); if (inferReactionCatalysts(reactionInst, infReactionInst)) { - // Many reactions are not regulated at all, meaning inference is attempted but will not end the process if there is nothing to infer. + // Many reactions are not regulated at all, meaning inference is attempted but will not end the process if there is nothing to infer. // The inference process will end though if inferRegulations returns an invalid value. 
logger.info("Inferring regulations..."); List inferredRegulations = inferReactionRegulations(reactionInst); @@ -83,7 +84,7 @@ public static void inferReaction(GKInstance reactionInst) throws Exception { return; } - if (infReactionInst.getSchemClass().isValidAttribute(releaseDate)) + if (infReactionInst.getSchemClass().isValidAttribute(releaseDate)) { infReactionInst.addAttributeValue(releaseDate, dateOfRelease); } @@ -91,6 +92,24 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // add a new inferred instance, the storeInstance method is just called here. GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + + // COV-1-to-COV-2 Projection additions. + if (reactionInst.getAttributeValuesList(literatureReference) != null) { + infReactionInst.setAttributeValue(literatureReference, reactionInst.getAttributeValuesList(literatureReference)); + } + if (reactionInst.getAttributeValue(disease) != null) { + infReactionInst.setAttributeValue(disease, InstanceUtilities.getDiseaseInst()); + } + if (reactionInst.getAttributeValue(isChimeric) != null) { + infReactionInst.setAttributeValue(isChimeric, reactionInst.getAttributeValue(isChimeric)); + } + if (reactionInst.getAttributeValuesList(definition) != null) { + for (String definitionString : (Collection) reactionInst.getAttributeValuesList(definition)) { + infReactionInst.addAttributeValue(definition, definitionString); + } + } + // + dba.storeInstance(infReactionInst); logger.info("Inferred RlE instance: " + infReactionInst); @@ -103,9 +122,9 @@ public static void inferReaction(GKInstance reactionInst) throws Exception dba.updateInstanceAttribute(infReactionInst, orthologousEvent); reactionInst.addAttributeValue(orthologousEvent, infReactionInst); dba.updateInstanceAttribute(reactionInst, orthologousEvent); - + 
inferredEvent.put(reactionInst, infReactionInst); - + // Regulations instances require the DB to contain the inferred ReactionlikeEvent, so Regulations inference happens post-inference if (inferredRegulations.size() > 0) { @@ -120,7 +139,7 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // After successfully adding a new inferred instance to the DB, it is recorded in the 'inferred reactions' file inferredCount++; inferrableHumanEvents.add(reactionInst); - String inferredEvent = infReactionInst.getAttributeValue(DB_ID).toString() + "\t" + infReactionInst.getDisplayName() + "\n"; + String inferredEvent = infReactionInst.getAttributeValue(DB_ID).toString() + "\t" + infReactionInst.getDisplayName() + "\n"; Files.write(Paths.get(inferredFilehandle), inferredEvent.getBytes(), StandardOpenOption.APPEND); } else { logger.info("Catalyst inference unsuccessful -- terminating inference for " + reactionInst); diff --git a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java index 4f0f0455..a08f7bae 100644 --- a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java +++ b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java @@ -43,18 +43,26 @@ public GKInstance generateOrthologousStableId(GKInstance inferredInst, GKInstanc // For now, Human is hard-coded as the source species, so we replace the stableIdentifier source species based on that assumption String sourceIdentifier = (String) stableIdentifierInst.getAttributeValue(identifier); + + // COV-1-to-COV-2 Projection Code String sourceAbbreviation = "HSA"; - if (sourceIdentifier.contains("COV")) { + if (sourceIdentifier.contains("COV") && !originalInst.getSchemClass().isa(ReactionlikeEvent)) { sourceAbbreviation = "COV"; } String targetIdentifier = sourceIdentifier.replace(sourceAbbreviation, speciesAbbreviation); + if 
(originalInst.getSchemClass().isa(ReactionlikeEvent)) { + targetIdentifier = targetIdentifier.replace("COV", "HSA"); + } + // Paralogs will have the same base stable identifier, but we want to denote when that happens. // We pull the value from `seenOrthoIds`, increment it and then add it to the stable identifier name (eg: R-MMU-123456-2) - int paralogCount = Optional.ofNullable(seenOrthoIds.get(targetIdentifier)).orElse(0) + 1; - seenOrthoIds.put(targetIdentifier, paralogCount); - if (paralogCount > 1) { - targetIdentifier += "-" + paralogCount; - } +// int paralogCount = Optional.ofNullable(seenOrthoIds.get(targetIdentifier)).orElse(0) + 1; +// seenOrthoIds.put(targetIdentifier, paralogCount); +// if (paralogCount > 1) { +// targetIdentifier += "-" + paralogCount; +// } + targetIdentifier += "-2"; + // // Check that the stable identifier instance does not already exist in DB Collection existingStableIdentifier = (Collection) dba.fetchInstanceByAttribute("StableIdentifier", "identifier", "=", targetIdentifier); From 85e96d35290ae0a6fb5e1923462ccfad89ce41e7 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Wed, 24 Jun 2020 14:03:48 -0400 Subject: [PATCH 03/21] Coordinate mapping from external file; RlEs/PEs/REs inferred properly --- .../reactome/orthoinference/EWASInferrer.java | 54 +++++++++++++++---- .../orthoinference/EventsInferrer.java | 7 +++ .../orthoinference/InstanceUtilities.java | 1 - 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 795c2ff9..09d88608 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -1,6 +1,7 @@ package org.reactome.orthoinference; import java.io.BufferedReader; +import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.nio.file.Paths; @@ -38,6 +39,7 @@ public 
class EWASInferrer { private static Map residueIdenticals = new HashMap<>(); private static Map> wormbaseMappings = new HashMap<>(); private static Map geneNameMappings = new HashMap<>(); + private static Map> coordinateMappings = new HashMap<>(); // Creates an array of inferred EWAS instances from the homologue mappings file (hsap_species_mapping.txt). @SuppressWarnings("unchecked") @@ -57,7 +59,7 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // if (checkValidSpeciesProtein(homologueId)) { GKInstance infReferenceGeneProductInst; - if (referenceGeneProductIdenticals.get(homologueId) == null) { +// if (referenceGeneProductIdenticals.get(homologueId) == null) { logger.info("Creating ReferenceGeneProduct for " + homologue); infReferenceGeneProductInst = InstanceUtilities.createNewInferredGKInstance((GKInstance) ewasInst.getAttributeValue(referenceEntity)); infReferenceGeneProductInst.addAttributeValue(identifier, homologueId); @@ -82,20 +84,27 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr logger.info("ReferenceGeneProduct instance created"); infReferenceGeneProductInst = InstanceUtilities.checkForIdenticalInstances(infReferenceGeneProductInst, null); referenceGeneProductIdenticals.put(homologueId, infReferenceGeneProductInst); - } else { - logger.info("Orthologous ReferenceGeneProduct already exists"); - infReferenceGeneProductInst = referenceGeneProductIdenticals.get(homologueId); - } +// } else { +// logger.info("Orthologous ReferenceGeneProduct already exists"); +// infReferenceGeneProductInst = referenceGeneProductIdenticals.get(homologueId); +// } // Creating inferred EWAS GKInstance infEWASInst = InstanceUtilities.createNewInferredGKInstance(ewasInst); infEWASInst.addAttributeValue(referenceEntity, infReferenceGeneProductInst); // Method for adding start/end coordinates. It is convoluted due to a quirk with assigning the name differently based on coordinate value (see infer_events.pl lines 1190-1192). 
// The name of the entity needs to be at the front of the 'name' array if the coordinate is over 1, and rearranging arrays in Java for this was a bit tricky. + String ewasNameSimple = ewasInst.getAttributeValue(name).toString(); for (int startCoord : (Collection) ewasInst.getAttributeValuesList(startCoordinate)) { + if (coordinateMappings.get(ewasNameSimple) != null) { + startCoord = Integer.valueOf(coordinateMappings.get(ewasNameSimple).get("start")); + } infEWASInst.addAttributeValue(startCoordinate, startCoord); } for (int endCoord : (Collection) ewasInst.getAttributeValuesList(endCoordinate)) { + if (coordinateMappings.get(ewasNameSimple) != null) { + endCoord = Integer.valueOf(coordinateMappings.get(ewasNameSimple).get("end")); + } infEWASInst.addAttributeValue(endCoordinate, endCoord); } if (infEWASInst.getAttributeValue(startCoordinate) != null && (int) infEWASInst.getAttributeValue(startCoordinate) > 1 || infEWASInst.getAttributeValue(endCoordinate) != null && (int) infEWASInst.getAttributeValue(endCoordinate) > 1) { @@ -190,12 +199,12 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr } // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. 
String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infModifiedResidueInst.getSchemClass(), infModifiedResidueInst); - if (residueIdenticals.get(cacheKey) != null) { - infModifiedResidueInst = residueIdenticals.get(cacheKey); - } else { +// if (residueIdenticals.get(cacheKey) != null) { +// infModifiedResidueInst = residueIdenticals.get(cacheKey); +// } else { infModifiedResidueInst = InstanceUtilities.checkForIdenticalInstances(infModifiedResidueInst, null); - residueIdenticals.put(cacheKey, infModifiedResidueInst); - } +// residueIdenticals.put(cacheKey, infModifiedResidueInst); +// } infModifiedResidueInstances.add(infModifiedResidueInst); logger.info("Successfully inferred ModifiedResidue"); } @@ -412,4 +421,29 @@ public static void setWormbaseMappings(Map> wormbaseMapping public static void setGeneNameMappingFile(Map geneNameMappingsCopy) { geneNameMappings = geneNameMappingsCopy; } + + public static void readAndSetCoordinateMappingFile(String targetSpecies) throws IOException { + String mappingFileName = targetSpecies + "_coordinate_mapping.tsv"; + String mappingFilePath = Paths.get("orthopairs", mappingFileName).toString(); + logger.info("Reading in " + mappingFilePath); + FileReader fr = new FileReader(mappingFilePath); + BufferedReader br = new BufferedReader(fr); + + String currentLine; + while ((currentLine = br.readLine()) != null) + { + String[] tabSplit = currentLine.split("\t"); + String name = tabSplit[0]; + String startCoord = tabSplit.length > 1 ? tabSplit[1] : ""; + String endCoord = tabSplit.length > 2 ? 
tabSplit[2] : ""; + if (!startCoord.isEmpty() && !endCoord.isEmpty()) { + Map coordMap = new HashMap<>(); + coordMap.put("start", startCoord); + coordMap.put("end", endCoord); + coordinateMappings.put(name, coordMap); + } + } + br.close(); + fr.close(); + } } diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 07afe30c..9607a48e 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -125,6 +125,13 @@ public static void inferEvents(Properties props, String referenceSpecies, String e.printStackTrace(); System.exit(1); } + try { + EWASInferrer.readAndSetCoordinateMappingFile(targetSpecies); + } catch (Exception e) { + logger.fatal("Unable to locate coordinate mapping file"); + e.printStackTrace(); + System.exit(1); + } // EWASInferrer.readENSGMappingFile(targetSpecies, pathToOrthopairs); EWASInferrer.fetchAndSetUniprotDbInstance(); // EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 44b90173..8563b278 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -145,7 +145,6 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI if (originalInst != null) { if (inferredInst.getSchemClass().isValidAttribute(literatureReference) && originalInst.getAttributeValue(literatureReference) != null) { inferredInst.setAttributeValue(literatureReference, originalInst.getAttributeValuesList(literatureReference)); - System.out.println("LIT REF\t\t:" + originalInst); } if (inferredInst.getSchemClass().isValidAttribute(disease) && originalInst.getAttributeValue(disease) != null) { 
inferredInst.setAttributeValue(disease, diseaseInst); From 6fa26a76eb46f38d801cff1c4580fbdac7a0a444 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Wed, 24 Jun 2020 15:47:00 -0400 Subject: [PATCH 04/21] Human PEs are included; Disease pathway projection complete --- .../reactome/orthoinference/EWASInferrer.java | 58 +++++++++++-------- .../orthoinference/EventsInferrer.java | 2 +- .../orthoinference/InstanceUtilities.java | 6 +- .../OrthologousEntityGenerator.java | 6 ++ .../orthoinference/PathwaysInferrer.java | 17 ++++++ .../orthoinference/ReactionInferrer.java | 10 ++-- .../StableIdentifierGenerator.java | 2 +- 7 files changed, 67 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 09d88608..c4d025a8 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -31,6 +31,7 @@ public class EWASInferrer { private static GKInstance enspDbInst; private static GKInstance alternateDbInst; private static GKInstance uniprotDbInst; + private static GKInstance ncbiNucleotideInst; private static GKInstance speciesInst; private static Map homologueMappings = new HashMap<>(); private static Map> ensgMappings = new HashMap<>(); @@ -65,7 +66,10 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infReferenceGeneProductInst.addAttributeValue(identifier, homologueId); // Reference DB can differ between homologue mappings, but can be differentiated by the 'homologueSource' found in each mapping. // With PANTHER data, the Protein IDs are exclusively UniProt - GKInstance referenceDatabaseInst = homologueSource.equals("ENSP") ? 
enspDbInst : uniprotDbInst; + GKInstance rgpInst = (GKInstance) ewasInst.getAttributeValue(referenceEntity); + GKInstance refDBInst = (GKInstance) rgpInst.getAttributeValue(referenceDatabase); + String refDbName = refDBInst.getAttributeValue(name).toString(); + GKInstance referenceDatabaseInst = refDbName.contains("NCBI") ? ncbiNucleotideInst : uniprotDbInst; infReferenceGeneProductInst.addAttributeValue(referenceDatabase, referenceDatabaseInst); // Creates ReferenceDNASequence instance from ReferenceEntity @@ -73,7 +77,7 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // infReferenceGeneProductInst.addAttributeValue(referenceGene, inferredReferenceDNAInstances); infReferenceGeneProductInst.addAttributeValue(species, speciesInst); - String referenceGeneProductSource = homologueSource.equals("ENSP") ? "ENSEMBL:" : "UniProt:"; + String referenceGeneProductSource = refDbName.contains("NCBI") ? "NCBI Nucleotide:" : "UniProt:"; infReferenceGeneProductInst.setAttributeValue(_displayName, referenceGeneProductSource + homologueId); // GeneName value comes from UniProt's identifier mapping service. 
@@ -149,28 +153,30 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(modification)).getDisplayName(); } } - // Update name depending on the presence of 'phospho' in the Psimod's name attribute - GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); - if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { - String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); - List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); - String originalName = ewasNames.remove(0); - infEWASInst.setAttributeValue(name, phosphoName); - // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. - // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. - // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. (Justin Cook 2018) - infEWASInst.addAttributeValue(name, ewasNames); - String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; - infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); - // This flag ensures the 'phospho-' is only prepended once. - logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". 
Updated: " + phosphoName); - phosFlag = false; - } - for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { - infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); - } - if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { - infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); + if (modifiedResidueInst.getSchemClass().isValidAttribute(psiMod)) { + // Update name depending on the presence of 'phospho' in the Psimod's name attribute + GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); + if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { + String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); + List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); + String originalName = ewasNames.remove(0); + infEWASInst.setAttributeValue(name, phosphoName); + // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. + // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. + // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. (Justin Cook 2018) + infEWASInst.addAttributeValue(name, ewasNames); + String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; + infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); + // This flag ensures the 'phospho-' is only prepended once. + logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". 
Updated: " + phosphoName); + phosFlag = false; + } + for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { + infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); + } + if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { + infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); + } } infModifiedResidueInst.setAttributeValue(_displayName, modifiedResidueInst.getAttributeValue(_displayName)); // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. @@ -348,10 +354,12 @@ public static void readENSGMappingFile(String toSpecies, String pathToOrthopairs // Fetches Uniprot DB instance @SuppressWarnings("unchecked") - public static void fetchAndSetUniprotDbInstance() throws Exception + public static void fetchAndSetDbInstances() throws Exception { Collection uniprotDbInstances = (Collection) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "UniProt"); + Collection ncbiNucleotideInstances = (Collection) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "NCBI Nucleotide"); uniprotDbInst = uniprotDbInstances.iterator().next(); + ncbiNucleotideInst = ncbiNucleotideInstances.iterator().next(); } // Creates instance pertaining to the species Ensembl Protein DB diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 9607a48e..464fbf4f 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -133,7 +133,7 @@ public static void inferEvents(Properties props, String referenceSpecies, String System.exit(1); } // EWASInferrer.readENSGMappingFile(targetSpecies, pathToOrthopairs); - EWASInferrer.fetchAndSetUniprotDbInstance(); + 
EWASInferrer.fetchAndSetDbInstances(); // EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); // EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 8563b278..a3cce950 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -6,6 +6,8 @@ import org.apache.logging.log4j.Logger; import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; + +import org.gk.model.ReactomeJavaConstants; import org.gk.persistence.MySQLAdaptor; import org.gk.schema.GKSchemaAttribute; import org.gk.schema.GKSchemaClass; @@ -60,9 +62,9 @@ public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInfe } if (instanceToBeInferred.getSchemClass().isValidAttribute(relatedSpecies) && instanceToBeInferred.getAttributeValue(relatedSpecies) != null) { List relatedSpeciesList = instanceToBeInferred.getAttributeValuesList(relatedSpecies); - if (relatedSpeciesList.contains(speciesInst)) { +// if (relatedSpeciesList.contains(speciesInst)) { inferredInst.addAttributeValue(relatedSpecies, speciesInst); - } +// } } return inferredInst; } diff --git a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 92533995..06f57372 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -12,6 +12,8 @@ import org.apache.logging.log4j.Logger; import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; + +import org.gk.model.ReactomeJavaConstants; import org.gk.persistence.MySQLAdaptor; import org.gk.schema.GKSchemaClass; import 
org.gk.schema.InvalidAttributeException; @@ -100,6 +102,10 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri } else { logger.warn("Unknown PhysicalEntity class: " + entityInst.getClass()); } + GKInstance speciesInst = (GKInstance) entityInst.getAttributeValue(species); + if (speciesInst != null && speciesInst.getDBID().equals(48887L)) { + return entityInst; + } if (override) { return infEntityInst; diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index db4ca991..378569a8 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -112,6 +112,23 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + + // COV-1-to-COV-2 Projection code + if (sourcePathwayReferralInst.getAttributeValue(disease) != null) { + infPathwayInst.setAttributeValue(disease, InstanceUtilities.getDiseaseInst()); + } + + if (sourcePathwayReferralInst.getAttributeValuesList(literatureReference) != null) { + infPathwayInst.setAttributeValue(literatureReference, sourcePathwayReferralInst.getAttributeValuesList(literatureReference)); + } + + if (sourcePathwayReferralInst.getAttributeValuesList(definition) != null) { + for (String definitionString : (Collection) sourcePathwayReferralInst.getAttributeValuesList(definition)) { + infPathwayInst.addAttributeValue(definition, definitionString); + } + } + // + dba.storeInstance(infPathwayInst); // This was replaced with addAttributeValueIfNecessary due to a bug where a Pathway instance's 'OrthologousEvent' 
attribute was being replaced, diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index 0cd2e051..a8fb4ef8 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -58,8 +58,8 @@ public static void inferReaction(GKInstance reactionInst) throws Exception List reactionProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionInst); System.out.println(reactionProteinCounts); int reactionTotalProteinCounts = reactionProteinCounts.get(0); - if (reactionTotalProteinCounts > 0) - { +// if (reactionTotalProteinCounts > 0) +// { logger.info("Total protein count for RlE: " + reactionTotalProteinCounts); String eligibleEventName = reactionInst.getAttributeValue(DB_ID).toString() + "\t" + reactionInst.getDisplayName() + "\n"; // Having passed all tests/filters until now, the reaction is recorded in the 'eligible reactions' file, meaning inference is continued. 
@@ -150,9 +150,9 @@ public static void inferReaction(GKInstance reactionInst) throws Exception } else { logger.info("Input inference unsuccessful -- terminating inference for " + reactionInst); } - } else { - logger.info("No distinct proteins found in instance -- terminating inference for " + reactionInst); - } +// } else { +// logger.info("No distinct proteins found in instance -- terminating inference for " + reactionInst); +// } } } diff --git a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java index a08f7bae..6617424c 100644 --- a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java +++ b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java @@ -50,7 +50,7 @@ public GKInstance generateOrthologousStableId(GKInstance inferredInst, GKInstanc sourceAbbreviation = "COV"; } String targetIdentifier = sourceIdentifier.replace(sourceAbbreviation, speciesAbbreviation); - if (originalInst.getSchemClass().isa(ReactionlikeEvent)) { + if (originalInst.getSchemClass().isa(Event)) { targetIdentifier = targetIdentifier.replace("COV", "HSA"); } From fb5901ed558871c5797a2a28b9482c0c904c2f0c Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 25 Jun 2020 13:23:59 -0400 Subject: [PATCH 05/21] StableIdentifier made from inferred instance DBID; Disease/RelatedSpecies PathwayDiagrams --- .../org/reactome/orthoinference/InstanceUtilities.java | 10 ++++++---- .../OrthologousPathwayDiagramGenerator.java | 7 +++++-- .../org/reactome/orthoinference/PathwaysInferrer.java | 5 +++-- .../org/reactome/orthoinference/ReactionInferrer.java | 6 ++++-- .../orthoinference/StableIdentifierGenerator.java | 4 +++- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index a3cce950..73652786 100644 --- 
a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -139,10 +139,6 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI return identicalInstances.iterator().next(); } } else { - if (inferredInst.getSchemClass().isa(PhysicalEntity)) { - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); - inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); - } // COV-1-to-COV-2 Projection additions. if (originalInst != null) { if (inferredInst.getSchemClass().isValidAttribute(literatureReference) && originalInst.getAttributeValue(literatureReference) != null) { @@ -160,6 +156,12 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI } // dba.storeInstance(inferredInst); + + if (inferredInst.getSchemClass().isa(PhysicalEntity)) { + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); + inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + dba.updateInstanceAttribute(inferredInst, stableIdentifier); + } return inferredInst; } } diff --git a/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java index e3ec4e90..eb50fa1d 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousPathwayDiagramGenerator.java @@ -45,7 +45,7 @@ public void generateOrthologousPathwayDiagrams() throws Exception { // Iterate through each PathwayDiagram instance looking for those associated with the reference species. 
for (GKInstance diagramInst: (Collection) dba.fetchInstancesByClass(ReactomeJavaConstants.PathwayDiagram)) { GKInstance pathwayInst = (GKInstance) diagramInst.getAttributeValue(ReactomeJavaConstants.representedPathway); - if (isSameSpecies(pathwayInst, referenceSpeciesInst)) { + if (pathwayInst.getAttributeValue(ReactomeJavaConstants.disease) != null) { // When a PathwayDiagram instance associated with the reference species is found, iterate through all of it's OrthologousEvent instances. for (GKInstance orthoPathwayInst : (Collection) pathwayInst.getAttributeValuesList(ReactomeJavaConstants.orthologousEvent)) { // Look for OrthologousEvent instances that match the current target species and that are electronically inferred. @@ -60,7 +60,7 @@ public void generateOrthologousPathwayDiagrams() throws Exception { public GKInstance generateOrthologousPathwayDiagram(GKInstance orthoPathwayInst, GKInstance pathwayInst, GKInstance diagramInst, PredictedPathwayDiagramGeneratorFromDB diagramGenerator) throws Exception { GKInstance orthoDiagram = null; - if (isSameSpecies(orthoPathwayInst, targetSpeciesInst) && isElectronicallyInferred(orthoPathwayInst)) { + if (isElectronicallyInferred(orthoPathwayInst)) { // Generate Orthologous PathwayDiagram instance using generatePredictedDiagram method from PredictedPathwayDiagramGeneratorFromDB. // This method is the one needed to build PathwayDiagrams for species-specific Pathway instances. 
logger.info("Building inferred Pathway diagram for " + orthoPathwayInst); @@ -72,6 +72,9 @@ public GKInstance generateOrthologousPathwayDiagram(GKInstance orthoPathwayInst, // Compare the species attribute in a Pathway with another species instance for equality public boolean isSameSpecies(GKInstance pathwayInst, GKInstance speciesInst) throws Exception { GKInstance pathwaySpeciesInst = (GKInstance) pathwayInst.getAttributeValue(ReactomeJavaConstants.species); + if (pathwayInst.getAttributeValue(ReactomeJavaConstants.relatedSpecies) != null) { + pathwaySpeciesInst = (GKInstance) pathwayInst.getAttributeValue(ReactomeJavaConstants.relatedSpecies); + } return pathwaySpeciesInst.equals(speciesInst); } diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index 378569a8..46ec6866 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -110,8 +110,6 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex } infPathwayInst.setDisplayName(sourcePathwayReferralInst.getDisplayName()); inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); - infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // COV-1-to-COV-2 Projection code if (sourcePathwayReferralInst.getAttributeValue(disease) != null) { @@ -130,6 +128,9 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex // dba.storeInstance(infPathwayInst); + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); + infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + 
dba.updateInstanceAttribute(infPathwayInst, stableIdentifier); // This was replaced with addAttributeValueIfNecessary due to a bug where a Pathway instance's 'OrthologousEvent' attribute was being replaced, // instead of being added to the existing array when the script was executed from a jar (rather than from Eclipse) (Justin Cook 2018) diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index a8fb4ef8..d6c5e3bd 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -90,8 +90,6 @@ public static void inferReaction(GKInstance reactionInst) throws Exception } // FetchIdenticalInstances would just return the instance being inferred. Since this step is meant to always // add a new inferred instance, the storeInstance method is just called here. - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); - infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // COV-1-to-COV-2 Projection additions. 
if (reactionInst.getAttributeValuesList(literatureReference) != null) { @@ -111,6 +109,10 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // dba.storeInstance(infReactionInst); + + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); + infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + dba.updateInstanceAttribute(infReactionInst, stableIdentifier); logger.info("Inferred RlE instance: " + infReactionInst); if (infReactionInst.getSchemClass().isValidAttribute(inferredFrom)) diff --git a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java index 6617424c..fad60472 100644 --- a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java +++ b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java @@ -61,7 +61,9 @@ public GKInstance generateOrthologousStableId(GKInstance inferredInst, GKInstanc // if (paralogCount > 1) { // targetIdentifier += "-" + paralogCount; // } - targetIdentifier += "-2"; +// targetIdentifier += "-2"; + String sourceDBID = originalInst.getDBID().toString(); + targetIdentifier = targetIdentifier.replace(sourceDBID, inferredInst.getDBID().toString()); // // Check that the stable identifier instance does not already exist in DB From 01436069f547a52b846eba863ee4024afef4eb50 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 25 Jun 2020 15:25:23 -0400 Subject: [PATCH 06/21] Updated inferred coordinate population; crossRef and keyword attributes added --- .../reactome/orthoinference/EWASInferrer.java | 83 +++++++++++++++---- .../orthoinference/InstanceUtilities.java | 3 + 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 
c4d025a8..f60e8e86 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -62,7 +62,8 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr GKInstance infReferenceGeneProductInst; // if (referenceGeneProductIdenticals.get(homologueId) == null) { logger.info("Creating ReferenceGeneProduct for " + homologue); - infReferenceGeneProductInst = InstanceUtilities.createNewInferredGKInstance((GKInstance) ewasInst.getAttributeValue(referenceEntity)); + GKInstance referenceEntityInst = (GKInstance) ewasInst.getAttributeValue(referenceEntity); + infReferenceGeneProductInst = InstanceUtilities.createNewInferredGKInstance(referenceEntityInst); infReferenceGeneProductInst.addAttributeValue(identifier, homologueId); // Reference DB can differ between homologue mappings, but can be differentiated by the 'homologueSource' found in each mapping. // With PANTHER data, the Protein IDs are exclusively UniProt @@ -78,7 +79,12 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infReferenceGeneProductInst.addAttributeValue(species, speciesInst); String referenceGeneProductSource = refDbName.contains("NCBI") ? "NCBI Nucleotide:" : "UniProt:"; - infReferenceGeneProductInst.setAttributeValue(_displayName, referenceGeneProductSource + homologueId); + infReferenceGeneProductInst.setAttributeValue(_displayName, referenceGeneProductSource + homologueId + " " + referenceEntityInst.getAttributeValue(name)); + infReferenceGeneProductInst.setAttributeValue(name, referenceEntityInst.getAttributeValue(name)); + infReferenceGeneProductInst.setAttributeValue(geneName, referenceEntityInst.getAttributeValue(geneName)); + if (referenceEntityInst.getAttributeValue(keyword) != null) { + infReferenceGeneProductInst.setAttributeValue(keyword, referenceEntityInst.getAttributeValuesList(keyword)); + } // GeneName value comes from UniProt's identifier mapping service. 
if (geneNameMappings.containsKey(homologueId)) { @@ -95,19 +101,20 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // Creating inferred EWAS GKInstance infEWASInst = InstanceUtilities.createNewInferredGKInstance(ewasInst); infEWASInst.addAttributeValue(referenceEntity, infReferenceGeneProductInst); - + infEWASInst.addAttributeValue(name, ewasInst.getAttributeValue(name)); // Method for adding start/end coordinates. It is convoluted due to a quirk with assigning the name differently based on coordinate value (see infer_events.pl lines 1190-1192). // The name of the entity needs to be at the front of the 'name' array if the coordinate is over 1, and rearranging arrays in Java for this was a bit tricky. - String ewasNameSimple = ewasInst.getAttributeValue(name).toString(); + + String coordKey = getCoordKey(ewasInst); for (int startCoord : (Collection) ewasInst.getAttributeValuesList(startCoordinate)) { - if (coordinateMappings.get(ewasNameSimple) != null) { - startCoord = Integer.valueOf(coordinateMappings.get(ewasNameSimple).get("start")); + if (coordinateMappings.get(coordKey) != null) { + startCoord = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); } infEWASInst.addAttributeValue(startCoordinate, startCoord); } for (int endCoord : (Collection) ewasInst.getAttributeValuesList(endCoordinate)) { - if (coordinateMappings.get(ewasNameSimple) != null) { - endCoord = Integer.valueOf(coordinateMappings.get(ewasNameSimple).get("end")); + if (coordinateMappings.get(coordKey) != null) { + endCoord = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); } infEWASInst.addAttributeValue(endCoordinate, endCoord); } @@ -143,6 +150,16 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infModifiedResidueInst.addAttributeValue(referenceSequence, infReferenceGeneProductInst); infModifiedResidueDisplayName += infReferenceGeneProductInst.getDisplayName(); for (int coordinateValue : (Collection) 
modifiedResidueInst.getAttributeValuesList(coordinate)) { + if (coordinateMappings.get(coordKey) != null) { + String ewasStartCoord = ewasInst.getAttributeValue(startCoordinate).toString(); + String ewasEndCoord = ewasInst.getAttributeValue(endCoordinate).toString(); + if (ewasStartCoord.equals(String.valueOf(coordinateValue))) { + coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); + } + if (ewasEndCoord.equals(String.valueOf(coordinateValue))) { + coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); + } + } infModifiedResidueInst.addAttributeValue(coordinate, coordinateValue); } if (infModifiedResidueInst.getSchemClass().isValidAttribute(modification)) { @@ -178,11 +195,17 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); } } - infModifiedResidueInst.setAttributeValue(_displayName, modifiedResidueInst.getAttributeValue(_displayName)); + + if (infModifiedResidueInst.getSchemClass().isa("ModifiedNucleotide")) { + infModifiedResidueDisplayName = createModifiedNucleotideDisplayName(modifiedResidueInst, infModifiedResidueInst); + } + infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. 
if (modifiedResidueInst.getAttributeValue(coordinate) != null) { - String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString() + " (in Homo sapiens)"; - infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); + // Commented out during COV-1 to COV-2 projection +// String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString(); // + " (in Homo sapiens)"; +// infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); + // } else { if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { @@ -241,6 +264,24 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr return infEWASInstances; } + private static String createModifiedNucleotideDisplayName(GKInstance modifiedResidueInst, GKInstance infModifiedResidueInst) throws Exception { + String coordinateString = infModifiedResidueInst.getAttributeValue(coordinate).toString() + " "; + GKInstance modificationInst = (GKInstance) infModifiedResidueInst.getAttributeValue(modification); + String modificationName = modificationInst.getAttributeValue(name).toString() + " "; + GKInstance refSeqInst = (GKInstance) infModifiedResidueInst.getAttributeValue(referenceSequence); + String refSeqIdentifier = refSeqInst.getAttributeValue(identifier).toString() + " "; + String refSeqName = refSeqInst.getAttributeValue(name).toString(); + return coordinateString + modificationName + refSeqIdentifier + refSeqName; + } + + private static String getCoordKey(GKInstance ewasInst) throws Exception { + GKInstance rgpInst = (GKInstance) ewasInst.getAttributeValue(referenceEntity); + String rgpIdentifier = rgpInst.getAttributeValue(identifier).toString(); + String startCoord = ewasInst.getAttributeValue(startCoordinate).toString(); + String endCoord = ewasInst.getAttributeValue(endCoordinate).toString(); + return rgpIdentifier + startCoord + endCoord; + } + /** * Retrieve all 
Wormbase gene names that match the homologue Id. * @param homologueId -- String homologue ID value from Orthopair file. @@ -438,17 +479,25 @@ public static void readAndSetCoordinateMappingFile(String targetSpecies) throws BufferedReader br = new BufferedReader(fr); String currentLine; + Set coords = new HashSet<>(); while ((currentLine = br.readLine()) != null) { String[] tabSplit = currentLine.split("\t"); String name = tabSplit[0]; - String startCoord = tabSplit.length > 1 ? tabSplit[1] : ""; - String endCoord = tabSplit.length > 2 ? tabSplit[2] : ""; - if (!startCoord.isEmpty() && !endCoord.isEmpty()) { + String cov1Identifier = tabSplit[1]; + String startCoordCov1 = tabSplit[2] != null ? tabSplit[2] : ""; + String endCoordCov1 = tabSplit[3] != null ? tabSplit[3] : ""; + if (!startCoordCov1.isEmpty() && !endCoordCov1.isEmpty()) { + + String cov1Joined = cov1Identifier + startCoordCov1 + endCoordCov1; + + String startCoordCov2 = tabSplit[5] != null ? tabSplit[5] : ""; + String endCoordCov2 = tabSplit[6] != null ? 
tabSplit[6] : ""; + Map coordMap = new HashMap<>(); - coordMap.put("start", startCoord); - coordMap.put("end", endCoord); - coordinateMappings.put(name, coordMap); + coordMap.put("start", startCoordCov2); + coordMap.put("end", endCoordCov2); + coordinateMappings.put(cov1Joined, coordMap); } } br.close(); diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 73652786..7e616060 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -144,6 +144,9 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI if (inferredInst.getSchemClass().isValidAttribute(literatureReference) && originalInst.getAttributeValue(literatureReference) != null) { inferredInst.setAttributeValue(literatureReference, originalInst.getAttributeValuesList(literatureReference)); } + if (inferredInst.getSchemClass().isValidAttribute(crossReference) && originalInst.getAttributeValue(crossReference) != null) { + inferredInst.setAttributeValue(crossReference, originalInst.getAttributeValuesList(crossReference)); + } if (inferredInst.getSchemClass().isValidAttribute(disease) && originalInst.getAttributeValue(disease) != null) { inferredInst.setAttributeValue(disease, diseaseInst); } From 7d38954247411cdbc12b847f5da530c80c351517 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 26 Jun 2020 11:29:34 -0400 Subject: [PATCH 07/21] Reverted StableIdentifier convention; EWAS names sorta corrected --- .../reactome/orthoinference/EWASInferrer.java | 11 ++++++++++- .../orthoinference/InstanceUtilities.java | 17 ++++++++++++----- .../orthoinference/PathwaysInferrer.java | 11 +++++++---- .../orthoinference/ReactionInferrer.java | 10 +++++++--- .../StableIdentifierGenerator.java | 8 +++++--- 5 files changed, 41 insertions(+), 16 deletions(-) diff --git 
a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index f60e8e86..9ec7a1da 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -101,7 +101,8 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // Creating inferred EWAS GKInstance infEWASInst = InstanceUtilities.createNewInferredGKInstance(ewasInst); infEWASInst.addAttributeValue(referenceEntity, infReferenceGeneProductInst); - infEWASInst.addAttributeValue(name, ewasInst.getAttributeValue(name)); + infEWASInst.setAttributeValue(name, ewasInst.getAttributeValuesList(name)); + // Method for adding start/end coordinates. It is convoluted due to a quirk with assigning the name differently based on coordinate value (see infer_events.pl lines 1190-1192). // The name of the entity needs to be at the front of the 'name' array if the coordinate is over 1, and rearranging arrays in Java for this was a bit tricky. @@ -238,6 +239,14 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr logger.info("Successfully inferred ModifiedResidue"); } infEWASInst.addAttributeValue(hasModifiedResidue, infModifiedResidueInstances); + Set infEWASNames = new HashSet<>(); + for (String infEWASName : (Collection) infEWASInst.getAttributeValuesList(name)) { + if (infEWASNames.contains(infEWASName)) { + infEWASInst.removeAttributeValueNoCheck(name, infEWASName); + } else { + infEWASNames.add(infEWASName); + } + } // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. 
String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infEWASInst.getSchemClass(), infEWASInst); if (ewasIdenticals.get(cacheKey) != null) { diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 7e616060..2fe81bab 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -139,6 +139,12 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI return identicalInstances.iterator().next(); } } else { + + if (inferredInst.getSchemClass().isa(PhysicalEntity)) { + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); + inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + } + // COV-1-to-COV-2 Projection additions. if (originalInst != null) { if (inferredInst.getSchemClass().isValidAttribute(literatureReference) && originalInst.getAttributeValue(literatureReference) != null) { @@ -160,11 +166,12 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI // dba.storeInstance(inferredInst); - if (inferredInst.getSchemClass().isa(PhysicalEntity)) { - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); - inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); - dba.updateInstanceAttribute(inferredInst, stableIdentifier); - } +// if (inferredInst.getSchemClass().isa(PhysicalEntity)) { +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); +// inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// dba.updateInstanceAttribute(inferredInst, stableIdentifier); +// } + return inferredInst; } } diff 
--git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index 46ec6866..ff8316e9 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -111,6 +111,9 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex infPathwayInst.setDisplayName(sourcePathwayReferralInst.getDisplayName()); inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); + infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); + // COV-1-to-COV-2 Projection code if (sourcePathwayReferralInst.getAttributeValue(disease) != null) { infPathwayInst.setAttributeValue(disease, InstanceUtilities.getDiseaseInst()); @@ -126,11 +129,11 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex } } // - dba.storeInstance(infPathwayInst); - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); - infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); - dba.updateInstanceAttribute(infPathwayInst, stableIdentifier); + +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); +// infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// dba.updateInstanceAttribute(infPathwayInst, stableIdentifier); // This was replaced with addAttributeValueIfNecessary due to a bug where a Pathway instance's 'OrthologousEvent' attribute was being replaced, // instead of being added to the existing array when the script was executed from a jar (rather than from 
Eclipse) (Justin Cook 2018) diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index d6c5e3bd..849f6869 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -88,6 +88,9 @@ public static void inferReaction(GKInstance reactionInst) throws Exception { infReactionInst.addAttributeValue(releaseDate, dateOfRelease); } + + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); + infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // FetchIdenticalInstances would just return the instance being inferred. Since this step is meant to always // add a new inferred instance, the storeInstance method is just called here. @@ -110,9 +113,10 @@ public static void inferReaction(GKInstance reactionInst) throws Exception dba.storeInstance(infReactionInst); - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); - infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); - dba.updateInstanceAttribute(infReactionInst, stableIdentifier); +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); +// infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// dba.updateInstanceAttribute(infReactionInst, stableIdentifier); + logger.info("Inferred RlE instance: " + infReactionInst); if (infReactionInst.getSchemClass().isValidAttribute(inferredFrom)) diff --git a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java index fad60472..b63ff5cf 100644 --- 
a/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java +++ b/src/main/java/org/reactome/orthoinference/StableIdentifierGenerator.java @@ -61,9 +61,11 @@ public GKInstance generateOrthologousStableId(GKInstance inferredInst, GKInstanc // if (paralogCount > 1) { // targetIdentifier += "-" + paralogCount; // } -// targetIdentifier += "-2"; - String sourceDBID = originalInst.getDBID().toString(); - targetIdentifier = targetIdentifier.replace(sourceDBID, inferredInst.getDBID().toString()); + + + targetIdentifier += "-2"; +// String sourceDBID = originalInst.getDBID().toString(); +// targetIdentifier = targetIdentifier.replace(sourceDBID, inferredInst.getDBID().toString()); // // Check that the stable identifier instance does not already exist in DB From c8f6b599d1455319beead414599c765a35d13208 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 26 Jun 2020 13:18:36 -0400 Subject: [PATCH 08/21] Revert initial name projection; added first name in appropriate spot --- .../org/reactome/orthoinference/EWASInferrer.java | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 9ec7a1da..95bcb3d1 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -101,7 +101,7 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // Creating inferred EWAS GKInstance infEWASInst = InstanceUtilities.createNewInferredGKInstance(ewasInst); infEWASInst.addAttributeValue(referenceEntity, infReferenceGeneProductInst); - infEWASInst.setAttributeValue(name, ewasInst.getAttributeValuesList(name)); +// infEWASInst.addAttributeValue(name, ewasInst.getAttributeValue(name)); // Method for adding start/end coordinates. 
It is convoluted due to a quirk with assigning the name differently based on coordinate value (see infer_events.pl lines 1190-1192). // The name of the entity needs to be at the front of the 'name' array if the coordinate is over 1, and rearranging arrays in Java for this was a bit tricky. @@ -124,6 +124,9 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr infEWASInst.addAttributeValue(name, infEWASInstNames.get(0)); infEWASInst.addAttributeValue(name, homologueId); } else { + // Added for COV-1-to-COV-2 projections + infEWASInst.addAttributeValue(name, ewasInst.getAttributeValue(name)); + // infEWASInst.addAttributeValue(name, homologueId); } @@ -239,14 +242,6 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr logger.info("Successfully inferred ModifiedResidue"); } infEWASInst.addAttributeValue(hasModifiedResidue, infModifiedResidueInstances); - Set infEWASNames = new HashSet<>(); - for (String infEWASName : (Collection) infEWASInst.getAttributeValuesList(name)) { - if (infEWASNames.contains(infEWASName)) { - infEWASInst.removeAttributeValueNoCheck(name, infEWASName); - } else { - infEWASNames.add(infEWASName); - } - } // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. 
String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infEWASInst.getSchemClass(), infEWASInst); if (ewasIdenticals.get(cacheKey) != null) { From d29c0520071c01c2854e69a23ad901c0c9a1d409 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 26 Jun 2020 15:06:31 -0400 Subject: [PATCH 09/21] DisplayName/Name changed from CoV-1 to CoV-2; debugging code for nonhuman participants --- .../orthoinference/EventsInferrer.java | 9 ++++ .../orthoinference/InstanceUtilities.java | 13 ++++++ .../OrthologousEntityGenerator.java | 46 +++++++++++++++---- .../orthoinference/PathwaysInferrer.java | 9 ++++ .../orthoinference/ReactionInferrer.java | 13 +++++- 5 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 464fbf4f..d84ac3d5 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -204,6 +204,7 @@ public static void inferEvents(Properties props, String referenceSpecies, String // An inferred ReactionlikeEvent doesn't already exist for this targetSpecies, and an orthologous inference will be attempted. 
try { +// System.out.println(reactionInst); ReactionInferrer.inferReaction(reactionInst); logger.info("Successfully inferred " + reactionInst); } catch (Exception e) { @@ -211,6 +212,14 @@ public static void inferEvents(Properties props, String referenceSpecies, String System.exit(1); } } +// System.out.println("\n\n"); +// Map> nonHumanParticpants = OrthologousEntityGenerator.getNonHumanParticipants(); +// for (GKInstance humanPE : nonHumanParticpants.keySet()) { +// System.out.println(humanPE); +// for (GKInstance nonHumanParticipant : nonHumanParticpants.get(humanPE)) { +// System.out.println("\t" + nonHumanParticipant); +// } +// } PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 2fe81bab..73d4ff4d 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -163,7 +163,20 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI inferredInst.setAttributeValue(includedLocation, originalInst.getAttributeValuesList(includedLocation)); } } + + String updatedDisplayName = inferredInst.getDisplayName().replace("CoV-1", "CoV-2"); + inferredInst.setDisplayName(updatedDisplayName); + if (inferredInst.getSchemClass().isValidAttribute(name)) { + List names = inferredInst.getAttributeValuesList(name); + List newNames = new ArrayList<>(); + for (String name : names) { + String newName = name.replace("CoV-1", "CoV-2"); + newNames.add(newName); + } + inferredInst.setAttributeValue(name, newNames); + } // + dba.storeInstance(inferredInst); // if (inferredInst.getSchemClass().isa(PhysicalEntity)) { diff --git 
a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 06f57372..97251542 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -1,12 +1,6 @@ package org.reactome.orthoinference; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -35,6 +29,7 @@ public class OrthologousEntityGenerator { private static Map definedSetIdenticals = new HashMap<>(); private static Map complexIdenticals = new HashMap<>(); private static Map entitySetIdenticals = new HashMap<>(); + private static Map> nonHumanParticpants = new HashMap<>(); /** The heart of the OrthoInference process. This function takes PhysicalEntity (PE) instances and will infer those that are EWAS', Complexes/Polymers, or EntitySets. The function's arguments are an incoming PE instance and an override attribute. 
Instances that are comprised of PE's will often recursively call this createOrthoEntity function @@ -102,8 +97,9 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri } else { logger.warn("Unknown PhysicalEntity class: " + entityInst.getClass()); } - GKInstance speciesInst = (GKInstance) entityInst.getAttributeValue(species); - if (speciesInst != null && speciesInst.getDBID().equals(48887L)) { + GKInstance entitySpeciesInst = (GKInstance) entityInst.getAttributeValue(species); + if (entitySpeciesInst != null && entitySpeciesInst.getDBID().equals(48887L)) { + inferNonHumanParticipants(entityInst); return entityInst; } if (override) @@ -114,7 +110,33 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri logger.info("PE inference completed: " + entityInst); return infEntityInst; } - + + private static void inferNonHumanParticipants(GKInstance entityInst) throws Exception { + Set containedInstances= org.gk.model.InstanceUtilities.getContainedInstances(entityInst, + ReactomeJavaConstants.hasMember, + ReactomeJavaConstants.hasCandidate, + ReactomeJavaConstants.hasComponent, + ReactomeJavaConstants.repeatedUnit + ); + for (GKInstance containedInst : containedInstances) { + if (containedInst.getSchemClass().isValidAttribute(species)) { + Collection containedInstSpecies = (Collection) containedInst.getAttributeValuesList(species); + for (GKInstance conSpeciesInst : containedInstSpecies) { + if (conSpeciesInst.getDBID().equals(9678119L)) { +// System.out.println("\t\t\t\t" + entityInst + "\t" + containedInst); + if (nonHumanParticpants.get(entityInst) != null) { + nonHumanParticpants.get(entityInst).add(containedInst); + } else { + Set singleSet = new HashSet<>(Arrays.asList(containedInst)); + nonHumanParticpants.put(entityInst, singleSet); + } + } + } + + } + } + } + // Function that first tries to infer any EWAS' associated with the instance. 
For those that have more than 1 returned EWAS instance, // it's re-structured to a DefinedSet instance. If there is no EWAS instances inferred, it will either return null or, if override is set, return a mock instance. private static GKInstance createInfEWAS(GKInstance ewasInst, boolean override) throws InvalidAttributeException, Exception @@ -444,4 +466,8 @@ public static void setComplexSummationInstance() throws Exception complexSummationInst.setAttributeValue(_displayName, complexSummationText); complexSummationInst = InstanceUtilities.checkForIdenticalInstances(complexSummationInst, null); } + + public static Map> getNonHumanParticipants() { + return nonHumanParticpants; + } } diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index ff8316e9..23a9ebd3 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -128,6 +128,15 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex infPathwayInst.addAttributeValue(definition, definitionString); } } + String updatedDisplayName = infPathwayInst.getDisplayName().replace("CoV-1", "CoV-2"); + infPathwayInst.setDisplayName(updatedDisplayName); + List names = infPathwayInst.getAttributeValuesList(name); + List newNames = new ArrayList<>(); + for (String name : names) { + String newName = name.replace("CoV-1", "CoV-2"); + newNames.add(newName); + } + infPathwayInst.setAttributeValue(name, newNames); // dba.storeInstance(infPathwayInst); diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index 849f6869..90c95673 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -110,7 +110,15 @@ public static void inferReaction(GKInstance 
reactionInst) throws Exception } } // - + String updatedDisplayName = infReactionInst.getDisplayName().replace("CoV-1", "CoV-2"); + infReactionInst.setDisplayName(updatedDisplayName); + List names = infReactionInst.getAttributeValuesList(name); + List newNames = new ArrayList<>(); + for (String name : names) { + String newName = name.replace("CoV-1", "CoV-2"); + newNames.add(newName); + } + infReactionInst.setAttributeValue(name, newNames); dba.storeInstance(infReactionInst); // GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); @@ -172,6 +180,7 @@ private static boolean inferReactionInputsOrOutputs(GKInstance reactionInst, GKI logger.info(attribute.substring(0,1).toUpperCase() + attribute.substring(1) + " instances: " + attributeInstances); for (GKInstance attributeInst : attributeInstances) { +// System.out.println("\t" + attribute + "\t" + attributeInst); GKInstance infAttributeInst = OrthologousEntityGenerator.createOrthoEntity(attributeInst, false); if (infAttributeInst == null) { @@ -197,6 +206,7 @@ private static boolean inferReactionCatalysts(GKInstance reactionInst, GKInstanc for (GKInstance catalystInst : catalystInstances) { logger.info("Attempting catalyst inference: " + catalystInst); +// System.out.println("\tcatalyst\t" + catalystInst); if (inferredCatalyst.get(catalystInst) == null) { GKInstance infCatalystInst = InstanceUtilities.createNewInferredGKInstance(catalystInst); @@ -252,6 +262,7 @@ private static List inferReactionRegulations(GKInstance reactionInst logger.info("Regulation instances: " + regulationInstances); for (GKInstance regulationInst : regulationInstances) { logger.info("Attempting Regulation inference: " + regulationInst); +// System.out.println("\tregulation\t" + regulationInst); GKInstance regulatorInst = (GKInstance) regulationInst.getAttributeValue(regulator); logger.info("Regulator: " + regulatorInst); GKInstance infRegulatorInst = 
null; From dcc7471b4a9b75381d180e2568fc7532a01fb410 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 26 Jun 2020 15:42:56 -0400 Subject: [PATCH 10/21] Changed position of caching in PathwaysInferrer --- .../org/reactome/orthoinference/InstanceUtilities.java | 2 +- .../org/reactome/orthoinference/PathwaysInferrer.java | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 73d4ff4d..4b3d7ee6 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -23,7 +23,7 @@ public class InstanceUtilities { private static GKInstance instanceEditInst; private static GKInstance diseaseInst; private static Map mockedIdenticals = new HashMap<>(); - + // Creates new instance that will be inferred based on the incoming instances class public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInferred) throws Exception { diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index 23a9ebd3..366ba067 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -109,10 +109,6 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex logger.warn(sourcePathwayReferralInst + " is a ReactionLikeEvent, which is unexpected -- refer to infer_events.pl"); } infPathwayInst.setDisplayName(sourcePathwayReferralInst.getDisplayName()); - inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); - - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); - infPathwayInst.addAttributeValue(stableIdentifier, 
orthoStableIdentifierInst); // COV-1-to-COV-2 Projection code if (sourcePathwayReferralInst.getAttributeValue(disease) != null) { @@ -137,6 +133,11 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex newNames.add(newName); } infPathwayInst.setAttributeValue(name, newNames); + + inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); + + GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); + infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // dba.storeInstance(infPathwayInst); From 9f2d6ee1672c4ecb04babd498af3ffd3353185a6 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 3 Jul 2020 12:30:41 -0400 Subject: [PATCH 11/21] Retrieve COV Reactions from Pathway hierarchy; RGP comments copied over --- .../reactome/orthoinference/EWASInferrer.java | 4 +- .../orthoinference/EventsInferrer.java | 37 +++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 95bcb3d1..d35ef4b5 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -90,7 +90,9 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr if (geneNameMappings.containsKey(homologueId)) { infReferenceGeneProductInst.addAttributeValue(geneName, geneNameMappings.get(homologueId)); } - + if (referenceEntityInst.getAttributeValue(comment) != null) { + infReferenceGeneProductInst.setAttributeValue(comment, referenceEntityInst.getAttributeValuesList(comment)); + } logger.info("ReferenceGeneProduct instance created"); infReferenceGeneProductInst = InstanceUtilities.checkForIdenticalInstances(infReferenceGeneProductInst, null); referenceGeneProductIdenticals.put(homologueId, 
infReferenceGeneProductInst); diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index d84ac3d5..59ef1023 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -52,6 +52,7 @@ public class EventsInferrer private static List manualHumanEvents = new ArrayList<>(); private static StableIdentifierGenerator stableIdentifierGenerator; private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator; + private static Long sarsCOVInfectionsPathwayDbId = 9679506L; @SuppressWarnings("unchecked") public static void inferEvents(Properties props, String referenceSpecies, String targetSpecies) throws Exception @@ -166,11 +167,15 @@ public static void inferEvents(Properties props, String referenceSpecies, String // Gets Reaction instances of source targetSpecies (human) Collection reactionInstances = new ArrayList<>(); // if (referenceSpeciesName.equals("Human SARS coronavirus")) { - reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute(ReactionlikeEvent, relatedSpecies, "=", referenceSpeciesInst); + GKInstance covPathwayInst = dbAdaptor.fetchInstance(sarsCOVInfectionsPathwayDbId); + Set uniqueReactionInstances = new HashSet<>(); + for (GKInstance hasEventInst : (Collection) covPathwayInst.getAttributeValuesList(hasEvent)) { + uniqueReactionInstances.addAll(getReactionsInEventHierarchy(hasEventInst)); + } + reactionInstances.addAll(uniqueReactionInstances); } else { reactionInstances = (Collection) dbAdaptor.fetchInstanceByAttribute(ReactionlikeEvent, species, "=", referenceSpeciesInstanceDbId); } - List dbids = new ArrayList<>(); Map reactionMap = new HashMap<>(); for (GKInstance reactionInst : reactionInstances) { @@ -204,7 +209,6 @@ public static void inferEvents(Properties props, String referenceSpecies, String // An inferred ReactionlikeEvent doesn't already exist 
for this targetSpecies, and an orthologous inference will be attempted. try { -// System.out.println(reactionInst); ReactionInferrer.inferReaction(reactionInst); logger.info("Successfully inferred " + reactionInst); } catch (Exception e) { @@ -212,14 +216,13 @@ public static void inferEvents(Properties props, String referenceSpecies, String System.exit(1); } } -// System.out.println("\n\n"); -// Map> nonHumanParticpants = OrthologousEntityGenerator.getNonHumanParticipants(); -// for (GKInstance humanPE : nonHumanParticpants.keySet()) { -// System.out.println(humanPE); -// for (GKInstance nonHumanParticipant : nonHumanParticpants.get(humanPE)) { -// System.out.println("\t" + nonHumanParticipant); -// } -// } + Map> nonHumanParticpants = OrthologousEntityGenerator.getNonHumanParticipants(); + for (GKInstance humanPE : nonHumanParticpants.keySet()) { + System.out.println(humanPE); + for (GKInstance nonHumanParticipant : nonHumanParticpants.get(humanPE)) { + System.out.println("\t" + nonHumanParticipant); + } + } PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); @@ -227,6 +230,18 @@ public static void inferEvents(Properties props, String referenceSpecies, String logger.info("Finished orthoinference of " + targetSpeciesName); } + private static Set getReactionsInEventHierarchy(GKInstance eventInst) throws Exception { + Set reactionInstances = new HashSet<>(); + if (eventInst.getSchemClass().isa(ReactionlikeEvent)) { + reactionInstances.add(eventInst); + } else { + for (GKInstance hasEventInst : (Collection) eventInst.getAttributeValuesList(hasEvent)) { + reactionInstances.addAll(getReactionsInEventHierarchy(hasEventInst)); + } + } + return reactionInstances; + } + /** * Create mapping of UniProt accessions to species-specific gene names, and then set this mapping for use in EWASInferrer. 
* @param species String - 4-letter shortened version of species name (eg: Homo sapiens --> hsap). From afb6f42e0d054dc4966fccf20eb41329e293506f Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 3 Jul 2020 14:24:06 -0400 Subject: [PATCH 12/21] Inferred Summations; suppressed StableIdentifier generation --- .../orthoinference/EventsInferrer.java | 5 +- .../orthoinference/InstanceUtilities.java | 52 ++++++++++++++++--- .../OrthologousEntityGenerator.java | 2 +- .../orthoinference/PathwaysInferrer.java | 7 +-- .../orthoinference/ReactionInferrer.java | 8 +-- 5 files changed, 55 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 59ef1023..7039b148 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -11,7 +11,6 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; -import java.sql.SQLOutput; import java.util.*; import java.util.zip.GZIPInputStream; @@ -147,10 +146,10 @@ public static void inferEvents(Properties props, String referenceSpecies, String EWASInferrer.setAltRefDbToFalse(); } createAndSetSpeciesInstance(targetSpeciesName); - setSummationInstance(); +// setSummationInstance(); setEvidenceTypeInstance(); InstanceUtilities.setDiseaseInstance(dbAdaptor.fetchInstance(9683915L)); - OrthologousEntityGenerator.setComplexSummationInstance(); +// OrthologousEntityGenerator.setComplexSummationInstance(); /** * Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the targetSpecies. 
diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 4b3d7ee6..3e902f7d 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -7,11 +7,8 @@ import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; -import org.gk.model.ReactomeJavaConstants; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.GKSchemaAttribute; -import org.gk.schema.GKSchemaClass; -import org.gk.schema.SchemaClass; +import org.gk.schema.*; // GenerateInstance is meant to act as a catch-all for functions that are instance-oriented, such as creating, mocking, or identical-checking. @@ -23,6 +20,7 @@ public class InstanceUtilities { private static GKInstance instanceEditInst; private static GKInstance diseaseInst; private static Map mockedIdenticals = new HashMap<>(); + private static String inferredEventsReactomeURL = "https://reactome.org/documentation/inferred-events"; // Creates new instance that will be inferred based on the incoming instances class public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInferred) throws Exception @@ -141,8 +139,8 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI } else { if (inferredInst.getSchemClass().isa(PhysicalEntity)) { - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); - inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(inferredInst, originalInst); +// inferredInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); } // COV-1-to-COV-2 Projection additions. 
@@ -162,10 +160,17 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI if (inferredInst.getSchemClass().isValidAttribute(includedLocation) && originalInst.getAttributeValuesList(includedLocation) != null) { inferredInst.setAttributeValue(includedLocation, originalInst.getAttributeValuesList(includedLocation)); } + + if (inferredInst.getSchemClass().isValidAttribute(summation)) { + createCOVSummationInstances(inferredInst, originalInst); + } } - String updatedDisplayName = inferredInst.getDisplayName().replace("CoV-1", "CoV-2"); - inferredInst.setDisplayName(updatedDisplayName); + // Inferred Summations should keep the normal displayName + if (!inferredInst.getSchemClass().isa(Summation)) { + String updatedDisplayName = inferredInst.getDisplayName().replace("CoV-1", "CoV-2"); + inferredInst.setDisplayName(updatedDisplayName); + } if (inferredInst.getSchemClass().isValidAttribute(name)) { List names = inferredInst.getAttributeValuesList(name); List newNames = new ArrayList<>(); @@ -188,6 +193,37 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI return inferredInst; } } + + public static void createCOVSummationInstances(GKInstance inferredInst, GKInstance originalInst) throws Exception { + + System.out.println(originalInst); + List originalSummationInstances = originalInst.getAttributeValuesList(summation); + String summationText = "This CoV-2 Reactome " + originalInst.getSchemClass().getName() + " instance was generated via electronic inference from a curated CoV-1 instance. 
In Reactome, inference is the process used to automatically create orthologous Pathways, Reactions and PhysicalEntities from our expertly curated data (" + inferredEventsReactomeURL + ")."; + if (originalSummationInstances.size() > 0) { + for (GKInstance summationInst : originalSummationInstances) { + inferredInst.addAttributeValue(summation, createCOVSummationInst(summationInst, summationText)); + } + } else { + inferredInst.addAttributeValue(summation, createCOVSummationInst(null, summationText)); + } + } + + private static GKInstance createCOVSummationInst(GKInstance summationInst, String summationText) throws Exception { + + GKInstance infSummationInst = new GKInstance(dba.getSchema().getClassByName(Summation)); + infSummationInst.setDbAdaptor(dba); + infSummationInst.setAttributeValue(created, instanceEditInst); + String summationDisplayName = summationInst != null ? summationInst.getDisplayName() : summationText; + infSummationInst.setDisplayName(summationDisplayName); + String updatedSummationText = summationInst != null ? 
summationText + "\n\n" + summationInst.getAttributeValue(text).toString() : summationText; + infSummationInst.setAttributeValue(text, updatedSummationText); + if (summationInst != null) { + infSummationInst.setAttributeValue(literatureReference, summationInst.getAttributeValuesList(literatureReference)); + } + infSummationInst = checkForIdenticalInstances(infSummationInst, summationInst); + return infSummationInst; + } + // Checks if the instanceToCheck already contains the instanceToUse in the multi-value attribute @SuppressWarnings("unchecked") public static GKInstance addAttributeValueIfNecessary(GKInstance instanceToBeCheckedForExistingAttribute, GKInstance instanceContainingAttributeToBeChecked, String attribute) throws Exception diff --git a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 97251542..41ba4da9 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -227,7 +227,7 @@ private static GKInstance createInfComplexPolymer(GKInstance complexInst, boolea } logger.info("Complex protein counts. Total: " + complexTotalProteinCounts + " Inferrable: " + complexInferrableProteinCounts); GKInstance infComplexInst = InstanceUtilities.createNewInferredGKInstance(complexInst); - infComplexInst.addAttributeValue(summation, complexSummationInst); +// infComplexInst.addAttributeValue(summation, complexSummationInst); infComplexInst.addAttributeValue(name, complexInst.getAttributeValue(name)); List infComponentInstances = new ArrayList<>(); // Inference handling is different depending on if it is a Complex or a Polymer. Complexes will infer all 'components' while Polymers will infer all 'repeatedUnits'. 
diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index 366ba067..cd88fa9c 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -91,7 +91,8 @@ private static List safeList(Collection collection) { private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Exception { GKInstance infPathwayInst = InstanceUtilities.createNewInferredGKInstance(sourcePathwayReferralInst); infPathwayInst.addAttributeValue(name, sourcePathwayReferralInst.getAttributeValuesList(name)); - infPathwayInst.addAttributeValue(summation, summationInst); +// infPathwayInst.addAttributeValue(summation, summationInst); + InstanceUtilities.createCOVSummationInstances(infPathwayInst, sourcePathwayReferralInst); if (infPathwayInst.getSchemClass().isValidAttribute(releaseDate)) { infPathwayInst.addAttributeValue(releaseDate, dateOfRelease); @@ -136,8 +137,8 @@ private static void inferPathway(GKInstance sourcePathwayReferralInst) throws Ex inferredEventIdenticals.put(sourcePathwayReferralInst, infPathwayInst); - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); - infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infPathwayInst, sourcePathwayReferralInst); +// infPathwayInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // dba.storeInstance(infPathwayInst); diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index 90c95673..5cf95265 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ 
b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -14,7 +14,6 @@ import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.InvalidAttributeException; public class ReactionInferrer { @@ -48,7 +47,8 @@ public static void inferReaction(GKInstance reactionInst) throws Exception GKInstance infReactionInst = InstanceUtilities.createNewInferredGKInstance(reactionInst); infReactionInst.addAttributeValue(name, reactionInst.getAttributeValuesList(name)); infReactionInst.addAttributeValue(goBiologicalProcess, reactionInst.getAttributeValue(goBiologicalProcess)); - infReactionInst.addAttributeValue(summation, summationInst); +// infReactionInst.addAttributeValue(summation, summationInst); + InstanceUtilities.createCOVSummationInstances(infReactionInst, reactionInst); infReactionInst.addAttributeValue(evidenceType, evidenceTypeInst); infReactionInst.addAttributeValue(_displayName, reactionInst.getAttributeValue(_displayName)); @@ -89,8 +89,8 @@ public static void inferReaction(GKInstance reactionInst) throws Exception infReactionInst.addAttributeValue(releaseDate, dateOfRelease); } - GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); - infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); +// GKInstance orthoStableIdentifierInst = EventsInferrer.getStableIdentifierGenerator().generateOrthologousStableId(infReactionInst, reactionInst); +// infReactionInst.addAttributeValue(stableIdentifier, orthoStableIdentifierInst); // FetchIdenticalInstances would just return the instance being inferred. Since this step is meant to always // add a new inferred instance, the storeInstance method is just called here. 
From 241d1fbadbf238574e03fd80f5f6eb84d0a88934 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Fri, 3 Jul 2020 14:57:11 -0400 Subject: [PATCH 13/21] Suppressed ModifiedResidue projections; Code for outputting EWAS-MR info --- .../reactome/orthoinference/EWASInferrer.java | 194 ++++++++++-------- .../orthoinference/EventsInferrer.java | 35 +++- .../orthoinference/InstanceUtilities.java | 1 - 3 files changed, 132 insertions(+), 98 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index d35ef4b5..e70af6dd 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -1,7 +1,6 @@ package org.reactome.orthoinference; import java.io.BufferedReader; -import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.nio.file.Paths; @@ -41,6 +40,7 @@ public class EWASInferrer { private static Map> wormbaseMappings = new HashMap<>(); private static Map geneNameMappings = new HashMap<>(); private static Map> coordinateMappings = new HashMap<>(); + private static Map> modifiedResidueMappings = new HashMap<>(); // Creates an array of inferred EWAS instances from the homologue mappings file (hsap_species_mapping.txt). @SuppressWarnings("unchecked") @@ -147,103 +147,113 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // Infer residue modifications. This was another step where the name of an EWAS can change. // For this, it is based on the existence of the string 'phospho' in the name of the psiMod attribute. // If true, 'phospho-' is prepended to the EWAS' name attribute. 
- List infModifiedResidueInstances = new ArrayList<>(); - boolean phosFlag = true; +// List infModifiedResidueInstances = new ArrayList<>(); +// boolean phosFlag = true; for (GKInstance modifiedResidueInst : (Collection) ewasInst.getAttributeValuesList(hasModifiedResidue)) { logger.info("Inferring ModifiedResidue: " + modifiedResidueInst); - String infModifiedResidueDisplayName = ""; - GKInstance infModifiedResidueInst = InstanceUtilities.createNewInferredGKInstance(modifiedResidueInst); - infModifiedResidueInst.addAttributeValue(referenceSequence, infReferenceGeneProductInst); - infModifiedResidueDisplayName += infReferenceGeneProductInst.getDisplayName(); - for (int coordinateValue : (Collection) modifiedResidueInst.getAttributeValuesList(coordinate)) { - if (coordinateMappings.get(coordKey) != null) { - String ewasStartCoord = ewasInst.getAttributeValue(startCoordinate).toString(); - String ewasEndCoord = ewasInst.getAttributeValue(endCoordinate).toString(); - if (ewasStartCoord.equals(String.valueOf(coordinateValue))) { - coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); - } - if (ewasEndCoord.equals(String.valueOf(coordinateValue))) { - coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); - } - } - infModifiedResidueInst.addAttributeValue(coordinate, coordinateValue); - } - if (infModifiedResidueInst.getSchemClass().isValidAttribute(modification)) { - for (GKInstance modifiedInst : (Collection) modifiedResidueInst.getAttributeValuesList(modification)) { - infModifiedResidueInst.addAttributeValue(modification, modifiedInst); - } - if (infModifiedResidueInst.getAttributeValue(modification) != null) { - infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(modification)).getDisplayName(); - } - } - if (modifiedResidueInst.getSchemClass().isValidAttribute(psiMod)) { - // Update name depending on the presence of 'phospho' in the Psimod's name attribute - GKInstance 
firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); - if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { - String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); - List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); - String originalName = ewasNames.remove(0); - infEWASInst.setAttributeValue(name, phosphoName); - // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. - // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. - // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. (Justin Cook 2018) - infEWASInst.addAttributeValue(name, ewasNames); - String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; - infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); - // This flag ensures the 'phospho-' is only prepended once. - logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". 
Updated: " + phosphoName); - phosFlag = false; - } - for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { - infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); - } - if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { - infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); - } - } - - if (infModifiedResidueInst.getSchemClass().isa("ModifiedNucleotide")) { - infModifiedResidueDisplayName = createModifiedNucleotideDisplayName(modifiedResidueInst, infModifiedResidueInst); - } - infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); - // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. - if (modifiedResidueInst.getAttributeValue(coordinate) != null) { - // Commented out during COV-1 to COV-2 projection -// String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString(); // + " (in Homo sapiens)"; -// infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); - // + if (modifiedResidueMappings.get(ewasInst) != null) { + modifiedResidueMappings.get(ewasInst).add(modifiedResidueInst); } else { - if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { - infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); - } + List singleList = new ArrayList<>(); + singleList.add(modifiedResidueInst); + modifiedResidueMappings.put(ewasInst, singleList); } - // Database-checker gave errors related to missing 'secondReferenceSequence' and 'equivalentTo' attributes in InterChainCrosslinkedResidues - // This was because they were never populated. This block is the fix. 
- if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { - if (modifiedResidueInst.getAttributeValue(secondReferenceSequence) != null) { - for (GKInstance secondRefSequenceInst : (Collection) modifiedResidueInst.getAttributeValuesList(secondReferenceSequence)) { - infModifiedResidueInst.addAttributeValue(secondReferenceSequence, secondRefSequenceInst); - } - } - if (modifiedResidueInst.getAttributeValue("equivalentTo") != null) { - for (GKInstance equivalentToInst : (Collection) modifiedResidueInst.getAttributeValuesList("equivalentTo")) { - infModifiedResidueInst.addAttributeValue("equivalentTo", equivalentToInst); - } - } - } - // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. - String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infModifiedResidueInst.getSchemClass(), infModifiedResidueInst); -// if (residueIdenticals.get(cacheKey) != null) { -// infModifiedResidueInst = residueIdenticals.get(cacheKey); + + +// String infModifiedResidueDisplayName = ""; +// GKInstance infModifiedResidueInst = InstanceUtilities.createNewInferredGKInstance(modifiedResidueInst); +// infModifiedResidueInst.addAttributeValue(referenceSequence, infReferenceGeneProductInst); +// infModifiedResidueDisplayName += infReferenceGeneProductInst.getDisplayName(); +// for (int coordinateValue : (Collection) modifiedResidueInst.getAttributeValuesList(coordinate)) { +// if (coordinateMappings.get(coordKey) != null) { +// String ewasStartCoord = ewasInst.getAttributeValue(startCoordinate).toString(); +// String ewasEndCoord = ewasInst.getAttributeValue(endCoordinate).toString(); +// if (ewasStartCoord.equals(String.valueOf(coordinateValue))) { +// coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); +// } +// if (ewasEndCoord.equals(String.valueOf(coordinateValue))) { +// coordinateValue = 
Integer.valueOf(coordinateMappings.get(coordKey).get("end")); +// } +// } +// infModifiedResidueInst.addAttributeValue(coordinate, coordinateValue); +// } +// if (infModifiedResidueInst.getSchemClass().isValidAttribute(modification)) { +// for (GKInstance modifiedInst : (Collection) modifiedResidueInst.getAttributeValuesList(modification)) { +// infModifiedResidueInst.addAttributeValue(modification, modifiedInst); +// } +// if (infModifiedResidueInst.getAttributeValue(modification) != null) { +// infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(modification)).getDisplayName(); +// } +// } +// if (modifiedResidueInst.getSchemClass().isValidAttribute(psiMod)) { +// // Update name depending on the presence of 'phospho' in the Psimod's name attribute +// GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); +// if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { +// String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); +// List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); +// String originalName = ewasNames.remove(0); +// infEWASInst.setAttributeValue(name, phosphoName); +// // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. +// // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. +// // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. 
(Justin Cook 2018) +// infEWASInst.addAttributeValue(name, ewasNames); +// String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; +// infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); +// // This flag ensures the 'phospho-' is only prepended once. +// logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". Updated: " + phosphoName); +// phosFlag = false; +// } +// for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { +// infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); +// } +// if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { +// infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); +// } +// } +// +// if (infModifiedResidueInst.getSchemClass().isa("ModifiedNucleotide")) { +// infModifiedResidueDisplayName = createModifiedNucleotideDisplayName(modifiedResidueInst, infModifiedResidueInst); +// } +// infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); +// // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. 
+// if (modifiedResidueInst.getAttributeValue(coordinate) != null) { +// // Commented out during COV-1 to COV-2 projection +//// String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString(); // + " (in Homo sapiens)"; +//// infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); +// // +// // } else { - infModifiedResidueInst = InstanceUtilities.checkForIdenticalInstances(infModifiedResidueInst, null); -// residueIdenticals.put(cacheKey, infModifiedResidueInst); +// if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { +// infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); +// } +// } +// // Database-checker gave errors related to missing 'secondReferenceSequence' and 'equivalentTo' attributes in InterChainCrosslinkedResidues +// // This was because they were never populated. This block is the fix. +// if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { +// if (modifiedResidueInst.getAttributeValue(secondReferenceSequence) != null) { +// for (GKInstance secondRefSequenceInst : (Collection) modifiedResidueInst.getAttributeValuesList(secondReferenceSequence)) { +// infModifiedResidueInst.addAttributeValue(secondReferenceSequence, secondRefSequenceInst); +// } +// } +// if (modifiedResidueInst.getAttributeValue("equivalentTo") != null) { +// for (GKInstance equivalentToInst : (Collection) modifiedResidueInst.getAttributeValuesList("equivalentTo")) { +// infModifiedResidueInst.addAttributeValue("equivalentTo", equivalentToInst); +// } +// } // } - infModifiedResidueInstances.add(infModifiedResidueInst); - logger.info("Successfully inferred ModifiedResidue"); +// // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. 
+// String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infModifiedResidueInst.getSchemClass(), infModifiedResidueInst); +//// if (residueIdenticals.get(cacheKey) != null) { +//// infModifiedResidueInst = residueIdenticals.get(cacheKey); +//// } else { +// infModifiedResidueInst = InstanceUtilities.checkForIdenticalInstances(infModifiedResidueInst, null); +//// residueIdenticals.put(cacheKey, infModifiedResidueInst); +//// } +// infModifiedResidueInstances.add(infModifiedResidueInst); +// logger.info("Successfully inferred ModifiedResidue"); } - infEWASInst.addAttributeValue(hasModifiedResidue, infModifiedResidueInstances); +// infEWASInst.addAttributeValue(hasModifiedResidue, infModifiedResidueInstances); // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infEWASInst.getSchemClass(), infEWASInst); if (ewasIdenticals.get(cacheKey) != null) { @@ -509,4 +519,8 @@ public static void readAndSetCoordinateMappingFile(String targetSpecies) throws br.close(); fr.close(); } + + public static Map> getModifiedResiduesMapping() { + return modifiedResidueMappings; + } } diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 7039b148..439a6a05 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -20,6 +20,7 @@ import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; import org.gk.persistence.MySQLAdaptor; +import org.gk.schema.GKSchemaClass; import org.gk.schema.InvalidAttributeException; import org.gk.schema.SchemaClass; import org.json.simple.JSONArray; @@ -215,13 +216,33 @@ public static void inferEvents(Properties props, String referenceSpecies, String System.exit(1); } } - Map> nonHumanParticpants = 
OrthologousEntityGenerator.getNonHumanParticipants(); - for (GKInstance humanPE : nonHumanParticpants.keySet()) { - System.out.println(humanPE); - for (GKInstance nonHumanParticipant : nonHumanParticpants.get(humanPE)) { - System.out.println("\t" + nonHumanParticipant); - } - } + +// Map> modifiedResiduesMapping = EWASInferrer.getModifiedResiduesMapping(); +// String header = "COV-1 EWAS\tCOV-1 ModifiedResidues\n"; +// Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), header.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); +// for (GKInstance ewasInst : modifiedResiduesMapping.keySet()) { +// String outputLine = ewasInst + "\t"; +// List modifiedResidues = modifiedResiduesMapping.get(ewasInst); +// int count = 0; +// for (GKInstance modifiedResidueInst : modifiedResidues) { +// if (count != 0) { +// outputLine += "|" + modifiedResidueInst; +// } else { +// outputLine += modifiedResidueInst; +// count++; +// } +// } +// outputLine += "\n"; +// Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), outputLine.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); +// } + +// Map> nonHumanParticpants = OrthologousEntityGenerator.getNonHumanParticipants(); +// for (GKInstance humanPE : nonHumanParticpants.keySet()) { +// System.out.println(humanPE); +// for (GKInstance nonHumanParticipant : nonHumanParticpants.get(humanPE)) { +// System.out.println("\t" + nonHumanParticipant); +// } +// } PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 3e902f7d..746439c2 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ 
-196,7 +196,6 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI public static void createCOVSummationInstances(GKInstance inferredInst, GKInstance originalInst) throws Exception { - System.out.println(originalInst); List originalSummationInstances = originalInst.getAttributeValuesList(summation); String summationText = "This CoV-2 Reactome " + originalInst.getSchemClass().getName() + " instance was generated via electronic inference from a curated CoV-1 instance. In Reactome, inference is the process used to automatically create orthologous Pathways, Reactions and PhysicalEntities from our expertly curated data (" + inferredEventsReactomeURL + ")."; if (originalSummationInstances.size() > 0) { From 2a835a31c8253b405c03411ce47d138bf7cd8b0f Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Sun, 5 Jul 2020 17:31:46 -0400 Subject: [PATCH 14/21] COV-1-containing Human instances handled; Slight tweak to summation message --- .../orthoinference/EventsInferrer.java | 9 - .../orthoinference/InstanceUtilities.java | 4 +- .../OrthologousEntityGenerator.java | 182 +++++++++++++++--- 3 files changed, 156 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 439a6a05..37076e1a 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -20,7 +20,6 @@ import org.gk.model.GKInstance; import static org.gk.model.ReactomeJavaConstants.*; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.GKSchemaClass; import org.gk.schema.InvalidAttributeException; import org.gk.schema.SchemaClass; import org.json.simple.JSONArray; @@ -234,14 +233,6 @@ public static void inferEvents(Properties props, String referenceSpecies, String // } // outputLine += "\n"; // Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), outputLine.getBytes(), 
StandardOpenOption.CREATE, StandardOpenOption.APPEND); -// } - -// Map> nonHumanParticpants = OrthologousEntityGenerator.getNonHumanParticipants(); -// for (GKInstance humanPE : nonHumanParticpants.keySet()) { -// System.out.println(humanPE); -// for (GKInstance nonHumanParticipant : nonHumanParticpants.get(humanPE)) { -// System.out.println("\t" + nonHumanParticipant); -// } // } PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 746439c2..15633e33 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -40,7 +40,7 @@ public static GKInstance createNewInferredGKInstance(GKInstance instanceToBeInfe for (Object compartmentInst : instanceToBeInferred.getAttributeValuesList(compartment)) { GKInstance compartmentInstGk = (GKInstance) compartmentInst; - if (compartmentInstGk.getSchemClass().isa(Compartment)) + if (compartmentInstGk.getSchemClass().isa(Compartment)) { inferredInst.addAttributeValue(compartment, compartmentInstGk); } else { @@ -197,7 +197,7 @@ public static GKInstance checkForIdenticalInstances(GKInstance inferredInst, GKI public static void createCOVSummationInstances(GKInstance inferredInst, GKInstance originalInst) throws Exception { List originalSummationInstances = originalInst.getAttributeValuesList(summation); - String summationText = "This CoV-2 Reactome " + originalInst.getSchemClass().getName() + " instance was generated via electronic inference from a curated CoV-1 instance. 
In Reactome, inference is the process used to automatically create orthologous Pathways, Reactions and PhysicalEntities from our expertly curated data (" + inferredEventsReactomeURL + ")."; + String summationText = "This COVID-19 " + originalInst.getSchemClass().getName() + " instance was generated via electronic inference from a curated CoV-1 (Human SARS coronavirus) Reactome instance. In Reactome, inference is the process used to automatically create orthologous Pathways, Reactions and PhysicalEntities from our expertly curated data (" + inferredEventsReactomeURL + ")."; if (originalSummationInstances.size() > 0) { for (GKInstance summationInst : originalSummationInstances) { inferredInst.addAttributeValue(summation, createCOVSummationInst(summationInst, summationText)); diff --git a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 41ba4da9..0f651144 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -9,10 +9,7 @@ import org.gk.model.ReactomeJavaConstants; import org.gk.persistence.MySQLAdaptor; -import org.gk.schema.GKSchemaClass; -import org.gk.schema.InvalidAttributeException; -import org.gk.schema.InvalidAttributeValueException; -import org.gk.schema.SchemaClass; +import org.gk.schema.*; public class OrthologousEntityGenerator { @@ -30,8 +27,10 @@ public class OrthologousEntityGenerator { private static Map complexIdenticals = new HashMap<>(); private static Map entitySetIdenticals = new HashMap<>(); private static Map> nonHumanParticpants = new HashMap<>(); + private static Map inferredSARSIdenticals = new HashMap<>(); + private static Map humanComplexIdenticals = new HashMap<>(); -/** The heart of the OrthoInference process. 
This function takes PhysicalEntity (PE) instances and will infer those that are EWAS', Complexes/Polymers, or EntitySets. + /** The heart of the OrthoInference process. This function takes PhysicalEntity (PE) instances and will infer those that are EWAS', Complexes/Polymers, or EntitySets. The function's arguments are an incoming PE instance and an override attribute. Instances that are comprised of PE's will often recursively call this createOrthoEntity function on constituent PE's with the override attribute set to 'true'. This ensures that these PE's are inferred, despite the fact that they might not pass some filter criteria. This is often handled using 'mock' instances (i.e. 'ghost instances' from Perl script), which allow a PE to be inferred without having to commit a 'real' instance to the DB. @@ -53,6 +52,12 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri return orthologousEntityIdenticals.get(entityInst); } + GKInstance entitySpeciesInst = (GKInstance) entityInst.getAttributeValue(species); + if (entitySpeciesInst != null && entitySpeciesInst.getDBID().equals(48887L)) { + inferSARSParticipants(entityInst); + return entityInst; + } + // Checks that a species attribute exists in either the current instance or in constituent instances. 
if (!SpeciesCheckUtility.checkForSpeciesAttribute(entityInst)) { @@ -97,11 +102,6 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri } else { logger.warn("Unknown PhysicalEntity class: " + entityInst.getClass()); } - GKInstance entitySpeciesInst = (GKInstance) entityInst.getAttributeValue(species); - if (entitySpeciesInst != null && entitySpeciesInst.getDBID().equals(48887L)) { - inferNonHumanParticipants(entityInst); - return entityInst; - } if (override) { return infEntityInst; @@ -111,30 +111,156 @@ public static GKInstance createOrthoEntity(GKInstance entityInst, boolean overri return infEntityInst; } - private static void inferNonHumanParticipants(GKInstance entityInst) throws Exception { - Set containedInstances= org.gk.model.InstanceUtilities.getContainedInstances(entityInst, - ReactomeJavaConstants.hasMember, - ReactomeJavaConstants.hasCandidate, - ReactomeJavaConstants.hasComponent, - ReactomeJavaConstants.repeatedUnit - ); - for (GKInstance containedInst : containedInstances) { - if (containedInst.getSchemClass().isValidAttribute(species)) { - Collection containedInstSpecies = (Collection) containedInst.getAttributeValuesList(species); - for (GKInstance conSpeciesInst : containedInstSpecies) { - if (conSpeciesInst.getDBID().equals(9678119L)) { -// System.out.println("\t\t\t\t" + entityInst + "\t" + containedInst); - if (nonHumanParticpants.get(entityInst) != null) { - nonHumanParticpants.get(entityInst).add(containedInst); - } else { - Set singleSet = new HashSet<>(Arrays.asList(containedInst)); - nonHumanParticpants.put(entityInst, singleSet); + private static GKInstance inferSARSParticipants(GKInstance entityInst) throws Exception { + + if (humanComplexIdenticals.get(entityInst) == null) { + Set containedInstances = getComplexEntitySetContainedInstances(entityInst); + + boolean hasContainedSARSInstance = false; + for (GKInstance containedInst : containedInstances) { + if (hasSARSSpecies(containedInst)) { + 
hasContainedSARSInstance = true; + if (inferredSARSIdenticals.get(containedInst) == null) { + GKInstance inferredSARSEntityInst = createOrthoEntity(containedInst, false); + inferredSARSIdenticals.put(containedInst, inferredSARSEntityInst); + } + } + } + + if (hasContainedSARSInstance) { + GKInstance copiedHumanComplex = InstanceUtilities.createNewInferredGKInstance(entityInst); + for (SchemaAttribute complexAttr : (Collection) entityInst.getSchemClass().getAttributes()) { + if (!complexAttr.getName().equals(authored) + && !complexAttr.getName().equals(created) + && !complexAttr.getName().equals(modified) + && !complexAttr.getName().equals(relatedSpecies) + && !complexAttr.getName().equals(disease) + && !complexAttr.getName().equals(reviewed) + && !complexAttr.getName().equals(inferredFrom) + && !complexAttr.getName().equals(inferredTo) + && !complexAttr.getName().equals(DB_ID) + && !complexAttr.getName().equals(stableIdentifier) + && !complexAttr.getName().equals(revised) + && !complexAttr.getName().equals(edited) + && !complexAttr.getName().equals(compartment) + && !complexAttr.getName().equals(species)) { + + if (entityInst.getAttributeValuesList(complexAttr).size() > 0) { + for (Object attrValue : entityInst.getAttributeValuesList(complexAttr)) { + copiedHumanComplex.addAttributeValue(complexAttr, attrValue); + } } } } + List components = (List) copiedHumanComplex.getAttributeValuesList(hasComponent); + List updatedComponents = new ArrayList<>(); + for (GKInstance component : components) { + if (hasSARSSpecies(component)) { + updatedComponents.add(inferredSARSIdenticals.get(component)); + } else { + updatedComponents.add(component); + } + } + copiedHumanComplex.setAttributeValue(hasComponent, updatedComponents); + + copiedHumanComplex = InstanceUtilities.checkForIdenticalInstances(copiedHumanComplex, entityInst); + + copiedHumanComplex = InstanceUtilities.addAttributeValueIfNecessary(copiedHumanComplex, entityInst, inferredFrom); + 
dba.updateInstanceAttribute(copiedHumanComplex, inferredFrom); + entityInst = InstanceUtilities.addAttributeValueIfNecessary(entityInst, copiedHumanComplex, inferredTo); + dba.updateInstanceAttribute(entityInst, inferredTo); + + humanComplexIdenticals.put(entityInst, copiedHumanComplex); + + + /////// This code was used for troubleshooting and to see how far down 'multi-species' instances went in the Complex/EntitySet hierarchy +// for (String attr : complexAttrs) { +// System.out.println(attr); +// } +// for (GKInstance containedInst : containedInstances) { + +// if (hasSARSSpecies(containedInst)) { +////// System.out.println("\t" + containedInst); +//// Set subContainedInstances = getComplexEntitySetContainedInstances(containedInst); +//// for (GKInstance subContainedInst : subContainedInstances) { +//// if (hasSARSSpecies(subContainedInst)) { +////// System.out.println("\t\t" + subContainedInst); +////// Set subSubContainedInstances = getComplexEntitySetContainedInstances(subContainedInst); +////// for (GKInstance subSubContainedInst : subSubContainedInstances) { +////// if (hasSARSSpecies(subSubContainedInst)) { +//////// System.out.println("\t\t\t" + subSubContainedInst); +////// Set subSubSubContainedInstances = getComplexEntitySetContainedInstances(subSubContainedInst); +////// for (GKInstance subSubSubContainedInst : subSubSubContainedInstances) { +////// if (hasSARSSpecies(subSubSubContainedInst)) { +//////// System.out.println("\t\t\t\t" + subSubSubContainedInst); +////// } else if (hasContainedSARSInstance(subSubSubContainedInst)) { +////// +////// } else { +////// System.out.println(subSubSubContainedInst.getAttributeValue(species) + "\t\t" + subSubSubContainedInst); +////// } +////// } +////// } else if (hasContainedSARSInstance(subSubContainedInst)) { +////// +////// } +////// } +//// } else if (hasContainedSARSInstance(subContainedInst)) { +//// +//// } else { +//// System.out.println(subContainedInst.getAttributeValue(species) + "\t\t" + 
subContainedInst); +//// } +//// } +// } else if (hasContainedSARSInstance(containedInst)) { +//// Set subContainedInstances = getComplexEntitySetContainedInstances(containedInst); +//// System.out.println(subContainedInstances.size()); +//// for (GKInstance subContainedInst : subContainedInstances) { +//// System.out.println(subContainedInst); +//// if (hasSARSSpecies(subContainedInst)) { +//// System.out.println("\t\tTWOO: " + subContainedInst); +//// } else if (hasContainedSARSInstance(subContainedInst)) { +//// System.out.println("\t\t\tTEE: " + subContainedInst); +//// } else { +//// System.out.println("\t\t\t\t\t\t\tDUDDD: " + subContainedInst); +//// } +//// } +// } else { +//// System.out.println("\t\t\t\t\tDUD: " + containedInst); +//// System.out.println(containedInst.getAttributeValue(species) + "\t\t" + containedInst); +// } +// } + ///////////// + + } + + } + return humanComplexIdenticals.get(entityInst); + } + + private static boolean hasSARSSpecies(GKInstance entityInst) throws Exception { + if (entityInst.getSchemClass().isValidAttribute(species)) { + GKInstance speciesInst = (GKInstance) entityInst.getAttributeValue(species); + return speciesInst != null && speciesInst.getDBID().equals(9678119L); + } + return false; + } + + private static boolean hasContainedSARSInstance(GKInstance subEntityInst) throws Exception { + boolean hasContainedSARSInstance = false; + for (GKInstance subContainedInst : getComplexEntitySetContainedInstances(subEntityInst)) { + if (hasSARSSpecies(subContainedInst)) { + hasContainedSARSInstance = true; } } + return hasContainedSARSInstance; + } + + private static Set getComplexEntitySetContainedInstances(GKInstance entityInst) throws Exception { + return org.gk.model.InstanceUtilities.getContainedInstances(entityInst, + ReactomeJavaConstants.hasMember, + ReactomeJavaConstants.hasCandidate, + ReactomeJavaConstants.hasComponent, + ReactomeJavaConstants.repeatedUnit + ); } // Function that first tries to infer any EWAS' 
associated with the instance. For those that have more than 1 returned EWAS instance, From 1a8f3b5cb6b42af43d7d932c058a0346f7d55c9d Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Tue, 7 Jul 2020 12:38:33 -0400 Subject: [PATCH 15/21] Allow ModifiedResidue inference --- .../reactome/orthoinference/EWASInferrer.java | 186 +++++++++--------- 1 file changed, 94 insertions(+), 92 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index e70af6dd..8011f417 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -147,8 +147,8 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr // Infer residue modifications. This was another step where the name of an EWAS can change. // For this, it is based on the existence of the string 'phospho' in the name of the psiMod attribute. // If true, 'phospho-' is prepended to the EWAS' name attribute. 
-// List infModifiedResidueInstances = new ArrayList<>(); -// boolean phosFlag = true; + List infModifiedResidueInstances = new ArrayList<>(); + boolean phosFlag = true; for (GKInstance modifiedResidueInst : (Collection) ewasInst.getAttributeValuesList(hasModifiedResidue)) { logger.info("Inferring ModifiedResidue: " + modifiedResidueInst); @@ -161,99 +161,101 @@ public static List inferEWAS(GKInstance ewasInst) throws InvalidAttr } -// String infModifiedResidueDisplayName = ""; -// GKInstance infModifiedResidueInst = InstanceUtilities.createNewInferredGKInstance(modifiedResidueInst); -// infModifiedResidueInst.addAttributeValue(referenceSequence, infReferenceGeneProductInst); -// infModifiedResidueDisplayName += infReferenceGeneProductInst.getDisplayName(); -// for (int coordinateValue : (Collection) modifiedResidueInst.getAttributeValuesList(coordinate)) { -// if (coordinateMappings.get(coordKey) != null) { -// String ewasStartCoord = ewasInst.getAttributeValue(startCoordinate).toString(); -// String ewasEndCoord = ewasInst.getAttributeValue(endCoordinate).toString(); -// if (ewasStartCoord.equals(String.valueOf(coordinateValue))) { -// coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("start")); -// } -// if (ewasEndCoord.equals(String.valueOf(coordinateValue))) { -// coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); -// } -// } -// infModifiedResidueInst.addAttributeValue(coordinate, coordinateValue); -// } -// if (infModifiedResidueInst.getSchemClass().isValidAttribute(modification)) { -// for (GKInstance modifiedInst : (Collection) modifiedResidueInst.getAttributeValuesList(modification)) { -// infModifiedResidueInst.addAttributeValue(modification, modifiedInst); -// } -// if (infModifiedResidueInst.getAttributeValue(modification) != null) { -// infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(modification)).getDisplayName(); -// } -// } -// if 
(modifiedResidueInst.getSchemClass().isValidAttribute(psiMod)) { -// // Update name depending on the presence of 'phospho' in the Psimod's name attribute -// GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); -// if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { -// String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); -// List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); -// String originalName = ewasNames.remove(0); -// infEWASInst.setAttributeValue(name, phosphoName); -// // In the Perl version, this code block modifies the 'name' attribute to include 'phosopho-', but in the process it drops the other names contained. I believe this is unintentional. -// // This would mean attributes without the 'phospho- ' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. -// // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array. (Justin Cook 2018) -// infEWASInst.addAttributeValue(name, ewasNames); -// String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; -// infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); -// // This flag ensures the 'phospho-' is only prepended once. -// logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". 
Updated: " + phosphoName); -// phosFlag = false; -// } -// for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { -// infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); -// } -// if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { -// infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); -// } -// } -// -// if (infModifiedResidueInst.getSchemClass().isa("ModifiedNucleotide")) { -// infModifiedResidueDisplayName = createModifiedNucleotideDisplayName(modifiedResidueInst, infModifiedResidueInst); -// } -// infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); -// // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. -// if (modifiedResidueInst.getAttributeValue(coordinate) != null) { -// // Commented out during COV-1 to COV-2 projection -//// String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString(); // + " (in Homo sapiens)"; -//// infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); -// // -// + String infModifiedResidueDisplayName = ""; + GKInstance infModifiedResidueInst = InstanceUtilities.createNewInferredGKInstance(modifiedResidueInst); + infModifiedResidueInst.addAttributeValue(referenceSequence, infReferenceGeneProductInst); + infModifiedResidueDisplayName += infReferenceGeneProductInst.getDisplayName(); + for (int coordinateValue : (Collection) modifiedResidueInst.getAttributeValuesList(coordinate)) { + if (coordinateMappings.get(coordKey) != null) { + String ewasStartCoord = ewasInst.getAttributeValue(startCoordinate).toString(); + String ewasEndCoord = ewasInst.getAttributeValue(endCoordinate).toString(); + if (ewasStartCoord.equals(String.valueOf(coordinateValue))) { + coordinateValue = 
Integer.valueOf(coordinateMappings.get(coordKey).get("start")); + } + if (ewasEndCoord.equals(String.valueOf(coordinateValue))) { + coordinateValue = Integer.valueOf(coordinateMappings.get(coordKey).get("end")); + } + } + infModifiedResidueInst.addAttributeValue(coordinate, coordinateValue); + } + if (infModifiedResidueInst.getSchemClass().isValidAttribute(modification)) { + for (GKInstance modifiedInst : (Collection) modifiedResidueInst.getAttributeValuesList(modification)) { + infModifiedResidueInst.addAttributeValue(modification, modifiedInst); + } + if (infModifiedResidueInst.getAttributeValue(modification) != null) { + infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(modification)).getDisplayName(); + } + } + if (modifiedResidueInst.getSchemClass().isValidAttribute(psiMod)) { + // Update name depending on the presence of 'phospho' in the Psimod's name attribute + GKInstance firstPsiModInst = (GKInstance) modifiedResidueInst.getAttributeValue(psiMod); + if (phosFlag && firstPsiModInst.getAttributeValue(name).toString().contains("phospho")) { + String phosphoName = "phospho-" + infEWASInst.getAttributeValue(name); + List ewasNames = (ArrayList) infEWASInst.getAttributeValuesList(name); + String originalName = ewasNames.remove(0); + infEWASInst.setAttributeValue(name, phosphoName); + // In the Perl version, this code block modifies the 'name' attribute to include 'phospho-', but in the process it drops the other names contained. I believe this is unintentional. + // This would mean attributes without the 'phospho-' addition would retain their array of names, while attributes containing 'phospho-' would only contain a single name attribute. + // I've assumed this is incorrect for the rewrite -- Instances that modify the name attribute to prepend 'phospho-' retain their name array.
(Justin Cook 2018) + infEWASInst.addAttributeValue(name, ewasNames); + String phosphoDisplayName = phosphoName + " [" + ((GKInstance) ewasInst.getAttributeValue(compartment)).getDisplayName() + "]"; + infEWASInst.setAttributeValue(_displayName, phosphoDisplayName); + // This flag ensures the 'phospho-' is only prepended once. + logger.info("Updated EWAS name to reflect phosphorylation. Original: " + originalName + ". Updated: " + phosphoName); + phosFlag = false; + } + for (GKInstance psiModInst : (Collection) modifiedResidueInst.getAttributeValuesList(psiMod)) { + infModifiedResidueInst.addAttributeValue(psiMod, psiModInst); + } + if (infModifiedResidueInst.getAttributeValue(psiMod) != null) { + infModifiedResidueDisplayName += " " + ((GKInstance) infModifiedResidueInst.getAttributeValue(psiMod)).getDisplayName(); + } + } + + if (infModifiedResidueInst.getSchemClass().isa("ModifiedNucleotide")) { + infModifiedResidueDisplayName = createModifiedNucleotideDisplayName(modifiedResidueInst, infModifiedResidueInst); + } + infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); + // Update name to reflect that coordinate values are taken from humans. This takes place after cache retrieval, since the name from DB won't contain updated name. + if (modifiedResidueInst.getAttributeValue(coordinate) != null) { + // Commented out during COV-1 to COV-2 projection +// String newModifiedResidueDisplayName = modifiedResidueInst.getAttributeValue(_displayName).toString(); // + " (in Homo sapiens)"; +// infModifiedResidueInst.setAttributeValue(_displayName, newModifiedResidueDisplayName); + // + + } else { + if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { + infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); + } + } + // Database-checker gave errors related to missing 'secondReferenceSequence' and 'equivalentTo' attributes in InterChainCrosslinkedResidues + // This was because they were never populated. 
This block is the fix. + if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { + if (modifiedResidueInst.getAttributeValue(secondReferenceSequence) != null) { + for (GKInstance secondRefSequenceInst : (Collection) modifiedResidueInst.getAttributeValuesList(secondReferenceSequence)) { + infModifiedResidueInst.addAttributeValue(secondReferenceSequence, secondRefSequenceInst); + } + } + if (modifiedResidueInst.getAttributeValue("equivalentTo") != null) { + for (GKInstance equivalentToInst : (Collection) modifiedResidueInst.getAttributeValuesList("equivalentTo")) { + infModifiedResidueInst.addAttributeValue("equivalentTo", equivalentToInst); + } + } + } + String modifiedResidueDisplayName = "[INFERRED] " + infModifiedResidueInst.getDisplayName(); + infModifiedResidueInst.setDisplayName(modifiedResidueDisplayName); + // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. + String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infModifiedResidueInst.getSchemClass(), infModifiedResidueInst); +// if (residueIdenticals.get(cacheKey) != null) { +// infModifiedResidueInst = residueIdenticals.get(cacheKey); // } else { -// if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { -// infModifiedResidueInst.setDisplayName(infModifiedResidueDisplayName); -// } -// } -// // Database-checker gave errors related to missing 'secondReferenceSequence' and 'equivalentTo' attributes in InterChainCrosslinkedResidues -// // This was because they were never populated. This block is the fix. 
-// if (infModifiedResidueInst.getSchemClass().isa(InterChainCrosslinkedResidue)) { -// if (modifiedResidueInst.getAttributeValue(secondReferenceSequence) != null) { -// for (GKInstance secondRefSequenceInst : (Collection) modifiedResidueInst.getAttributeValuesList(secondReferenceSequence)) { -// infModifiedResidueInst.addAttributeValue(secondReferenceSequence, secondRefSequenceInst); -// } -// } -// if (modifiedResidueInst.getAttributeValue("equivalentTo") != null) { -// for (GKInstance equivalentToInst : (Collection) modifiedResidueInst.getAttributeValuesList("equivalentTo")) { -// infModifiedResidueInst.addAttributeValue("equivalentTo", equivalentToInst); -// } -// } + infModifiedResidueInst = InstanceUtilities.checkForIdenticalInstances(infModifiedResidueInst, null); +// residueIdenticals.put(cacheKey, infModifiedResidueInst); // } -// // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. -// String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infModifiedResidueInst.getSchemClass(), infModifiedResidueInst); -//// if (residueIdenticals.get(cacheKey) != null) { -//// infModifiedResidueInst = residueIdenticals.get(cacheKey); -//// } else { -// infModifiedResidueInst = InstanceUtilities.checkForIdenticalInstances(infModifiedResidueInst, null); -//// residueIdenticals.put(cacheKey, infModifiedResidueInst); -//// } -// infModifiedResidueInstances.add(infModifiedResidueInst); -// logger.info("Successfully inferred ModifiedResidue"); + infModifiedResidueInstances.add(infModifiedResidueInst); + logger.info("Successfully inferred ModifiedResidue"); } -// infEWASInst.addAttributeValue(hasModifiedResidue, infModifiedResidueInstances); + infEWASInst.addAttributeValue(hasModifiedResidue, infModifiedResidueInstances); // Caching based on an instance's defining attributes. This reduces the number of 'checkForIdenticalInstance' calls, which slows things. 
String cacheKey = InstanceUtilities.getCacheKey((GKSchemaClass) infEWASInst.getSchemClass(), infEWASInst); if (ewasIdenticals.get(cacheKey) != null) { From 5237e703e43b8a7f9a7615a7f9ed0d6a7d29a443 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Tue, 7 Jul 2020 12:44:06 -0400 Subject: [PATCH 16/21] Final cov1-to-cov2 projections; Do not infer Therapeutics referrals; Do not infer Reactions that have no CoV-1 instances; Inferred Summations displayName receives header too --- .../orthoinference/EventsInferrer.java | 21 +++++++++-- .../orthoinference/InstanceUtilities.java | 5 +-- .../OrthologousEntityGenerator.java | 4 ++- .../orthoinference/PathwaysInferrer.java | 2 +- .../orthoinference/ReactionInferrer.java | 35 +++++++++++++++---- 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 37076e1a..47748614 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -51,7 +51,7 @@ public class EventsInferrer private static List manualHumanEvents = new ArrayList<>(); private static StableIdentifierGenerator stableIdentifierGenerator; private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator; - private static Long sarsCOVInfectionsPathwayDbId = 9679506L; + private static Long sarsCOV1InfectionsPathwayDbId = 9678108L; @SuppressWarnings("unchecked") public static void inferEvents(Properties props, String referenceSpecies, String targetSpecies) throws Exception @@ -66,6 +66,7 @@ public static void inferEvents(Properties props, String referenceSpecies, String int port = Integer.valueOf(props.getProperty("release.database.port")); dbAdaptor = new MySQLAdaptor(host, database, username, password, port); + System.out.println(dbAdaptor.fetchMaxDbId()); dbAdaptorPrev = new MySQLAdaptor(host, prevDatabase, username, password, 
port); if (dbAdaptor == null || dbAdaptorPrev == null) { logger.fatal("Null MySQLAdaptor, terminating orthoinference"); @@ -166,7 +167,7 @@ public static void inferEvents(Properties props, String referenceSpecies, String // Gets Reaction instances of source targetSpecies (human) Collection reactionInstances = new ArrayList<>(); // if (referenceSpeciesName.equals("Human SARS coronavirus")) { - GKInstance covPathwayInst = dbAdaptor.fetchInstance(sarsCOVInfectionsPathwayDbId); + GKInstance covPathwayInst = dbAdaptor.fetchInstance(sarsCOV1InfectionsPathwayDbId); Set uniqueReactionInstances = new HashSet<>(); for (GKInstance hasEventInst : (Collection) covPathwayInst.getAttributeValuesList(hasEvent)) { uniqueReactionInstances.addAll(getReactionsInEventHierarchy(hasEventInst)); @@ -216,6 +217,7 @@ public static void inferEvents(Properties props, String referenceSpecies, String } } + // Outputs a file that contains all EWAS' that were inferred and their contained hasModifiedResidues. // Map> modifiedResiduesMapping = EWASInferrer.getModifiedResiduesMapping(); // String header = "COV-1 EWAS\tCOV-1 ModifiedResidues\n"; // Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), header.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); @@ -234,11 +236,26 @@ public static void inferEvents(Properties props, String referenceSpecies, String // outputLine += "\n"; // Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), outputLine.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND); // } + PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent()); PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents()); orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams(); outputReport(targetSpecies); logger.info("Finished orthoinference of " + targetSpeciesName); + + System.out.println(dbAdaptor.fetchMaxDbId()); + + // Find inferred instances that are referred to by multiple curated instances + // Likely 
due to lack of distinguishing 'defining' attributes. +// Collection inferredInstances = dbAdaptor.fetchInstancesByClass(Event); +// inferredInstances.addAll(dbAdaptor.fetchInstancesByClass(PhysicalEntity)); +// for (GKInstance inferredInst : inferredInstances) { +// GKInstance createdInst = (GKInstance) inferredInst.getAttributeValue(created); +// Collection inferredFromInstances = inferredInst.getAttributeValuesList(inferredFrom); +// if (createdInst != null && createdInst.getDisplayName().contains("Justin") && inferredFromInstances.size() > 1) { +// System.out.println(inferredInst); +// } +// } } private static Set getReactionsInEventHierarchy(GKInstance eventInst) throws Exception { diff --git a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java index 15633e33..f3b2280c 100644 --- a/src/main/java/org/reactome/orthoinference/InstanceUtilities.java +++ b/src/main/java/org/reactome/orthoinference/InstanceUtilities.java @@ -212,10 +212,11 @@ private static GKInstance createCOVSummationInst(GKInstance summationInst, Strin GKInstance infSummationInst = new GKInstance(dba.getSchema().getClassByName(Summation)); infSummationInst.setDbAdaptor(dba); infSummationInst.setAttributeValue(created, instanceEditInst); - String summationDisplayName = summationInst != null ? summationInst.getDisplayName() : summationText; - infSummationInst.setDisplayName(summationDisplayName); +// String summationDisplayName = summationInst != null ? summationInst.getDisplayName() : summationText; +// infSummationInst.setDisplayName(summationDisplayName); String updatedSummationText = summationInst != null ? 
summationText + "\n\n" + summationInst.getAttributeValue(text).toString() : summationText; infSummationInst.setAttributeValue(text, updatedSummationText); + infSummationInst.setDisplayName(updatedSummationText); if (summationInst != null) { infSummationInst.setAttributeValue(literatureReference, summationInst.getAttributeValuesList(literatureReference)); } diff --git a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java index 0f651144..66ccce56 100644 --- a/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java +++ b/src/main/java/org/reactome/orthoinference/OrthologousEntityGenerator.java @@ -128,6 +128,8 @@ private static GKInstance inferSARSParticipants(GKInstance entityInst) throws Ex } if (hasContainedSARSInstance) { + // Outputs Human Complexes/EntitySets that contain CoV-1 instances. +// System.out.println(entityInst); GKInstance copiedHumanComplex = InstanceUtilities.createNewInferredGKInstance(entityInst); for (SchemaAttribute complexAttr : (Collection) entityInst.getSchemClass().getAttributes()) { if (!complexAttr.getName().equals(authored) @@ -236,7 +238,7 @@ private static GKInstance inferSARSParticipants(GKInstance entityInst) throws Ex return humanComplexIdenticals.get(entityInst); } - private static boolean hasSARSSpecies(GKInstance entityInst) throws Exception { + public static boolean hasSARSSpecies(GKInstance entityInst) throws Exception { if (entityInst.getSchemClass().isValidAttribute(species)) { GKInstance speciesInst = (GKInstance) entityInst.getAttributeValue(species); return speciesInst != null && speciesInst.getDBID().equals(9678119L); diff --git a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java index cd88fa9c..1462b707 100644 --- a/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java +++ 
b/src/main/java/org/reactome/orthoinference/PathwaysInferrer.java @@ -74,7 +74,7 @@ private static void createInferredPathwayHierarchy(GKInstance sourceEventInst) t for (GKInstance sourcePathwayReferralInst : sourcePathwayReferralInstances) { logger.info("Generating inferred Pathway: " + sourcePathwayReferralInst); - if (inferredEventIdenticals.get(sourcePathwayReferralInst) == null) + if (inferredEventIdenticals.get(sourcePathwayReferralInst) == null && !sourcePathwayReferralInst.getDBID().equals(9679191L)) { inferPathway(sourcePathwayReferralInst); } else { diff --git a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java index 5cf95265..152c4649 100644 --- a/src/main/java/org/reactome/orthoinference/ReactionInferrer.java +++ b/src/main/java/org/reactome/orthoinference/ReactionInferrer.java @@ -3,11 +3,7 @@ import java.nio.file.Files; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -44,6 +40,34 @@ public static void inferReaction(GKInstance reactionInst) throws Exception { ///// The beginning of an inference process: // Creates inferred instance of reaction. + + // This code screens Reactions that will not need to be inferred. 
+ Collection reactionComponents = org.gk.model.InstanceUtilities.getReactionParticipants(reactionInst); + Set containedComponents = new HashSet<>(); + boolean hasContainedSARSInstance = false; + for (GKInstance reactionComponent : reactionComponents) { + containedComponents.add(reactionComponent); + containedComponents.addAll(org.gk.model.InstanceUtilities.getContainedInstances(reactionComponent, + hasComponent, + hasCandidate, + hasMember, + repeatedUnit)); + } + + for (GKInstance containedComponent : containedComponents) { + if (OrthologousEntityGenerator.hasSARSSpecies(containedComponent)) { + hasContainedSARSInstance = true; + } + } + + if (!hasContainedSARSInstance) { + inferredEvent.put(reactionInst, reactionInst); + inferrableHumanEvents.add(reactionInst); + return; + } + // End of screening code. + + GKInstance infReactionInst = InstanceUtilities.createNewInferredGKInstance(reactionInst); infReactionInst.addAttributeValue(name, reactionInst.getAttributeValuesList(name)); infReactionInst.addAttributeValue(goBiologicalProcess, reactionInst.getAttributeValue(goBiologicalProcess)); @@ -56,7 +80,6 @@ public static void inferReaction(GKInstance reactionInst) throws Exception // Total proteins are stored in reactionProteinCounts[0], inferrable proteins in [1], and the maximum number of homologues for any entity involved in index [2]. // Reactions with no proteins/EWAS (Total = 0) are not inferred. 
List reactionProteinCounts = ProteinCountUtility.getDistinctProteinCounts(reactionInst); - System.out.println(reactionProteinCounts); int reactionTotalProteinCounts = reactionProteinCounts.get(0); // if (reactionTotalProteinCounts > 0) // { From 3cfe8691c8d8bb8dc6ae86963548e191dd2a788c Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 3 Sep 2020 17:27:07 -0400 Subject: [PATCH 17/21] Mapping files used for cov2 projections added --- cov1_cov2_mapping.tsv | 59 +++++++++++++++++++++++++++++++++++++ cov2_coordinate_mapping.tsv | 59 +++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 cov1_cov2_mapping.tsv create mode 100644 cov2_coordinate_mapping.tsv diff --git a/cov1_cov2_mapping.tsv b/cov1_cov2_mapping.tsv new file mode 100644 index 00000000..5631c500 --- /dev/null +++ b/cov1_cov2_mapping.tsv @@ -0,0 +1,59 @@ +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6U8 P0DTC1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P0C6X7 P0DTD1 +P59594 P0DTC2 +P59632 P0DTC3 +P59596 P0DTC5 +P59637 P0DTC4 +P59634 P0DTC6 +P59635 P0DTC7 +Q7TFA1 P0DTD8 +Q7TFA0 P0DTC8 +P59595 P0DTC9 +P59636 P0DTD2 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 +NC_004718.3 MN908947.3 \ No newline at end of file diff --git a/cov2_coordinate_mapping.tsv 
b/cov2_coordinate_mapping.tsv new file mode 100644 index 00000000..d832048e --- /dev/null +++ b/cov2_coordinate_mapping.tsv @@ -0,0 +1,59 @@ +full genome NC_004718.3 1 29751 MN908947.3 1 29903 +mRNA1 NC_004718.3 1 21485 MN908947.3 1 21555 +mRNA2 NC_004718.3 21492 29751 MN908947.3 21563 29903 +mRNA3 NC_004718.3 25268 29751 MN908947.3 25393 29903 +mRNA4 NC_004718.3 26117 29751 MN908947.3 26245 29903 +mRNA5 NC_004718.3 26398 29751 MN908947.3 26523 29903 +mRNA6 NC_004718.3 26913 29751 MN908947.3 27202 29903 +mRNA7 NC_004718.3 27273 29751 MN908947.3 27394 29903 +mRNA8 NC_004718.3 27779 29751 MN908947.3 27894 29903 +mRNA9 NC_004718.3 28120 29751 MN908947.3 28274 29903 +full minus strand NC_004718.3 -29751 -1 MN908947.3 -29903 -1 +minus mRNA2 NC_004718.3 -29751 -21492 MN908947.3 -29903 -21563 +minus mRNA3 NC_004718.3 -29751 -25268 MN908947.3 -29903 -25393 +minus mRNA4 NC_004718.3 -29751 -26117 MN908947.3 -29903 -26245 +minus mRNA5 NC_004718.3 -29751 -26398 MN908947.3 -29903 -26523 +minus mRNA6 NC_004718.3 -29751 -26913 MN908947.3 -29903 -27202 +minus mRNA7 NC_004718.3 -29751 -27273 MN908947.3 -29903 -27394 +minus mRNA8 NC_004718.3 -29751 -27779 MN908947.3 -29903 -27894 +minus mRNA9 NC_004718.3 -29751 -28120 MN908947.3 -29903 -28274 +pp1a P0C6U8 P0DTC1 +pp1a-nsp1 P0C6U8 1 180 P0DTC1 1 180 +pp1a-nsp2 P0C6U8 181 818 P0DTC1 181 818 +pp1a-nsp3 P0C6U8 819 2740 P0DTC1 819 2763 +pp1a-nsp4 P0C6U8 2741 3240 P0DTC1 2764 3263 +pp1a-nsp5 P0C6U8 3241 3546 P0DTC1 3264 3569 +pp1a-nsp6 P0C6U8 3547 3836 P0DTC1 3570 3859 +pp1a-nsp7 P0C6U8 3837 3919 P0DTC1 3860 3942 +pp1a-nsp8 P0C6U8 3920 4117 P0DTC1 3943 4140 +pp1a-nsp9 P0C6U8 4118 4230 P0DTC1 4141 4253 +pp1a-nsp10 P0C6U8 4231 4369 P0DTC1 4254 4392 +pp1a-nsp11 P0C6U8 4370 4382 P0DTC1 4393 4405 +pp1a-nsp3-4 P0C6U8 819 3240 P0DTC1 819 3263 +pp1a-nsp6-11 P0C6U8 3547 4382 P0DTC1 3570 4382 +rep P0C6X7 P0DTD1 +pp1ab-nsp1 P0C6X7 1 180 P0DTD1 1 180 +pp1ab-nsp2 P0C6X7 181 818 P0DTD1 181 818 +pp1ab-nsp3 P0C6X7 819 2740 P0DTD1 819 2763 +pp1ab-nsp4 
P0C6X7 2741 3240 P0DTD1 2764 3263 +pp1ab-nsp5 P0C6X7 3241 3546 P0DTD1 3264 3569 +pp1ab-nsp6 P0C6X7 3547 3836 P0DTD1 3570 3859 +pp1ab-nsp7 P0C6X7 3837 3919 P0DTD1 3860 3942 +pp1ab-nsp8 P0C6X7 3920 4117 P0DTD1 3943 4140 +pp1ab-nsp9 P0C6X7 4118 4230 P0DTD1 4141 4253 +pp1ab-nsp10 P0C6X7 4231 4369 P0DTD1 4254 4392 +pp1ab-nsp12 P0C6X7 4370 5301 P0DTD1 4393 5324 +pp1ab-nsp13 P0C6X7 5302 5902 P0DTD1 5325 5925 +pp1ab-nsp14 P0C6X7 5903 6429 P0DTD1 5926 6452 +pp1ab-nsp15 P0C6X7 6430 6775 P0DTD1 6453 6798 +pp1ab-nsp15 P0C6X7 6776 7073 P0DTD1 6799 7096 +S P59594 P0DTC2 +3a P59632 P0DTC3 +M P59596 P0DTC5 +E P59637 P0DTC4 +6 P59634 P0DTC6 +7a P59635 P0DTC7 +7b Q7TFA1 P0DTD8 +8a Q7TFA0 P0DTC8 +N P59595 P0DTC9 +9b P59636 P0DTD2 \ No newline at end of file From 8712fb883432e8cb53e403ea0d1759f78018bbca Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 3 Sep 2020 17:27:43 -0400 Subject: [PATCH 18/21] Create placeholder.txt --- orthopairs/placeholder.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 orthopairs/placeholder.txt diff --git a/orthopairs/placeholder.txt b/orthopairs/placeholder.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/orthopairs/placeholder.txt @@ -0,0 +1 @@ + From dd4010b15e87cb46a25dddc55afe5fd94cc19286 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 3 Sep 2020 17:28:11 -0400 Subject: [PATCH 19/21] Rename cov1_cov2_mapping.tsv to orthopairs/cov1_cov2_mapping.tsv --- cov1_cov2_mapping.tsv => orthopairs/cov1_cov2_mapping.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename cov1_cov2_mapping.tsv => orthopairs/cov1_cov2_mapping.tsv (92%) diff --git a/cov1_cov2_mapping.tsv b/orthopairs/cov1_cov2_mapping.tsv similarity index 92% rename from cov1_cov2_mapping.tsv rename to orthopairs/cov1_cov2_mapping.tsv index 5631c500..c9d0a36d 100644 --- a/cov1_cov2_mapping.tsv +++ b/orthopairs/cov1_cov2_mapping.tsv @@ -56,4 +56,4 @@ NC_004718.3 MN908947.3 NC_004718.3 MN908947.3 NC_004718.3 MN908947.3 NC_004718.3 MN908947.3 
-NC_004718.3 MN908947.3 \ No newline at end of file +NC_004718.3 MN908947.3 From d486661f4c65f0e6554f695a5f34060ceeec6958 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 3 Sep 2020 17:28:27 -0400 Subject: [PATCH 20/21] Rename cov2_coordinate_mapping.tsv to orthopairs/cov2_coordinate_mapping.tsv --- .../cov2_coordinate_mapping.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename cov2_coordinate_mapping.tsv => orthopairs/cov2_coordinate_mapping.tsv (96%) diff --git a/cov2_coordinate_mapping.tsv b/orthopairs/cov2_coordinate_mapping.tsv similarity index 96% rename from cov2_coordinate_mapping.tsv rename to orthopairs/cov2_coordinate_mapping.tsv index d832048e..2431a3e6 100644 --- a/cov2_coordinate_mapping.tsv +++ b/orthopairs/cov2_coordinate_mapping.tsv @@ -56,4 +56,4 @@ E P59637 P0DTC4 7b Q7TFA1 P0DTD8 8a Q7TFA0 P0DTC8 N P59595 P0DTC9 -9b P59636 P0DTD2 \ No newline at end of file +9b P59636 P0DTD2 From 71116c2cda2a67d9d437cfb3a6c9c1490a49e8d1 Mon Sep 17 00:00:00 2001 From: Justin Cook Date: Thu, 3 Sep 2020 17:28:41 -0400 Subject: [PATCH 21/21] Delete placeholder.txt --- orthopairs/placeholder.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 orthopairs/placeholder.txt diff --git a/orthopairs/placeholder.txt b/orthopairs/placeholder.txt deleted file mode 100644 index 8b137891..00000000 --- a/orthopairs/placeholder.txt +++ /dev/null @@ -1 +0,0 @@ -