Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
f1cd37c
Initial cov1 to cov2 projection code
cookersjs Jun 24, 2020
b6becf1
PE & RlE projections rounded out; StableIdentifier notation adjusted
cookersjs Jun 24, 2020
85e96d3
Coordinate mapping from external file; RlEs/PEs/REs inferred properly
cookersjs Jun 24, 2020
6fa26a7
Human PEs are included; Disease pathway projection complete
cookersjs Jun 24, 2020
fb5901e
StableIdentifier made from inferred instance DBID; Disease/RelatedSpe…
cookersjs Jun 25, 2020
0143606
Updated inferred coordinate population; crossRef and keyword attribut…
cookersjs Jun 25, 2020
7d38954
Reverted StableIdentifier convention; EWAS names sorta corrected
cookersjs Jun 26, 2020
c8f6b59
Revert initial name projection; added first name in appropriate spot
cookersjs Jun 26, 2020
d29c052
DisplayName/Name changed from CoV-1 to CoV-2; debugging code for nonh…
cookersjs Jun 26, 2020
dcc7471
Changed position of caching in PathwaysInferrer
cookersjs Jun 26, 2020
9f2d6ee
Retrieve COV Reactions from Pathway hierarchy; RGP comments copied over
cookersjs Jul 3, 2020
afb6f42
Inferred Summations; suppressed StableIdentifier generation
cookersjs Jul 3, 2020
241d1fb
Suppressed ModifiedResidue projections; Code for outputting EWAS-MR info
cookersjs Jul 3, 2020
2a835a3
COV-1-containing Human instances handled; Slight tweak to summation m…
cookersjs Jul 5, 2020
1a8f3b5
Allow ModifiedResidue inference
cookersjs Jul 7, 2020
5237e70
Final cov1-to-cov2 projections; Do not infer Therapeutics referrals; …
cookersjs Jul 7, 2020
3cfe869
Mapping files used for cov2 projections added
cookersjs Sep 3, 2020
8712fb8
Create placeholder.txt
cookersjs Sep 3, 2020
dd4010b
Rename cov1_cov2_mapping.tsv to orthopairs/cov1_cov2_mapping.tsv
cookersjs Sep 3, 2020
d486661
Rename cov2_coordinate_mapping.tsv to orthopairs/cov2_coordinate_mapp…
cookersjs Sep 3, 2020
71116c2
Delete placeholder.txt
cookersjs Sep 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions orthopairs/cov1_cov2_mapping.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6U8 P0DTC1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P0C6X7 P0DTD1
P59594 P0DTC2
P59632 P0DTC3
P59596 P0DTC5
P59637 P0DTC4
P59634 P0DTC6
P59635 P0DTC7
Q7TFA1 P0DTD8
Q7TFA0 P0DTC8
P59595 P0DTC9
P59636 P0DTD2
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
NC_004718.3 MN908947.3
59 changes: 59 additions & 0 deletions orthopairs/cov2_coordinate_mapping.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
full genome NC_004718.3 1 29751 MN908947.3 1 29903
mRNA1 NC_004718.3 1 21485 MN908947.3 1 21555
mRNA2 NC_004718.3 21492 29751 MN908947.3 21563 29903
mRNA3 NC_004718.3 25268 29751 MN908947.3 25393 29903
mRNA4 NC_004718.3 26117 29751 MN908947.3 26245 29903
mRNA5 NC_004718.3 26398 29751 MN908947.3 26523 29903
mRNA6 NC_004718.3 26913 29751 MN908947.3 27202 29903
mRNA7 NC_004718.3 27273 29751 MN908947.3 27394 29903
mRNA8 NC_004718.3 27779 29751 MN908947.3 27894 29903
mRNA9 NC_004718.3 28120 29751 MN908947.3 28274 29903
full minus strand NC_004718.3 -29751 -1 MN908947.3 -29903 -1
minus mRNA2 NC_004718.3 -29751 -21492 MN908947.3 -29903 -21563
minus mRNA3 NC_004718.3 -29751 -25268 MN908947.3 -29903 -25393
minus mRNA4 NC_004718.3 -29751 -26117 MN908947.3 -29903 -26245
minus mRNA5 NC_004718.3 -29751 -26398 MN908947.3 -29903 -26523
minus mRNA6 NC_004718.3 -29751 -26913 MN908947.3 -29903 -27202
minus mRNA7 NC_004718.3 -29751 -27273 MN908947.3 -29903 -27394
minus mRNA8 NC_004718.3 -29751 -27779 MN908947.3 -29903 -27894
minus mRNA9 NC_004718.3 -29751 -28120 MN908947.3 -29903 -28274
pp1a P0C6U8 P0DTC1
pp1a-nsp1 P0C6U8 1 180 P0DTC1 1 180
pp1a-nsp2 P0C6U8 181 818 P0DTC1 181 818
pp1a-nsp3 P0C6U8 819 2740 P0DTC1 819 2763
pp1a-nsp4 P0C6U8 2741 3240 P0DTC1 2764 3263
pp1a-nsp5 P0C6U8 3241 3546 P0DTC1 3264 3569
pp1a-nsp6 P0C6U8 3547 3836 P0DTC1 3570 3859
pp1a-nsp7 P0C6U8 3837 3919 P0DTC1 3860 3942
pp1a-nsp8 P0C6U8 3920 4117 P0DTC1 3943 4140
pp1a-nsp9 P0C6U8 4118 4230 P0DTC1 4141 4253
pp1a-nsp10 P0C6U8 4231 4369 P0DTC1 4254 4392
pp1a-nsp11 P0C6U8 4370 4382 P0DTC1 4393 4405
pp1a-nsp3-4 P0C6U8 819 3240 P0DTC1 819 3263
pp1a-nsp6-11 P0C6U8 3547 4382 P0DTC1 3570 4405
rep P0C6X7 P0DTD1
pp1ab-nsp1 P0C6X7 1 180 P0DTD1 1 180
pp1ab-nsp2 P0C6X7 181 818 P0DTD1 181 818
pp1ab-nsp3 P0C6X7 819 2740 P0DTD1 819 2763
pp1ab-nsp4 P0C6X7 2741 3240 P0DTD1 2764 3263
pp1ab-nsp5 P0C6X7 3241 3546 P0DTD1 3264 3569
pp1ab-nsp6 P0C6X7 3547 3836 P0DTD1 3570 3859
pp1ab-nsp7 P0C6X7 3837 3919 P0DTD1 3860 3942
pp1ab-nsp8 P0C6X7 3920 4117 P0DTD1 3943 4140
pp1ab-nsp9 P0C6X7 4118 4230 P0DTD1 4141 4253
pp1ab-nsp10 P0C6X7 4231 4369 P0DTD1 4254 4392
pp1ab-nsp12 P0C6X7 4370 5301 P0DTD1 4393 5324
pp1ab-nsp13 P0C6X7 5302 5902 P0DTD1 5325 5925
pp1ab-nsp14 P0C6X7 5903 6429 P0DTD1 5926 6452
pp1ab-nsp15 P0C6X7 6430 6775 P0DTD1 6453 6798
pp1ab-nsp16 P0C6X7 6776 7073 P0DTD1 6799 7096
S P59594 P0DTC2
3a P59632 P0DTC3
M P59596 P0DTC5
E P59637 P0DTC4
6 P59634 P0DTC6
7a P59635 P0DTC7
7b Q7TFA1 P0DTD8
8a Q7TFA0 P0DTC8
N P59595 P0DTC9
9b P59636 P0DTD2
205 changes: 159 additions & 46 deletions src/main/java/org/reactome/orthoinference/EWASInferrer.java

Large diffs are not rendered by default.

149 changes: 109 additions & 40 deletions src/main/java/org/reactome/orthoinference/EventsInferrer.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ public class EventsInferrer
private static List<GKInstance> manualHumanEvents = new ArrayList<>();
private static StableIdentifierGenerator stableIdentifierGenerator;
private static OrthologousPathwayDiagramGenerator orthologousPathwayDiagramGenerator;
private static Long sarsCOV1InfectionsPathwayDbId = 9678108L;

@SuppressWarnings("unchecked")
public static void inferEvents(Properties props, String species) throws Exception
public static void inferEvents(Properties props, String referenceSpecies, String targetSpecies) throws Exception
{
logger.info("Preparing DB Adaptor and setting project variables");
// Set up DB adaptor using config.properties file
Expand All @@ -65,6 +66,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio
int port = Integer.valueOf(props.getProperty("release.database.port"));

dbAdaptor = new MySQLAdaptor(host, database, username, password, port);
System.out.println(dbAdaptor.fetchMaxDbId());
dbAdaptorPrev = new MySQLAdaptor(host, prevDatabase, username, password, port);
if (dbAdaptor == null || dbAdaptorPrev == null) {
logger.fatal("Null MySQLAdaptor, terminating orthoinference");
Expand All @@ -91,82 +93,102 @@ public static void inferEvents(Properties props, String species) throws Exceptio
JSONObject jsonObject = (JSONObject) obj;

// Parse Species information (found in Species.json config file)
JSONObject speciesObject = (JSONObject) jsonObject.get(species);
JSONArray speciesNames = (JSONArray) speciesObject.get("name");
String speciesName = (String) speciesNames.get(0);
logger.info("Beginning orthoinference of " + speciesName);
JSONObject targetSpeciesObject = (JSONObject) jsonObject.get(targetSpecies);
JSONArray targetSpeciesNames = (JSONArray) targetSpeciesObject.get("name");
String targetSpeciesName = (String) targetSpeciesNames.get(0);

JSONObject refDb = (JSONObject) speciesObject.get("refdb");
String refDbUrl = (String) refDb.get("url");
String refDbProteinUrl = (String) refDb.get("access");
String refDbGeneUrl = (String) refDb.get("ensg_access");
JSONObject referenceSpeciesObject = (JSONObject) jsonObject.get(referenceSpecies);
JSONArray referenceSpeciesNames = (JSONArray) referenceSpeciesObject.get("name");
String referenceSpeciesName = (String) referenceSpeciesNames.get(0);

logger.info("Beginning orthoinference of " + targetSpeciesName);
JSONObject targetSpeciesRefDb = (JSONObject) targetSpeciesObject.get("refdb");
// String refDbUrl = (String) refDb.get("url");
// String refDbProteinUrl = (String) refDb.get("access");
// String refDbGeneUrl = (String) refDb.get("ensg_access");

// Creates two files that a) list reactions that are eligible for inference and b) those that are successfully inferred
String eligibleFilename = "eligible_" + species + "_75.txt";
String inferredFilename = "inferred_" + species + "_75.txt";
String eligibleFilename = "eligible_" + targetSpecies + "_75.txt";
String inferredFilename = "inferred_" + targetSpecies + "_75.txt";
createNewFile(eligibleFilename);
createNewFile(inferredFilename);
ReactionInferrer.setEligibleFilename(eligibleFilename);
ReactionInferrer.setInferredFilename(inferredFilename);

stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) speciesObject.get("abbreviation"));
stableIdentifierGenerator = new StableIdentifierGenerator(dbAdaptor, (String) targetSpeciesObject.get("abbreviation"));
// Set static variables (DB/Species Instances, mapping files) that will be repeatedly used
setInstanceEdits(personId);
try {
readAndSetHomologueMappingFile(species, "hsap", pathToOrthopairs);
readAndSetGeneNameMappingFile(species, pathToOrthopairs);
readAndSetHomologueMappingFile(targetSpecies, referenceSpecies, pathToOrthopairs);
// readAndSetGeneNameMappingFile(targetSpecies, pathToOrthopairs);
} catch (Exception e) {
logger.fatal("Unable to locate " + speciesName +" mapping file: hsap_" + species + "_mapping.tsv. Orthology prediction not possible.");
logger.fatal("Unable to locate " + targetSpeciesName +" mapping file: hsap_" + targetSpecies + "_mapping.tsv. Orthology prediction not possible.");
e.printStackTrace();
System.exit(1);
}
EWASInferrer.readENSGMappingFile(species, pathToOrthopairs);
EWASInferrer.fetchAndSetUniprotDbInstance();
EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl);
EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl);
try {
EWASInferrer.readAndSetCoordinateMappingFile(targetSpecies);
} catch (Exception e) {
logger.fatal("Unable to locate coordinate mapping file");
e.printStackTrace();
System.exit(1);
}
// EWASInferrer.readENSGMappingFile(targetSpecies, pathToOrthopairs);
EWASInferrer.fetchAndSetDbInstances();
// EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl);
// EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl);

JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb");
JSONObject altRefDbJSON = (JSONObject) targetSpeciesObject.get("alt_refdb");
if (altRefDbJSON != null)
{
logger.info("Alternate DB exists for " + speciesName);
logger.info("Alternate DB exists for " + targetSpeciesName);
EWASInferrer.createAlternateReferenceDBInstance(altRefDbJSON);
} else {
EWASInferrer.setAltRefDbToFalse();
}
createAndSetSpeciesInstance(speciesName);
setSummationInstance();
createAndSetSpeciesInstance(targetSpeciesName);
// setSummationInstance();
setEvidenceTypeInstance();
OrthologousEntityGenerator.setComplexSummationInstance();
InstanceUtilities.setDiseaseInstance(dbAdaptor.fetchInstance(9683915L));
// OrthologousEntityGenerator.setComplexSummationInstance();

/**
* Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the species.
* Start of ReactionlikeEvent inference. Retrieves all human ReactionlikeEvents, and attempts to infer each for the targetSpecies.
*/
// Gets DB instance of source species (human)
Collection<GKInstance> sourceSpeciesInst = (Collection<GKInstance>) dbAdaptor.fetchInstanceByAttribute("Species", "name", "=", "Homo sapiens");
if (sourceSpeciesInst.isEmpty())
// Gets DB instance of source targetSpecies (human)
Collection<GKInstance> referenceSpeciesInst = (Collection<GKInstance>) dbAdaptor.fetchInstanceByAttribute("Species", "name", "=", referenceSpeciesName);
if (referenceSpeciesInst.isEmpty())
{
logger.fatal("Could not find Species instance for Homo sapiens");
logger.fatal("Could not find Species instance for " + referenceSpeciesName);
System.exit(1);
}
long humanInstanceDbId = sourceSpeciesInst.iterator().next().getDBID();
orthologousPathwayDiagramGenerator = new OrthologousPathwayDiagramGenerator(dbAdaptor, dbAdaptorPrev, speciesInst, personId, humanInstanceDbId);
// Gets Reaction instances of source species (human)
Collection<GKInstance> reactionInstances = (Collection<GKInstance>) dbAdaptor.fetchInstanceByAttribute("ReactionlikeEvent", "species", "=", humanInstanceDbId);

long referenceSpeciesInstanceDbId = referenceSpeciesInst.iterator().next().getDBID();
orthologousPathwayDiagramGenerator = new OrthologousPathwayDiagramGenerator(dbAdaptor, dbAdaptorPrev, speciesInst, personId, referenceSpeciesInstanceDbId);
// Gets Reaction instances of source targetSpecies (human)
Collection<GKInstance> reactionInstances = new ArrayList<>(); //
if (referenceSpeciesName.equals("Human SARS coronavirus")) {
GKInstance covPathwayInst = dbAdaptor.fetchInstance(sarsCOV1InfectionsPathwayDbId);
Set<GKInstance> uniqueReactionInstances = new HashSet<>();
for (GKInstance hasEventInst : (Collection<GKInstance>) covPathwayInst.getAttributeValuesList(hasEvent)) {
uniqueReactionInstances.addAll(getReactionsInEventHierarchy(hasEventInst));
}
reactionInstances.addAll(uniqueReactionInstances);
} else {
reactionInstances = (Collection<GKInstance>) dbAdaptor.fetchInstanceByAttribute(ReactionlikeEvent, species, "=", referenceSpeciesInstanceDbId);
}
List<Long> dbids = new ArrayList<>();
Map<Long, GKInstance> reactionMap = new HashMap<>();
for (GKInstance reactionInst : reactionInstances) {
dbids.add(reactionInst.getDBID());
reactionMap.put(reactionInst.getDBID(), reactionInst);
}
Collections.sort(dbids);

logger.info(sourceSpeciesInst.iterator().next().getDisplayName() + " ReactionlikeEvent instances: " + dbids.size());
logger.info(referenceSpeciesInst.iterator().next().getDisplayName() + " ReactionlikeEvent instances: " + dbids.size());
for (Long dbid : dbids)
{
GKInstance reactionInst = reactionMap.get(dbid);
logger.info("Attempting RlE inference: " + reactionInst);
// Check if the current Reaction already exists for this species, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute.
// Check if the current Reaction already exists for this targetSpecies, that it is a valid instance (passes some filters), and that it doesn't have a Disease attribute.
// Adds to manualHumanEvents array if it passes conditions. This code block allows you to re-run the code without re-inferring instances.
List<GKInstance> previouslyInferredInstances = new ArrayList<GKInstance>();
previouslyInferredInstances = checkIfPreviouslyInferred(reactionInst, orthologousEvent, previouslyInferredInstances);
Expand All @@ -185,7 +207,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio
continue;
}

// An inferred ReactionlikeEvent doesn't already exist for this species, and an orthologous inference will be attempted.
// An inferred ReactionlikeEvent doesn't already exist for this targetSpecies, and an orthologous inference will be attempted.
try {
ReactionInferrer.inferReaction(reactionInst);
logger.info("Successfully inferred " + reactionInst);
Expand All @@ -194,11 +216,58 @@ public static void inferEvents(Properties props, String species) throws Exceptio
System.exit(1);
}
}

// Outputs a file that contains all EWAS' that were inferred and their contained hasModifiedResidues.
// Map<GKInstance, List<GKInstance>> modifiedResiduesMapping = EWASInferrer.getModifiedResiduesMapping();
// String header = "COV-1 EWAS\tCOV-1 ModifiedResidues\n";
// Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), header.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
// for (GKInstance ewasInst : modifiedResiduesMapping.keySet()) {
// String outputLine = ewasInst + "\t";
// List<GKInstance> modifiedResidues = modifiedResiduesMapping.get(ewasInst);
// int count = 0;
// for (GKInstance modifiedResidueInst : modifiedResidues) {
// if (count != 0) {
// outputLine += "|" + modifiedResidueInst;
// } else {
// outputLine += modifiedResidueInst;
// count++;
// }
// }
// outputLine += "\n";
// Files.write(Paths.get("EWAS-ModifiedResidues-Mappings.tsv"), outputLine.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
// }

PathwaysInferrer.setInferredEvent(ReactionInferrer.getInferredEvent());
PathwaysInferrer.inferPathways(ReactionInferrer.getInferrableHumanEvents());
orthologousPathwayDiagramGenerator.generateOrthologousPathwayDiagrams();
outputReport(species);
logger.info("Finished orthoinference of " + speciesName);
outputReport(targetSpecies);
logger.info("Finished orthoinference of " + targetSpeciesName);

System.out.println(dbAdaptor.fetchMaxDbId());

// Find inferred instances that are referred to by multiple curated instances
// Likely due to lack of distinguishing 'defining' attributes.
// Collection<GKInstance> inferredInstances = dbAdaptor.fetchInstancesByClass(Event);
// inferredInstances.addAll(dbAdaptor.fetchInstancesByClass(PhysicalEntity));
// for (GKInstance inferredInst : inferredInstances) {
// GKInstance createdInst = (GKInstance) inferredInst.getAttributeValue(created);
// Collection<GKInstance> inferredFromInstances = inferredInst.getAttributeValuesList(inferredFrom);
// if (createdInst != null && createdInst.getDisplayName().contains("Justin") && inferredFromInstances.size() > 1) {
// System.out.println(inferredInst);
// }
// }
}

/**
 * Gathers the ReactionlikeEvent instances found at or beneath an Event.
 * An instance whose schema class is a ReactionlikeEvent is returned on its own;
 * any other instance is expanded recursively through each of its 'hasEvent' values.
 *
 * @param eventInst Event instance at which the descent starts
 * @return set holding every ReactionlikeEvent instance found at or below eventInst
 * @throws Exception propagated from the attribute lookups or from the recursive calls
 */
private static Set<GKInstance> getReactionsInEventHierarchy(GKInstance eventInst) throws Exception {
	Set<GKInstance> collectedReactions = new HashSet<>();
	if (!eventInst.getSchemClass().isa(ReactionlikeEvent)) {
		// Not a reaction itself: descend into every child event and merge what each subtree yields.
		Collection<GKInstance> childEvents = (Collection<GKInstance>) eventInst.getAttributeValuesList(hasEvent);
		for (GKInstance childEvent : childEvents) {
			collectedReactions.addAll(getReactionsInEventHierarchy(childEvent));
		}
		return collectedReactions;
	}
	// The instance is a reaction: it is the only member of the result.
	collectedReactions.add(eventInst);
	return collectedReactions;
}

/**
Expand Down
Loading