From c1b5a6dbc62c0c8715277141350b3bfba388fede Mon Sep 17 00:00:00 2001 From: jweiser Date: Mon, 9 Feb 2026 23:37:09 -0500 Subject: [PATCH 1/4] updates Species.json to use EnsEMBL ref dbs from gk_central --- src/main/resources/Species.json | 135 ++++---------------------------- 1 file changed, 15 insertions(+), 120 deletions(-) diff --git a/src/main/resources/Species.json b/src/main/resources/Species.json index bfbb9a7..a14585c 100644 --- a/src/main/resources/Species.json +++ b/src/main/resources/Species.json @@ -2,14 +2,7 @@ "ddis":{ "mart_url":"http:\/\/protists.ensembl.org\/biomart\/martservice", "refdb":{ - "url":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/Info\/Index", - "ensg_access":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/geneview?gene=###ID###&db=core", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Dictyostelium discoideum_PROTEIN" - ], - "access":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/Transcript\/ProteinSummary?peptide=###ID###" + "use_gk_central_ensembl_ref_db": "protist" }, "alt_refdb":{ "url":"http:\/\/www.dictybase.org\/", @@ -35,14 +28,7 @@ "url":"http:\/\/www.wormbase.org" }, "refdb":{ - "access":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_C_elegans_PROTEIN" - ], - "url":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/Info\/Index", - "ensg_access":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "main" }, "name":[ "Caenorhabditis elegans" @@ -58,14 +44,7 @@ "Saccharomyces cerevisiae" ], "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_S_cerevisiae_PROTEIN" - ], - "access":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/Transcript\/ProteinSummary?peptide=###ID###", - "url":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/Info\/Index", - "ensg_access":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "fungi" }, "alt_refdb":{ "access":"https:\/\/www.yeastgenome.org\/search?q=###ID###&category=locus", @@ -87,14 +66,7 @@ ], "mart_group":"sscrofa_gene_ensembl", "refdb":{ - "ensg_access":"http:\/\/www.ensembl.org\/Sus_scrofa\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Sus_scrofa\/Info\/Index\/", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Sus_scrofa_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Sus_scrofa\/Transcript\/ProteinSummary?peptide=###ID###" + "use_gk_central_ensembl_ref_db": "main" }, "abbreviation": "SSC" }, @@ -105,27 +77,13 @@ "Homo sapiens" ], "refdb":{ - "url":"http:\/\/www.ensembl.org\/Homo_sapiens\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Homo_sapiens\/geneview?gene=###ID###&db=core", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Homo_sapiens_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Homo_sapiens\/Transcript\/ProteinSummary?peptide=###ID###" + "use_gk_central_ensembl_ref_db": "main" }, "abbreviation": "HSA" }, "ggal":{ "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Gallus_gallus_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Gallus_gallus\/Transcript\/ProteinSummary?peptide=###ID###", - "ensg_access":"http:\/\/www.ensembl.org\/Gallus_gallus\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Gallus_gallus\/Info\/Index\/" + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"ggallus_gene_ensembl", "name":[ @@ -141,14 +99,7 @@ "Xenopus tropicalis" ], "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Xenopus_tropicalis_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/Transcript\/ProteinSummary?peptide=###ID###", - "url":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "main" }, "group":"Vertebrate", "compara":"core", @@ -170,27 +121,13 @@ }, "mart_url":"http:\/\/fungi.ensembl.org\/biomart\/martservice", "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_S_pombe_PROTEIN" - ], - "access":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/Transcript\/ProteinSummary?peptide=###ID###", - "ensg_access":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/geneview?gene=###ID###&db=core", - "url":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/Info\/Index" + "use_gk_central_ensembl_ref_db": "fungi" }, "abbreviation": "SPO" }, "btau":{ "refdb":{ - "url":"http:\/\/www.ensembl.org\/Bos_taurus\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Bos_taurus\/geneview?gene=###ID###&db=core", - "access":"http:\/\/www.ensembl.org\/Bos_taurus\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Bos_taurus_PROTEIN" - ] + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"btaurus_gene_ensembl", "name":[ @@ -202,14 +139,7 @@ }, "mmus":{ "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Mus_musculus_PROTEIN" - ], - "access":"http:\/\/www.ensembl.org\/Mus_musculus\/Transcript\/ProteinSummary?peptide=###ID###", - "url":"http:\/\/www.ensembl.org\/Mus_musculus\/Info\/Index\/", - "ensg_access":"http:\/\/www.ensembl.org\/Mus_musculus\/geneview?gene=###ID###&db=core" + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"mmusculus_gene_ensembl", "name":[ @@ -223,14 +153,7 @@ "compara":"core", "group":"Vertebrate", "refdb":{ - "access":"http:\/\/www.ensembl.org\/Canis_familiaris\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Canis_PROTEIN" - ], - "ensg_access":"http:\/\/www.ensembl.org\/Canis_familiaris\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Canis_familiaris\/Info\/Index\/" + "use_gk_central_ensembl_ref_db": "main" }, "name":[ "Canis familiaris" @@ -244,14 +167,7 @@ "Plasmodium falciparum" ], "refdb":{ - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_P_falciparum_PROTEIN" - ], - "access":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/Transcript\/ProteinSummary?peptide=###ID###", - "ensg_access":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/geneview?gene=###ID###&db=core", - "url":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/Info\/Index" + "use_gk_central_ensembl_ref_db": "protist" }, "mart_url":"http:\/\/protists.ensembl.org\/biomart\/martservice", "alt_refdb":{ @@ -269,14 +185,7 @@ "compara":"core", "group":"Metazoan", "refdb":{ - "ensg_access":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster\/geneview?gene=###ID###&db=core", - "url":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster", - "access":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_D_melanogaster_PROTEIN" - ] + "use_gk_central_ensembl_ref_db": "main" }, "alt_refdb":{ "access":"https:\/\/flybase.org\/reports\/###ID###.html", @@ -297,14 +206,7 @@ "Danio rerio" ], "refdb":{ - "access":"http:\/\/www.ensembl.org\/Danio_rerio\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Danio_rerio_PROTEIN" - ], - "ensg_access":"http:\/\/www.ensembl.org\/Danio_rerio\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Danio_rerio\/Info\/Index\/" + "use_gk_central_ensembl_ref_db": "main" }, "group":"Vertebrate", "compara":"core", @@ -314,14 +216,7 @@ "compara":"core", "group":"Vertebrate", "refdb":{ - "ensg_access":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/geneview?gene=###ID###&db=core", - "url":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/Info\/Index\/", - "access":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/Transcript\/ProteinSummary?peptide=###ID###", - "dbname":[ - "ENSEMBL", - "Ensembl", - "ENSEMBL_Rattus_norvegicus_PROTEIN" - ] + "use_gk_central_ensembl_ref_db": "main" }, "mart_group":"rnorvegicus_gene_ensembl", "name":[ From 66395ffb81f925aa956564100f959411ebd65a64 Mon Sep 17 00:00:00 2001 From: jweiser Date: Mon, 9 Feb 2026 23:38:31 -0500 Subject: [PATCH 2/4] updates inferrer classes to use EnsEMBL ref dbs from gk_central --- .../reactome/orthoinference/EWASInferrer.java | 85 +++++++++++-------- .../orthoinference/EventsInferrer.java | 7 +- 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 685a2b9..17f0a0a 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -339,53 +339,39 @@ public static void readENSGMappingFile(String toSpecies, String pathToOrthopairs // Fetches Uniprot DB instance @SuppressWarnings("unchecked") - public static void fetchAndSetUniprotDbInstance() throws Exception - { + public static void fetchAndSetUniprotDbInstance() throws Exception { Collection uniprotDbInstances = (Collection) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "UniProt"); uniprotDbInst = uniprotDbInstances.iterator().next(); } - // Creates instance pertaining to the species Ensembl Protein DB - public static void createEnsemblProteinDbInstance(String toSpeciesLong, String toSpeciesReferenceDbUrl, String toSpeciesEnspAccessUrl) throws Exception - { - String enspSpeciesDb = "ENSEMBL_" + toSpeciesLong + "_PROTEIN"; - enspDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); - enspDbInst.setDbAdaptor(dba); - enspDbInst.addAttributeValue(created, instanceEditInst); - enspDbInst.addAttributeValue(name, "Ensembl"); - enspDbInst.addAttributeValue(name, enspSpeciesDb); - enspDbInst.addAttributeValue(url, toSpeciesReferenceDbUrl); - enspDbInst.addAttributeValue(accessUrl, toSpeciesEnspAccessUrl); - enspDbInst.setAttributeValue(_displayName, "Ensembl"); - dba.storeInstance(enspDbInst); - } + public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType) throws Exception { + GKInstance ensemblDbInst = fetchEnsemblDbInstance(ensemblDatabaseType); + if (ensemblDbInst == null) { + throw new IllegalStateException( + "Unable to fetch EnsEMBL Reference Database for type: " + ensemblDatabaseType + ); + } - // Creates instance pertaining to the species Ensembl Gene DB - public static void createEnsemblGeneDBInstance(String toSpeciesLong, String toSpeciesReferenceDbUrl, String toSpeciesEnsgAccessUrl) throws Exception - { - String ensgSpeciesDb = "ENSEMBL_" + toSpeciesLong + "_GENE"; - ensgDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); - ensgDbInst.setDbAdaptor(dba); - ensgDbInst.addAttributeValue(created, instanceEditInst); - ensgDbInst.addAttributeValue(name, "ENSEMBL"); - ensgDbInst.addAttributeValue(name, ensgSpeciesDb); - ensgDbInst.addAttributeValue(url, toSpeciesReferenceDbUrl); - ensgDbInst.addAttributeValue(accessUrl, toSpeciesEnsgAccessUrl); - ensgDbInst.setAttributeValue(_displayName, "ENSEMBL"); - dba.storeInstance(ensgDbInst); + ensgDbInst = ensemblDbInst; + enspDbInst = ensemblDbInst; } // Create instance pertaining to any alternative reference DB for the species public static void createAlternateReferenceDBInstance(JSONObject altRefDbJSON) throws Exception { - alternateDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); - alternateDbInst.setDbAdaptor(dba); - alternateDbInst.addAttributeValue(created, instanceEditInst); - alternateDbInst.addAttributeValue(name, ((JSONArray) altRefDbJSON.get("dbname")).get(0)); - alternateDbInst.addAttributeValue(url, altRefDbJSON.get("url")); - alternateDbInst.addAttributeValue(accessUrl, altRefDbJSON.get("access")); - alternateDbInst.setAttributeValue(_displayName, ((JSONArray) altRefDbJSON.get("dbname")).get(0)); - alternateDbInst = InstanceUtilities.checkForIdenticalInstances(alternateDbInst, null); + String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbName")).get(0); + if (refDbExistsInDb(altRefDbDisplayName)) { + alternateDbInst = getRefDbFromDb(altRefDbDisplayName); + } else { + alternateDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase)); + alternateDbInst.setDbAdaptor(dba); + alternateDbInst.addAttributeValue(created, instanceEditInst); + alternateDbInst.addAttributeValue(name, altRefDbDisplayName); + alternateDbInst.addAttributeValue(url, altRefDbJSON.get("url")); + alternateDbInst.addAttributeValue(accessUrl, altRefDbJSON.get("access")); + alternateDbInst.setAttributeValue(_displayName, altRefDbDisplayName); + alternateDbInst = InstanceUtilities.checkForIdenticalInstances(alternateDbInst, null); + } if (altRefDbJSON.get("alt_id") != null) { altRefDbId = (String) altRefDbJSON.get("alt_id"); @@ -412,4 +398,29 @@ public static void setWormbaseMappings(Map> wormbaseMapping public static void setGeneNameMappingFile(Map geneNameMappingsCopy) { geneNameMappings = geneNameMappingsCopy; } + + private static boolean refDbExistsInDb(String refDbDisplayName) throws Exception { + Collection refDbInstances = dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName); + return !refDbInstances.isEmpty(); + } + + private static GKInstance getRefDbFromDb(String refDbDisplayName) throws Exception { + return (GKInstance) dba.fetchInstanceByAttribute( + ReferenceDatabase, _displayName, "=", refDbDisplayName + ).iterator().next(); + } + + private static GKInstance fetchEnsemblDbInstance(String ensemblDatabaseType) throws Exception { + GKInstance ensemblDbInst; + if (ensemblDatabaseType.equals("main")) { + ensemblDbInst = getRefDbFromDb("ENSEMBL"); + } else if (ensemblDatabaseType.equals("fungi")) { + ensemblDbInst = getRefDbFromDb("ENSEMBL Fungi"); + } else if (ensemblDatabaseType.equals("protist")) { + ensemblDbInst = getRefDbFromDb("ENSEMBL Protist"); + } else { + throw new IllegalStateException(ensemblDatabaseType + " is not a valid EnsEMBL database type"); + } + return ensemblDbInst; + } } diff --git a/src/main/java/org/reactome/orthoinference/EventsInferrer.java b/src/main/java/org/reactome/orthoinference/EventsInferrer.java index 40943d5..bcd3fe0 100644 --- a/src/main/java/org/reactome/orthoinference/EventsInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EventsInferrer.java @@ -95,9 +95,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio logger.info("Beginning orthoinference of " + speciesName); JSONObject refDb = (JSONObject) speciesObject.get("refdb"); - String refDbUrl = (String) refDb.get("url"); - String refDbProteinUrl = (String) refDb.get("access"); - String refDbGeneUrl = (String) refDb.get("ensg_access"); + String ensemblDatabaseType = (String) refDb.get("use_gk_central_ensembl_ref_db"); // Creates two files that a) list reactions that are eligible for inference and b) those that are successfully inferred String eligibleFilename = "eligible_" + species + "_75.txt"; @@ -120,8 +118,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio } EWASInferrer.readENSGMappingFile(species, pathToOrthopairs); EWASInferrer.fetchAndSetUniprotDbInstance(); - EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl); - EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl); + EWASInferrer.fetchAndSetEnsemblDbInstance(ensemblDatabaseType); JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb"); if (altRefDbJSON != null) From 9177479b174890dfe1077c39c42d8a152004eeaa Mon Sep 17 00:00:00 2001 From: jweiser Date: Tue, 10 Feb 2026 21:29:52 -0500 Subject: [PATCH 3/4] bug fix - corrects typo in dbname JSON key --- src/main/java/org/reactome/orthoinference/EWASInferrer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index 17f0a0a..b2e9189 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -359,7 +359,7 @@ public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType) thro // Create instance pertaining to any alternative reference DB for the species public static void createAlternateReferenceDBInstance(JSONObject altRefDbJSON) throws Exception { - String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbName")).get(0); + String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbname")).get(0); if (refDbExistsInDb(altRefDbDisplayName)) { alternateDbInst = getRefDbFromDb(altRefDbDisplayName); } else { From 291ad8fa03cec7f6d15ec1e92b6ae4810e1e3a6c Mon Sep 17 00:00:00 2001 From: jweiser Date: Tue, 10 Feb 2026 21:51:09 -0500 Subject: [PATCH 4/4] adds null check for fetched reference databases --- src/main/java/org/reactome/orthoinference/EWASInferrer.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/reactome/orthoinference/EWASInferrer.java b/src/main/java/org/reactome/orthoinference/EWASInferrer.java index b2e9189..23dc4c1 100644 --- a/src/main/java/org/reactome/orthoinference/EWASInferrer.java +++ b/src/main/java/org/reactome/orthoinference/EWASInferrer.java @@ -400,8 +400,9 @@ public static void setGeneNameMappingFile(Map geneNameMappingsCo } private static boolean refDbExistsInDb(String refDbDisplayName) throws Exception { - Collection refDbInstances = dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName); - return !refDbInstances.isEmpty(); + Collection refDbInstances = + dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName); + return refDbInstances != null && !refDbInstances.isEmpty(); } private static GKInstance getRefDbFromDb(String refDbDisplayName) throws Exception {