Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 49 additions & 37 deletions src/main/java/org/reactome/orthoinference/EWASInferrer.java
Original file line number Diff line number Diff line change
Expand Up @@ -339,53 +339,39 @@ public static void readENSGMappingFile(String toSpecies, String pathToOrthopairs

// Fetches the UniProt ReferenceDatabase instance and caches it in uniprotDbInst
@SuppressWarnings("unchecked")
public static void fetchAndSetUniprotDbInstance() throws Exception
{
public static void fetchAndSetUniprotDbInstance() throws Exception {
Collection<GKInstance> uniprotDbInstances = (Collection<GKInstance>) dba.fetchInstanceByAttribute(ReferenceDatabase, name, "=", "UniProt");
uniprotDbInst = uniprotDbInstances.iterator().next();
}

// Creates instance pertaining to the species Ensembl Protein DB
public static void createEnsemblProteinDbInstance(String toSpeciesLong, String toSpeciesReferenceDbUrl, String toSpeciesEnspAccessUrl) throws Exception
{
String enspSpeciesDb = "ENSEMBL_" + toSpeciesLong + "_PROTEIN";
enspDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase));
enspDbInst.setDbAdaptor(dba);
enspDbInst.addAttributeValue(created, instanceEditInst);
enspDbInst.addAttributeValue(name, "Ensembl");
enspDbInst.addAttributeValue(name, enspSpeciesDb);
enspDbInst.addAttributeValue(url, toSpeciesReferenceDbUrl);
enspDbInst.addAttributeValue(accessUrl, toSpeciesEnspAccessUrl);
enspDbInst.setAttributeValue(_displayName, "Ensembl");
dba.storeInstance(enspDbInst);
}
public static void fetchAndSetEnsemblDbInstance(String ensemblDatabaseType) throws Exception {
GKInstance ensemblDbInst = fetchEnsemblDbInstance(ensemblDatabaseType);
if (ensemblDbInst == null) {
throw new IllegalStateException(
"Unable to fetch EnsEMBL Reference Database for type: " + ensemblDatabaseType
);
}

// Creates instance pertaining to the species Ensembl Gene DB
public static void createEnsemblGeneDBInstance(String toSpeciesLong, String toSpeciesReferenceDbUrl, String toSpeciesEnsgAccessUrl) throws Exception
{
String ensgSpeciesDb = "ENSEMBL_" + toSpeciesLong + "_GENE";
ensgDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase));
ensgDbInst.setDbAdaptor(dba);
ensgDbInst.addAttributeValue(created, instanceEditInst);
ensgDbInst.addAttributeValue(name, "ENSEMBL");
ensgDbInst.addAttributeValue(name, ensgSpeciesDb);
ensgDbInst.addAttributeValue(url, toSpeciesReferenceDbUrl);
ensgDbInst.addAttributeValue(accessUrl, toSpeciesEnsgAccessUrl);
ensgDbInst.setAttributeValue(_displayName, "ENSEMBL");
dba.storeInstance(ensgDbInst);
ensgDbInst = ensemblDbInst;
enspDbInst = ensemblDbInst;
}

// Reuses the stored instance for the species' alternative reference DB if one already exists; otherwise creates it
public static void createAlternateReferenceDBInstance(JSONObject altRefDbJSON) throws Exception
{
alternateDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase));
alternateDbInst.setDbAdaptor(dba);
alternateDbInst.addAttributeValue(created, instanceEditInst);
alternateDbInst.addAttributeValue(name, ((JSONArray) altRefDbJSON.get("dbname")).get(0));
alternateDbInst.addAttributeValue(url, altRefDbJSON.get("url"));
alternateDbInst.addAttributeValue(accessUrl, altRefDbJSON.get("access"));
alternateDbInst.setAttributeValue(_displayName, ((JSONArray) altRefDbJSON.get("dbname")).get(0));
alternateDbInst = InstanceUtilities.checkForIdenticalInstances(alternateDbInst, null);
String altRefDbDisplayName = (String) ((JSONArray) altRefDbJSON.get("dbname")).get(0);
if (refDbExistsInDb(altRefDbDisplayName)) {
alternateDbInst = getRefDbFromDb(altRefDbDisplayName);
} else {
alternateDbInst = new GKInstance(dba.getSchema().getClassByName(ReferenceDatabase));
alternateDbInst.setDbAdaptor(dba);
alternateDbInst.addAttributeValue(created, instanceEditInst);
alternateDbInst.addAttributeValue(name, altRefDbDisplayName);
alternateDbInst.addAttributeValue(url, altRefDbJSON.get("url"));
alternateDbInst.addAttributeValue(accessUrl, altRefDbJSON.get("access"));
alternateDbInst.setAttributeValue(_displayName, altRefDbDisplayName);
alternateDbInst = InstanceUtilities.checkForIdenticalInstances(alternateDbInst, null);
}
if (altRefDbJSON.get("alt_id") != null)
{
altRefDbId = (String) altRefDbJSON.get("alt_id");
Expand All @@ -412,4 +398,30 @@ public static void setWormbaseMappings(Map<String, List<String>> wormbaseMapping
/**
 * Replaces the gene name mappings held by this class.
 *
 * <p>The supplied map is stored by reference; despite the parameter name, no
 * copy is made here, so later changes to the map by the caller remain visible
 * through {@code geneNameMappings}.
 *
 * @param geneNameMappingsCopy the map to store in {@code geneNameMappings}
 */
public static void setGeneNameMappingFile(Map<String, String> geneNameMappingsCopy) {
geneNameMappings = geneNameMappingsCopy;
}

/**
 * Reports whether at least one ReferenceDatabase instance with the given
 * display name can be fetched through {@code dba}.
 *
 * @param refDbDisplayName value compared (with "=") against {@code _displayName}
 * @return {@code true} if the lookup yields a non-null, non-empty collection
 * @throws Exception if the underlying fetch fails
 */
private static boolean refDbExistsInDb(String refDbDisplayName) throws Exception {
    Collection<?> matches =
        dba.fetchInstanceByAttribute(ReferenceDatabase, _displayName, "=", refDbDisplayName);
    if (matches == null) {
        return false;
    }
    return !matches.isEmpty();
}

/**
 * Fetches the ReferenceDatabase instance whose display name equals the given value.
 *
 * <p>Returns {@code null} instead of throwing when nothing matches, so that
 * callers which test the result for {@code null} (as
 * {@code fetchAndSetEnsemblDbInstance} does) can report a descriptive error
 * rather than surfacing a bare {@code NoSuchElementException} or
 * {@code NullPointerException} from the iterator call.
 *
 * @param refDbDisplayName value compared (with "=") against {@code _displayName}
 * @return the first instance returned by the lookup, or {@code null} if the
 *         lookup yields a null or empty collection
 * @throws Exception if the underlying fetch fails
 */
private static GKInstance getRefDbFromDb(String refDbDisplayName) throws Exception {
    Collection<?> refDbInstances = dba.fetchInstanceByAttribute(
        ReferenceDatabase, _displayName, "=", refDbDisplayName
    );
    // Guard before iterating: an absent reference database is an expected condition
    // that callers handle explicitly.
    if (refDbInstances == null || refDbInstances.isEmpty()) {
        return null;
    }
    return (GKInstance) refDbInstances.iterator().next();
}

/**
 * Resolves an EnsEMBL database type keyword to its ReferenceDatabase instance.
 *
 * <p>Recognised keywords and the display names they are looked up by:
 * {@code "main"} -> {@code "ENSEMBL"}, {@code "fungi"} -> {@code "ENSEMBL Fungi"},
 * {@code "protist"} -> {@code "ENSEMBL Protist"}.
 *
 * @param ensemblDatabaseType the keyword to resolve; a {@code null} value is
 *        treated like any other unrecognised keyword
 * @return whatever {@code getRefDbFromDb} returns for the resolved display name
 * @throws IllegalStateException if the keyword is {@code null} or not one of
 *         the recognised values
 * @throws Exception if the underlying fetch fails
 */
private static GKInstance fetchEnsemblDbInstance(String ensemblDatabaseType) throws Exception {
    final String refDbDisplayName;
    // Constant-first equals keeps a null keyword (presumably a species entry lacking the
    // configuration key - verify against caller) out of NullPointerException territory and
    // routes it to the descriptive error below.
    if ("main".equals(ensemblDatabaseType)) {
        refDbDisplayName = "ENSEMBL";
    } else if ("fungi".equals(ensemblDatabaseType)) {
        refDbDisplayName = "ENSEMBL Fungi";
    } else if ("protist".equals(ensemblDatabaseType)) {
        refDbDisplayName = "ENSEMBL Protist";
    } else {
        throw new IllegalStateException(ensemblDatabaseType + " is not a valid EnsEMBL database type");
    }
    return getRefDbFromDb(refDbDisplayName);
}
}
7 changes: 2 additions & 5 deletions src/main/java/org/reactome/orthoinference/EventsInferrer.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio
logger.info("Beginning orthoinference of " + speciesName);

JSONObject refDb = (JSONObject) speciesObject.get("refdb");
String refDbUrl = (String) refDb.get("url");
String refDbProteinUrl = (String) refDb.get("access");
String refDbGeneUrl = (String) refDb.get("ensg_access");
String ensemblDatabaseType = (String) refDb.get("use_gk_central_ensembl_ref_db");

// Creates two files that a) list reactions that are eligible for inference and b) those that are successfully inferred
String eligibleFilename = "eligible_" + species + "_75.txt";
Expand All @@ -120,8 +118,7 @@ public static void inferEvents(Properties props, String species) throws Exceptio
}
EWASInferrer.readENSGMappingFile(species, pathToOrthopairs);
EWASInferrer.fetchAndSetUniprotDbInstance();
EWASInferrer.createEnsemblProteinDbInstance(speciesName, refDbUrl, refDbProteinUrl);
EWASInferrer.createEnsemblGeneDBInstance(speciesName, refDbUrl, refDbGeneUrl);
EWASInferrer.fetchAndSetEnsemblDbInstance(ensemblDatabaseType);

JSONObject altRefDbJSON = (JSONObject) speciesObject.get("alt_refdb");
if (altRefDbJSON != null)
Expand Down
135 changes: 15 additions & 120 deletions src/main/resources/Species.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,7 @@
"ddis":{
"mart_url":"http:\/\/protists.ensembl.org\/biomart\/martservice",
"refdb":{
"url":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/Info\/Index",
"ensg_access":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/geneview?gene=###ID###&db=core",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Dictyostelium discoideum_PROTEIN"
],
"access":"http:\/\/protists.ensembl.org\/Dictyostelium_discoideum\/Transcript\/ProteinSummary?peptide=###ID###"
"use_gk_central_ensembl_ref_db": "protist"
},
"alt_refdb":{
"url":"http:\/\/www.dictybase.org\/",
Expand All @@ -35,14 +28,7 @@
"url":"http:\/\/www.wormbase.org"
},
"refdb":{
"access":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/Transcript\/ProteinSummary?peptide=###ID###",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_C_elegans_PROTEIN"
],
"url":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/Info\/Index",
"ensg_access":"http:\/\/metazoa.ensembl.org\/Caenorhabditis_elegans\/geneview?gene=###ID###&db=core"
"use_gk_central_ensembl_ref_db": "main"
},
"name":[
"Caenorhabditis elegans"
Expand All @@ -58,14 +44,7 @@
"Saccharomyces cerevisiae"
],
"refdb":{
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_S_cerevisiae_PROTEIN"
],
"access":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/Transcript\/ProteinSummary?peptide=###ID###",
"url":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/Info\/Index",
"ensg_access":"http:\/\/fungi.ensembl.org\/Saccharomyces_cerevisiae\/geneview?gene=###ID###&db=core"
"use_gk_central_ensembl_ref_db": "fungi"
},
"alt_refdb":{
"access":"https:\/\/www.yeastgenome.org\/search?q=###ID###&category=locus",
Expand All @@ -87,14 +66,7 @@
],
"mart_group":"sscrofa_gene_ensembl",
"refdb":{
"ensg_access":"http:\/\/www.ensembl.org\/Sus_scrofa\/geneview?gene=###ID###&db=core",
"url":"http:\/\/www.ensembl.org\/Sus_scrofa\/Info\/Index\/",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Sus_scrofa_PROTEIN"
],
"access":"http:\/\/www.ensembl.org\/Sus_scrofa\/Transcript\/ProteinSummary?peptide=###ID###"
"use_gk_central_ensembl_ref_db": "main"
},
"abbreviation": "SSC"
},
Expand All @@ -105,27 +77,13 @@
"Homo sapiens"
],
"refdb":{
"url":"http:\/\/www.ensembl.org\/Homo_sapiens\/Info\/Index\/",
"ensg_access":"http:\/\/www.ensembl.org\/Homo_sapiens\/geneview?gene=###ID###&db=core",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Homo_sapiens_PROTEIN"
],
"access":"http:\/\/www.ensembl.org\/Homo_sapiens\/Transcript\/ProteinSummary?peptide=###ID###"
"use_gk_central_ensembl_ref_db": "main"
},
"abbreviation": "HSA"
},
"ggal":{
"refdb":{
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Gallus_gallus_PROTEIN"
],
"access":"http:\/\/www.ensembl.org\/Gallus_gallus\/Transcript\/ProteinSummary?peptide=###ID###",
"ensg_access":"http:\/\/www.ensembl.org\/Gallus_gallus\/geneview?gene=###ID###&db=core",
"url":"http:\/\/www.ensembl.org\/Gallus_gallus\/Info\/Index\/"
"use_gk_central_ensembl_ref_db": "main"
},
"mart_group":"ggallus_gene_ensembl",
"name":[
Expand All @@ -141,14 +99,7 @@
"Xenopus tropicalis"
],
"refdb":{
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Xenopus_tropicalis_PROTEIN"
],
"access":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/Transcript\/ProteinSummary?peptide=###ID###",
"url":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/Info\/Index\/",
"ensg_access":"http:\/\/www.ensembl.org\/Xenopus_tropicalis\/geneview?gene=###ID###&db=core"
"use_gk_central_ensembl_ref_db": "main"
},
"group":"Vertebrate",
"compara":"core",
Expand All @@ -170,27 +121,13 @@
},
"mart_url":"http:\/\/fungi.ensembl.org\/biomart\/martservice",
"refdb":{
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_S_pombe_PROTEIN"
],
"access":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/Transcript\/ProteinSummary?peptide=###ID###",
"ensg_access":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/geneview?gene=###ID###&db=core",
"url":"http:\/\/fungi.ensembl.org\/Schizosaccharomyces_pombe\/Info\/Index"
"use_gk_central_ensembl_ref_db": "fungi"
},
"abbreviation": "SPO"
},
"btau":{
"refdb":{
"url":"http:\/\/www.ensembl.org\/Bos_taurus\/Info\/Index\/",
"ensg_access":"http:\/\/www.ensembl.org\/Bos_taurus\/geneview?gene=###ID###&db=core",
"access":"http:\/\/www.ensembl.org\/Bos_taurus\/Transcript\/ProteinSummary?peptide=###ID###",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Bos_taurus_PROTEIN"
]
"use_gk_central_ensembl_ref_db": "main"
},
"mart_group":"btaurus_gene_ensembl",
"name":[
Expand All @@ -202,14 +139,7 @@
},
"mmus":{
"refdb":{
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Mus_musculus_PROTEIN"
],
"access":"http:\/\/www.ensembl.org\/Mus_musculus\/Transcript\/ProteinSummary?peptide=###ID###",
"url":"http:\/\/www.ensembl.org\/Mus_musculus\/Info\/Index\/",
"ensg_access":"http:\/\/www.ensembl.org\/Mus_musculus\/geneview?gene=###ID###&db=core"
"use_gk_central_ensembl_ref_db": "main"
},
"mart_group":"mmusculus_gene_ensembl",
"name":[
Expand All @@ -223,14 +153,7 @@
"compara":"core",
"group":"Vertebrate",
"refdb":{
"access":"http:\/\/www.ensembl.org\/Canis_familiaris\/Transcript\/ProteinSummary?peptide=###ID###",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Canis_PROTEIN"
],
"ensg_access":"http:\/\/www.ensembl.org\/Canis_familiaris\/geneview?gene=###ID###&db=core",
"url":"http:\/\/www.ensembl.org\/Canis_familiaris\/Info\/Index\/"
"use_gk_central_ensembl_ref_db": "main"
},
"name":[
"Canis familiaris"
Expand All @@ -244,14 +167,7 @@
"Plasmodium falciparum"
],
"refdb":{
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_P_falciparum_PROTEIN"
],
"access":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/Transcript\/ProteinSummary?peptide=###ID###",
"ensg_access":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/geneview?gene=###ID###&db=core",
"url":"http:\/\/protists.ensembl.org\/Plasmodium_falciparum\/Info\/Index"
"use_gk_central_ensembl_ref_db": "protist"
},
"mart_url":"http:\/\/protists.ensembl.org\/biomart\/martservice",
"alt_refdb":{
Expand All @@ -269,14 +185,7 @@
"compara":"core",
"group":"Metazoan",
"refdb":{
"ensg_access":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster\/geneview?gene=###ID###&db=core",
"url":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster",
"access":"http:\/\/metazoa.ensembl.org\/Drosophila_melanogaster\/Transcript\/ProteinSummary?peptide=###ID###",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_D_melanogaster_PROTEIN"
]
"use_gk_central_ensembl_ref_db": "main"
},
"alt_refdb":{
"access":"https:\/\/flybase.org\/reports\/###ID###.html",
Expand All @@ -297,14 +206,7 @@
"Danio rerio"
],
"refdb":{
"access":"http:\/\/www.ensembl.org\/Danio_rerio\/Transcript\/ProteinSummary?peptide=###ID###",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Danio_rerio_PROTEIN"
],
"ensg_access":"http:\/\/www.ensembl.org\/Danio_rerio\/geneview?gene=###ID###&db=core",
"url":"http:\/\/www.ensembl.org\/Danio_rerio\/Info\/Index\/"
"use_gk_central_ensembl_ref_db": "main"
},
"group":"Vertebrate",
"compara":"core",
Expand All @@ -314,14 +216,7 @@
"compara":"core",
"group":"Vertebrate",
"refdb":{
"ensg_access":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/geneview?gene=###ID###&db=core",
"url":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/Info\/Index\/",
"access":"http:\/\/www.ensembl.org\/Rattus_norvegicus\/Transcript\/ProteinSummary?peptide=###ID###",
"dbname":[
"ENSEMBL",
"Ensembl",
"ENSEMBL_Rattus_norvegicus_PROTEIN"
]
"use_gk_central_ensembl_ref_db": "main"
},
"mart_group":"rnorvegicus_gene_ensembl",
"name":[
Expand Down
Loading