Commit 2446a1b3 authored by tim.tucholski's avatar tim.tucholski
Browse files

Changed gene file reading for Tribolium castaneum

parent 060b25d3
...@@ -14,7 +14,7 @@ public class OrthoAPI { ...@@ -14,7 +14,7 @@ public class OrthoAPI {
//NCBI IDs and OrthoDB URLs //NCBI IDs and OrthoDB URLs
private static final String DMEL = "7227"; //Drosophila melanogaster /*private static final String DMEL = "7227"; //Drosophila melanogaster
private static final String DPSE = "7237"; //Drosophila pseudoobscura private static final String DPSE = "7237"; //Drosophila pseudoobscura
private static final String DVIR = "7244"; //Drosophila virilis private static final String DVIR = "7244"; //Drosophila virilis
private static final String DANA = "7217"; //Drosophila ananassae private static final String DANA = "7217"; //Drosophila ananassae
...@@ -22,7 +22,7 @@ public class OrthoAPI { ...@@ -22,7 +22,7 @@ public class OrthoAPI {
private static final String TCAS = "7070"; //Tribolium castaneum private static final String TCAS = "7070"; //Tribolium castaneum
private static final String SEARCH_URL = "https://www.orthodb.org/v9/search?query=%s"; //For receiving the cluster IDs for an identifier private static final String SEARCH_URL = "https://www.orthodb.org/v9/search?query=%s"; //For receiving the cluster IDs for an identifier
private static final String ORTHO_URL = "https://www.orthodb.org/v9/orthologs?id=%s?species=%s"; // Takes a cluster ID, returns all genes private static final String ORTHO_URL = "https://www.orthodb.org/v9/orthologs?id=%s?species=%s"; // Takes a cluster ID, returns all genes
private static final String OGDETAIL_URL = "https://www.orthodb.org/v9/ogdetails?id=%s"; //Takes a gene id (taxid:geneid) and returns relevant data private static final String OGDETAIL_URL = "https://www.orthodb.org/v9/ogdetails?id=%s"; //Takes a gene id (taxid:geneid) and returns relevant data*/
/** /**
* Takes a JSON list of all known Flybase identifiers and returns a list of according gene clusters for each. * Takes a JSON list of all known Flybase identifiers and returns a list of according gene clusters for each.
......
...@@ -96,44 +96,67 @@ public class Tribolium implements GeneSource { ...@@ -96,44 +96,67 @@ public class Tribolium implements GeneSource {
/* Process the last semicolon-separated gff column and add the sub-columns to the list*/ /* Process the last semicolon-separated gff column and add the sub-columns to the list*/
String[] attribute_list = wordsArray[8].split(";"); String[] attribute_list = wordsArray[8].split(";");
String tc_id = null, gene_name = null; LinkedList<String> tc_id_list = new LinkedList<>();
String tc_id = null;
for (String attribute : attribute_list) { for (String attribute : attribute_list) {
if (attribute.contains("gene_synonym")) { //if (attribute.contains("gene_synonym")) {
gene_name = attribute.split("=")[1]; // gene_name = attribute.split("=")[1];
} //}
if (attribute.contains("Dbxref")) { if (attribute.contains("locus_tag") && !attribute.contains("old_locus_tag")) {
tc_id = attribute.split("=")[1]; tc_id = attribute.split("=")[1];
tc_id = tc_id.replace("BEETLEBASE:",""); tc_id = tc_id.replace("TcasGA2_","");
/* If there are multiple IDs pertaining to a gene, create separate Gene IDs for them
However, cross-links are created for those genes as an extra attribute */
for(String sub_id : tc_id.split(",")) {
tc_id_list.add(sub_id);
}
} }
refined_array.add(attribute); if (attribute.contains("old_locus_tag")) {
String old_tc_id = attribute.split("=")[1];
old_tc_id = old_tc_id.replace("TcasGA2_","");
refined_array.add("old_locus_tag="+old_tc_id);
}
} }
/* Check if the gene denoted by tc_id exists and add if needed */ for (String sub_id : tc_id_list) {
Gene new_gene = database_tribolium.get(tc_id); /* Check if the gene denoted by tc_id exists and add if needed */
if (new_gene == null){ Gene new_gene = database_tribolium.get(sub_id);
if (tc_id != null) { if (new_gene == null){
id_list.put(tc_id, "7070"); //Adds gene identifier and NCBI ID (7070) to the list if (sub_id != null) {
id_list.put(sub_id, "7070"); //Adds gene identifier and NCBI ID (7070) to the list
}
new_gene = new Gene(sub_id, "trib_gff");
database_tribolium.put(sub_id, new_gene);
}
/* Add TC ID and cross-links (if any) to the refined array */
LinkedList<String> temp_array = refined_array;
temp_array.add("locus_tag=" + sub_id);
if (tc_id_list.size() > 1) {
String crosslink = tc_id.replace((sub_id + ","), "");
crosslink = crosslink.replace((sub_id), "");
temp_array.add("crosslink=" + crosslink);
} }
new_gene = new Gene(tc_id, "trib_gff");
database_tribolium.put(tc_id, new_gene);
}
/* Iterate over the linked list and add all attributes to their specific gene entities*/ /* Iterate over the linked list and add all attributes to their specific gene entities*/
for (String attribute : refined_array) { for (String attribute : temp_array) {
LinkedList<String> newlist = new LinkedList<>(); LinkedList<String> newlist = new LinkedList<>();
String[] new_attribute = attribute.split("="); String[] new_attribute = attribute.split("=");
String attribute_key = new_attribute[0]; String attribute_key = new_attribute[0];
String attribute_value = new_attribute[1]; String attribute_value = new_attribute[1];
String[] potential_split = attribute_value.split("\\|"); String[] potential_split = attribute_value.split("\\|");
if (potential_split.length > 1) { if (potential_split.length > 1) {
for (String multiple_value : potential_split) { for (String multiple_value : potential_split) {
newlist.add(multiple_value); newlist.add(multiple_value);
}
}else{
newlist.add(attribute_value);
} }
}else{ new_gene.addInformation(attribute_key, newlist);
newlist.add(attribute_value);
} }
new_gene.addInformation(attribute_key, newlist);
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment