Commit 2446a1b3 authored by tim.tucholski
Browse files

Changed gene file reading for Tribolium castaneum

parent 060b25d3
......@@ -14,7 +14,7 @@ public class OrthoAPI {
//NCBI IDs and OrthoDB URLs
private static final String DMEL = "7227"; //Drosophila melanogaster
/*private static final String DMEL = "7227"; //Drosophila melanogaster
private static final String DPSE = "7237"; //Drosophila pseudoobscura
private static final String DVIR = "7244"; //Drosophila virilis
private static final String DANA = "7217"; //Drosophila ananassae
......@@ -22,7 +22,7 @@ public class OrthoAPI {
private static final String TCAS = "7070"; //Tribolium castaneum
private static final String SEARCH_URL = "https://www.orthodb.org/v9/search?query=%s"; //For receiving the cluster IDs for an identifier
private static final String ORTHO_URL = "https://www.orthodb.org/v9/orthologs?id=%s?species=%s"; // Takes a cluster ID, returns all genes
private static final String OGDETAIL_URL = "https://www.orthodb.org/v9/ogdetails?id=%s"; //Takes a gene id (taxid:geneid) and returns relevant data
private static final String OGDETAIL_URL = "https://www.orthodb.org/v9/ogdetails?id=%s"; //Takes a gene id (taxid:geneid) and returns relevant data*/
/**
* Takes a JSON list of all known Flybase identifiers and returns a list of according gene clusters for each.
......
......@@ -96,44 +96,67 @@ public class Tribolium implements GeneSource {
/* Process the last semicolon-separated gff column and add the sub-columns to the list*/
String[] attribute_list = wordsArray[8].split(";");
String tc_id = null, gene_name = null;
LinkedList<String> tc_id_list = new LinkedList<>();
String tc_id = null;
for (String attribute : attribute_list) {
if (attribute.contains("gene_synonym")) {
gene_name = attribute.split("=")[1];
}
if (attribute.contains("Dbxref")) {
//if (attribute.contains("gene_synonym")) {
// gene_name = attribute.split("=")[1];
//}
if (attribute.contains("locus_tag") && !attribute.contains("old_locus_tag")) {
tc_id = attribute.split("=")[1];
tc_id = tc_id.replace("BEETLEBASE:","");
tc_id = tc_id.replace("TcasGA2_","");
/* If there are multiple IDs pertaining to a gene, create separate Gene IDs for them
However, cross-links are created for those genes as an extra attribute */
for(String sub_id : tc_id.split(",")) {
tc_id_list.add(sub_id);
}
}
refined_array.add(attribute);
if (attribute.contains("old_locus_tag")) {
String old_tc_id = attribute.split("=")[1];
old_tc_id = old_tc_id.replace("TcasGA2_","");
refined_array.add("old_locus_tag="+old_tc_id);
}
}
/* Check if the gene denoted by tc_id exists and add if needed */
Gene new_gene = database_tribolium.get(tc_id);
if (new_gene == null){
if (tc_id != null) {
id_list.put(tc_id, "7070"); //Adds gene identifier and NCBI ID (7070) to the list
for (String sub_id : tc_id_list) {
/* Check if the gene denoted by tc_id exists and add if needed */
Gene new_gene = database_tribolium.get(sub_id);
if (new_gene == null){
if (sub_id != null) {
id_list.put(sub_id, "7070"); //Adds gene identifier and NCBI ID (7070) to the list
}
new_gene = new Gene(sub_id, "trib_gff");
database_tribolium.put(sub_id, new_gene);
}
/* Add TC ID and cross-links (if any) to the refined array */
LinkedList<String> temp_array = refined_array;
temp_array.add("locus_tag=" + sub_id);
if (tc_id_list.size() > 1) {
String crosslink = tc_id.replace((sub_id + ","), "");
crosslink = crosslink.replace((sub_id), "");
temp_array.add("crosslink=" + crosslink);
}
new_gene = new Gene(tc_id, "trib_gff");
database_tribolium.put(tc_id, new_gene);
}
/* Iterate over the linked list and add all attributes to their specific gene entities*/
for (String attribute : refined_array) {
LinkedList<String> newlist = new LinkedList<>();
String[] new_attribute = attribute.split("=");
String attribute_key = new_attribute[0];
String attribute_value = new_attribute[1];
String[] potential_split = attribute_value.split("\\|");
if (potential_split.length > 1) {
for (String multiple_value : potential_split) {
newlist.add(multiple_value);
/* Iterate over the linked list and add all attributes to their specific gene entities*/
for (String attribute : temp_array) {
LinkedList<String> newlist = new LinkedList<>();
String[] new_attribute = attribute.split("=");
String attribute_key = new_attribute[0];
String attribute_value = new_attribute[1];
String[] potential_split = attribute_value.split("\\|");
if (potential_split.length > 1) {
for (String multiple_value : potential_split) {
newlist.add(multiple_value);
}
}else{
newlist.add(attribute_value);
}
}else{
newlist.add(attribute_value);
new_gene.addInformation(attribute_key, newlist);
}
new_gene.addInformation(attribute_key, newlist);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment