Commit 9ac8feb8 authored by tim.tucholski's avatar tim.tucholski
Browse files

Integrated tribolium, fixed gene info service output

parent d2e7c3e2
This diff is collapsed.
package org.de.unigoettingen.ibeetlebase.api;
import org.de.unigoettingen.ibeetlebase.api.core.FlyBase;
import org.de.unigoettingen.ibeetlebase.api.core.Tribolium;
import org.de.unigoettingen.ibeetlebase.api.core.Gene;
import org.eclipse.microprofile.metrics.annotation.Metered;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonNumber;
import javax.json.JsonObject;
import javax.json.JsonString;
import javax.json.JsonValue;
import javax.json.JsonValue.ValueType;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.core.MediaType;
......@@ -17,22 +30,85 @@ import javax.inject.Inject;
public class GeneinfoResource {
private static final String VERSION_STRING = "/{dataversion : [0-9]{6}|current}/v1";
//private static final int overlapSize = 1; // count registered sources
@Inject
FlyBase fb;
@Inject
Tribolium tb;
/**
* REST Endpoint for the Flybase data
* @param dataversion Version ID of the requested data - Required so older versions can be requested in the future
* @param fbgn The flybase identifier that information is requested for
* @return Gene information for all matching flybase gene identifiers (FBgn)
*/
@GET
@Metered
@Path(VERSION_STRING + "/flybase/{fbgn: FBgn[0-9]{7}}")
public Gene getFlybaseInfo(@PathParam("dataversion") String dataversion, @PathParam("fbgn") String fbgn) {
public Gene getFlybaseInfo(@PathParam("dataversion") final String dataversion, @PathParam("fbgn") final String fbgn) {
if (!checkFBgnID(fbgn)) {
return null;
}
return fb.retrieve(fbgn);
}
private boolean checkFBgnID(String fbgn) {
/**
* REST Endpoint for the Tribolium data (from the iBB gff file)
* @param dataversion Version ID of the requested data - Required so older versions can be requested in the future
* @param tc The TC identifier that information is requested for
* @return Gene information for all matching tribolium gene identifiers (TC)
*/
@GET
@Metered
@Path(VERSION_STRING + "/tribolium/{tc: TC[0-9]{6}}")
public Gene getTriboliumInfo(@PathParam("dataversion") final String dataversion, @PathParam("tc") final String tc) {
if (!checkTCID(tc)) {
return null;
}
return tb.retrieve(tc);
}
@GET
@Metered
@Path(VERSION_STRING + "/flybase/all")
public JsonObject getFlybaseIDs(@PathParam("dataversion") final String dataversion) {
JsonObject resultObject = Json.createObjectBuilder().build();
int id_counter = 0;
Map<String, String> ids = fb.getAllIDs();
for (String gene_key: ids.keySet()) {
JsonArray gene_name = Json.createArrayBuilder().build();
gene_name.add(Json.createObjectBuilder().add(gene_key, ids.get(gene_key)).build());
resultObject.put(Integer.toString(id_counter), gene_name);
}
return resultObject;
}
@GET
@Metered
@Path(VERSION_STRING + "/tribolium/all")
public JsonObject getTriboliumIDs(@PathParam("dataversion") final String dataversion) {
return tb.getAllIDs();
}
/**
* Checks if a FlyBase gene identifier (FBgn) is in the right format
* @param fbgn The Flybase gene identifier to be checked
* @return True if the identifier is correctly formatted, False otherwise
*/
private boolean checkFBgnID(final String fbgn) {
return fbgn.matches("FBgn[0-9]{7}");
}
/**
* Checks if a tribolium gene identifier (TC) is in the right format
* @param tc The TC gene identifier to be checked
* @return True if the identifier is correctly formatted, False otherwise
*/
private boolean checkTCID(final String tc) {
return tc.matches("TC[0-9]{6}");
}
}
\ No newline at end of file
......@@ -15,10 +15,15 @@ import org.de.unigoettingen.ibeetlebase.AppServiceBean;
public class FlyBase implements GeneSource {
private static final String SOURCE = "flybase";
private Map<String, Gene> database;
// Gene identifier -> gene name. Must exist before the gene file is parsed, because parsing writes into it.
private Map<String, String> id_list = new HashMap<>();
@Inject
AppServiceBean service;
/**
* Loads the database from the gene file by calling the corresponding read method (only if the database has not been loaded yet)
* @return The database containing all information from the FlyBase gene file
*/
private Map<String, Gene> getDB() {
if (this.database == null) {
int[] column_ind = new int[]{0,1,2,3,4,5,6,7};
......@@ -29,6 +34,11 @@ public class FlyBase implements GeneSource {
return database;
}
/**
* Prompts the search for gene information for a provided gene identifier
* @param fbgn The gene identifier that data is requested for
* @return The results of the search
*/
@Override
public Gene retrieve(String fbgn) {
Gene gene_to_search = new Gene();
......@@ -37,18 +47,31 @@ public class FlyBase implements GeneSource {
return search(fbgn, gene_to_search);
}
/**
 * Called by retrieve(), copies the gene information stored for the given identifier into the passed result object.
 * Returns null if the identifier does not match any gene in the database or if the database could not be loaded.
 * @param fbgn The gene identifier that is to be matched with the database
 * @param resultObject A new Gene entity with the ID and source database already included
 * @return The resulting Gene entity with the requested information, or null if no information is found.
 */
@Override
public Gene search(String fbgn, Gene resultObject) {
    Map<String, Gene> db = getDB();
    if (db == null) {
        // The gene file could not be read, so there is nothing to search in.
        return null;
    }
    Gene databaseEntry = db.get(fbgn); //Picks gene to be searched from the contained database
    if (databaseEntry == null) {
        return null;
    }
    if (databaseEntry.getInformation() != null) {
        resultObject.addMultipleColumns(databaseEntry.getInformation());
    }
    return resultObject;
}
/**
* Parses the gene database file and saves the contained information
* @param column_indices Indices of all columns in the file that are to be saved
* @param column_descriptors Column names for all columns that are to be saved, in the same order as the indices
* @return A database collection filled with information from the parsed file, or null if an exception occurs
*/
private Map<String, Gene> readGeneFile(int[] column_indices, String[] column_descriptors) {
try {
Map<String, Gene> database_flybase = new HashMap<>();
......@@ -66,13 +89,15 @@ public class FlyBase implements GeneSource {
}
wordsArray = lineJustFetched.split("\t");
String fbgn_id = wordsArray[2]; //wordsArray[2] contains FBGN id
/* Check if the gene denoted by fbgn_id exists and add if needed */
Gene new_gene = database_flybase.get(fbgn_id);
if (new_gene == null){
id_list.put(wordsArray[2], wordsArray[3]); //Adds gene identifier and full name to the id list
new_gene = new Gene(fbgn_id, "flybase");
database_flybase.put(fbgn_id, new_gene);
}
for (int index : column_indices) {
LinkedList<String> newlist = new LinkedList<>();
String[] potential_split = wordsArray[index].split(",");
......@@ -83,10 +108,9 @@ public class FlyBase implements GeneSource {
}else{
newlist.add(wordsArray[index]);
}
new_gene.addColumn(column_descriptors[index], newlist);
new_gene.addInformation(column_descriptors[index], newlist);
}
//database_flybase.put(new_gene.get(),o);
}
}
return database_flybase;
......@@ -98,5 +122,8 @@ public class FlyBase implements GeneSource {
return null;
}
/**
 * Returns all gene identifiers of this source together with the gene names.
 * The list is filled while the gene file is parsed, so the database is loaded first if that has not happened yet.
 * @return A read-only map from gene identifier to gene name; empty if nothing could be loaded
 */
public Map<String, String> getAllIDs() {
    if (id_list == null) {
        // Parsing writes into the list, so it has to exist before the database is loaded.
        id_list = new HashMap<>();
    }
    getDB();
    return java.util.Collections.unmodifiableMap(id_list);
}
}
\ No newline at end of file
......@@ -3,61 +3,95 @@ package org.de.unigoettingen.ibeetlebase.api.core;
import java.util.*;
public class Gene {

    private String format = "flybase"; // Identifier for the source database which also implies a certain format of the contained information
    private String gene_id; // Gene identifier
    // Holds the gene information - The first String is the key, the String collection can hold N values for each key.
    // Never null, so callers can always iterate over getInformation().
    private Map<String, Collection<String>> columns = new HashMap<>();

    /**
     * Standard constructor for the Gene class
     */
    public Gene() {
        super();
    }

    /**
     * Creates a Gene entity denoted by the format and the Gene ID
     * @param gene_id The gene identifier for the created gene
     * @param format Identifies the source database
     */
    public Gene(String gene_id, String format) {
        this.gene_id = gene_id;
        this.format = format;
    }

    /**
     * Creates a Gene entity denoted by format and gene with a passed gene information data set
     * @param gene_id The gene identifier for the created gene
     * @param columns The gene information data, stored in a map; null is treated as "no information"
     * @param format Identifies the source database
     */
    public Gene(String gene_id, Map<String, Collection<String>> columns, String format) {
        this.gene_id = gene_id;
        this.columns = emptyIfNull(columns);
        this.format = format;
    }

    /**
     * Returns the gene identifier
     * @return The gene ID pertaining to the entity
     */
    public String getGeneID() {
        return gene_id;
    }

    /**
     * Returns the source database identifier
     * @return The source database identifier of the gene entity
     */
    public String getFormat() {
        return format;
    }

    /**
     * Returns the contained gene information
     * @return The gene information contained in the gene entity; empty (never null) if none was added
     */
    public Map<String, Collection<String>> getInformation() {
        return columns;
    }

    /**
     * Adds or replaces complete information for the gene entity
     * This is to be replaced by a more dynamic method later
     * @param columns The data to be held by the gene entity, stored in a map; null clears the information
     */
    public void setGeneInfo(Map<String, Collection<String>> columns) {
        this.columns = emptyIfNull(columns);
    }

    /**
     * Sets the gene ID to a specific value
     * @param gene_id The gene ID value to be set for the gene entity
     */
    public void setGeneID(String gene_id) {
        this.gene_id = gene_id;
    }

    /**
     * Sets the format to a specific value
     * @param format The format value to be set for the gene entity
     */
    public void setFormat(String format) {
        this.format = format;
    }

    /**
     * Adds a single data column - one key plus an arbitrary number of values - to the gene entity
     * If the specific column already exists, only the values that are not yet contained are added to it
     * @param column_key The key String (for example "Gene ID")
     * @param values The values to add (a collection); null leaves the gene unchanged
     */
    public void addInformation(String column_key, Collection<String> values) {
        if (values == null) {
            return;
        }
        Collection<String> column = columns.get(column_key);
        if (column == null) {
            column = new LinkedList<>();
            columns.put(column_key, column);
        }
        for (String new_value : values) {
            if (!column.contains(new_value)) {
                column.add(new_value);
            }
        }
    }

    /**
     * Adds multiple data columns (stored in a map) to the gene entity
     * @param new_columns The data to be added; null leaves the gene unchanged
     */
    public void addMultipleColumns(Map<String, Collection<String>> new_columns) {
        if (new_columns == null) {
            return;
        }
        for (Map.Entry<String, Collection<String>> column : new_columns.entrySet()) {
            this.addInformation(column.getKey(), column.getValue());
        }
    }

    /**
     * Substitutes a fresh, mutable map for a missing one so that the information map is never null.
     */
    private static Map<String, Collection<String>> emptyIfNull(Map<String, Collection<String>> columns) {
        return columns != null ? columns : new HashMap<String, Collection<String>>();
    }
}
\ No newline at end of file
package org.de.unigoettingen.ibeetlebase.api.core;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.LinkedList;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
import org.de.unigoettingen.ibeetlebase.AppServiceBean;
@ApplicationScoped
public class Tribolium implements GeneSource {

    private static final String SOURCE = "trib_gff";

    /** Name of the gff file inside the service's data directory. */
    private static final String GFF_FILE = "tribolium.gff";

    /** Prefix in front of the TC identifier inside the Dbxref attribute. */
    private static final String ID_PREFIX = "BEETLEBASE:";

    /** Names given to the fixed, tab-separated gff columns, in file order. */
    private static final String[] COLUMN_DESCRIPTORS =
            {"seqname", "source", "feature", "start", "end", "score", "strand", "frame"};

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(Tribolium.class.getName());

    private Map<String, Gene> database;
    private Map<String, String> id_list = new HashMap<>();

    @Inject
    AppServiceBean service;

    /**
     * Loads the database from the gene file by calling the read method (only if it has not been loaded yet).
     * NOTE(review): the lazy load is not synchronized; confirm whether concurrent first requests are a concern.
     * @return The database containing all information from the tribolium gene file, or null if the file could not be read
     */
    private Map<String, Gene> getDB() {
        if (this.database == null) {
            this.database = readGeneFile(COLUMN_DESCRIPTORS);
        }
        return database;
    }

    /**
     * Prompts the search for gene information for a provided gene identifier
     * @param tc The gene identifier that data is requested for
     * @return The results of the search
     */
    @Override
    public Gene retrieve(String tc) {
        Gene gene_to_search = new Gene();
        gene_to_search.setGeneID(tc);
        gene_to_search.setFormat(SOURCE);
        return search(tc, gene_to_search);
    }

    /**
     * Called by retrieve(), copies the gene information stored for the given identifier into the passed result object.
     * Returns null if the identifier does not match any gene in the database or if the database could not be loaded.
     * @param tc The gene identifier that is to be matched with the database
     * @param resultObject A new Gene entity with the ID and source database already included
     * @return The resulting Gene entity with the requested information, or null if no information is found.
     */
    @Override
    public Gene search(String tc, Gene resultObject) {
        Map<String, Gene> db = getDB();
        if (db == null) {
            // The gene file could not be read, so there is nothing to search in.
            return null;
        }
        Gene databaseEntry = db.get(tc); //Picks gene to be searched from the contained database
        if (databaseEntry == null) {
            return null;
        }
        if (databaseEntry.getInformation() != null) {
            resultObject.addMultipleColumns(databaseEntry.getInformation());
        }
        return resultObject;
    }

    /**
     * Parses the gene database file and saves the contained information.
     * The identifier list is replaced only after the whole file was read successfully.
     * @param column_descriptors Column names for the tab-separated columns of the gff file
     * @return A database collection filled with information from the parsed file, or null if an exception occurs
     */
    private Map<String, Gene> readGeneFile(String[] column_descriptors) {
        Map<String, Gene> database_tribolium = new HashMap<>();
        Map<String, String> ids = new HashMap<>();
        try (BufferedReader buf = new BufferedReader(
                new FileReader(new File(service.getDataDir(),"tribolium.gff")))) {
            String lineJustFetched;
            while ((lineJustFetched = buf.readLine()) != null) {
                addGeneLine(lineJustFetched, column_descriptors, database_tribolium, ids);
            }
        } catch (IOException e) {
            LOG.log(java.util.logging.Level.SEVERE, "Could not read gene database file " + GFF_FILE, e);
            return null;
        }
        this.id_list = ids;
        return database_tribolium;
    }

    /**
     * Processes one line of the gff file. Only complete "gene" feature lines that carry a TC identifier
     * are stored; comment/directive lines, other features and incomplete lines are skipped.
     * @param line The raw line of the gff file
     * @param column_descriptors Column names for the fixed tab-separated columns; the attribute column follows them
     * @param genes The database that is being filled
     * @param ids The identifier list (gene identifier to gene name) that is being filled
     */
    private static void addGeneLine(String line, String[] column_descriptors,
            Map<String, Gene> genes, Map<String, String> ids) {
        if (line.isEmpty() || line.startsWith("#")) {
            return;
        }
        String[] wordsArray = line.split("\t");
        int fixed_columns = column_descriptors.length;
        // The attribute column comes directly after the fixed columns and has to be present.
        if (wordsArray.length <= fixed_columns || wordsArray.length < 3 || !"gene".equals(wordsArray[2])) {
            return;
        }

        /* Find the identifier and the gene name in the semicolon-separated attribute column */
        String[] attribute_list = wordsArray[fixed_columns].split(";");
        String tc_id = null;
        String gene_name = null;
        for (String attribute : attribute_list) {
            String attribute_key = attributeKey(attribute);
            if ("gene_synonym".equals(attribute_key)) {
                gene_name = attributeValue(attribute);
            } else if ("Dbxref".equals(attribute_key)) {
                tc_id = extractTcId(attributeValue(attribute));
            }
        }
        if (tc_id == null || tc_id.isEmpty()) {
            // Without an identifier the gene cannot be looked up later.
            return;
        }

        /* Check if the gene denoted by tc_id exists and add if needed */
        Gene new_gene = genes.get(tc_id);
        if (new_gene == null) {
            new_gene = new Gene(tc_id, SOURCE);
            genes.put(tc_id, new_gene);
        }
        if (gene_name != null && !ids.containsKey(tc_id)) {
            ids.put(tc_id, gene_name); //Adds gene identifier and full name to the id list
        }

        /* Add the fixed tab-separated columns, then all attributes, to the gene entity */
        for (int i = 0; i < fixed_columns; i++) {
            new_gene.addInformation(column_descriptors[i], splitValues(wordsArray[i]));
        }
        for (String attribute : attribute_list) {
            String attribute_key = attributeKey(attribute);
            if (attribute_key != null) {
                new_gene.addInformation(attribute_key, splitValues(attributeValue(attribute)));
            }
        }
    }

    /**
     * Returns the key of a "key=value" attribute.
     * @param attribute One entry of the attribute column
     * @return The trimmed key, or null if the entry has no "=" or an empty key
     */
    private static String attributeKey(String attribute) {
        int separator = attribute.indexOf('=');
        if (separator <= 0) {
            return null;
        }
        String key = attribute.substring(0, separator).trim();
        return key.isEmpty() ? null : key;
    }

    /**
     * Returns everything after the first "=" of a "key=value" attribute, so values containing "=" stay intact.
     * Must only be called for entries for which attributeKey() did not return null.
     */
    private static String attributeValue(String attribute) {
        return attribute.substring(attribute.indexOf('=') + 1);
    }

    /**
     * Extracts the TC identifier from the value of the Dbxref attribute.
     * If the value lists several comma-separated references, the one with the BEETLEBASE prefix is used;
     * otherwise the whole value with the prefix removed is returned.
     */
    private static String extractTcId(String dbxref) {
        for (String reference : dbxref.split(",")) {
            String trimmed = reference.trim();
            if (trimmed.startsWith(ID_PREFIX)) {
                return trimmed.substring(ID_PREFIX.length());
            }
        }
        return dbxref.replace(ID_PREFIX, "");
    }

    /**
     * Splits a value at "|" into its parts; a value that does not split into several parts is kept as it is.
     */
    private static LinkedList<String> splitValues(String value) {
        LinkedList<String> newlist = new LinkedList<>();
        String[] potential_split = value.split("\\|");
        if (potential_split.length > 1) {
            for (String multiple_value : potential_split) {
                newlist.add(multiple_value);
            }
        } else {
            newlist.add(value);
        }
        return newlist;
    }

    /**
     * Returns a list of all identifiers and gene names.
     * The list is filled while the gene file is parsed, so the database is loaded first if that has not happened yet.
     * @return A read-only map containing identifiers and the according gene names; empty if nothing could be loaded
     */
    public Map<String, String> getAllIDs() {
        getDB();
        return java.util.Collections.unmodifiableMap(id_list);
    }
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment