Commit 02108438 authored by cnguyen2's avatar cnguyen2
Browse files

Add CDS, mRNA, protein sequences and refactor

parent fdfde2a9
data
# Eclipse
.project
.classpath
......@@ -32,4 +34,4 @@ target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
release.properties
\ No newline at end of file
release.properties
This diff is collapsed.
This diff is collapsed.
......@@ -12,10 +12,10 @@
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<quarkus-plugin.version>1.10.3.Final</quarkus-plugin.version>
<quarkus-plugin.version>1.12.0.Final</quarkus-plugin.version>
<quarkus.platform.artifact-id>quarkus-universe-bom</quarkus.platform.artifact-id>
<quarkus.platform.group-id>io.quarkus</quarkus.platform.group-id>
<quarkus.platform.version>1.10.3.Final</quarkus.platform.version>
<quarkus.platform.version>1.12.0.Final</quarkus.platform.version>
<surefire-plugin.version>2.22.1</surefire-plugin.version>
</properties>
<dependencyManagement>
......@@ -44,38 +44,14 @@
<artifactId>rest-assured</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-cache</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-hibernate-orm-panache</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-hibernate-orm</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-infinispan-client</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-smallrye-openapi</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-smallrye-metrics</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-resteasy-jsonb</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-container-image-docker</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
......
......@@ -17,7 +17,8 @@
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.1
WORKDIR /work/
COPY target/*-runner /work/application
COPY data/* /work/data/
#COPY data/* /work/data/
# set up permissions for user `1001`
RUN chmod 775 /work /work/application \
......
package de.unigoettingen.ibeetlebase.geneinfo;
import javax.enterprise.context.ApplicationScoped;
import javax.enterprise.event.Observes;
import io.quarkus.runtime.ShutdownEvent;
import io.quarkus.runtime.StartupEvent;
import org.jboss.logging.Logger;
@ApplicationScoped
public class AppServiceBean {

    private static final Logger LOGGER = Logger.getLogger("ListenerBean");

    /**
     * Default constructor; the bean holds no state of its own.
     */
    public AppServiceBean() {
    }

    /**
     * Writes an INFO log entry when the startup event is observed.
     * @param ev The observed startup event (not inspected)
     */
    void onStart(@Observes StartupEvent ev) {
        LOGGER.info("The application is starting...");
    }

    /**
     * Writes an INFO log entry when the shutdown event is observed.
     * @param ev The observed shutdown event (not inspected)
     */
    void onStop(@Observes ShutdownEvent ev) {
        LOGGER.info("The application is stopping...");
    }

    /**
     * Resolves the data directory relative to the current working directory.
     * When the working directory ends with "target" the data directory is one level up.
     * @return "../data" if the working directory ends with "target", otherwise "data"
     */
    public String getDataDir() {
        final String workingDir = System.getProperty("user.dir");
        return workingDir.endsWith("target") ? "../data" : "data";
    }
}
\ No newline at end of file
package de.unigoettingen.ibeetlebase.geneinfo.model;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
public class Gene {

    // Identifier for the source database which also implies a certain format of the contained information
    private String format = "flybase";
    // Gene identifier
    private String gene_id;
    // Holds the gene information - the key is the column name, the collection holds N values for that column
    private Map<String, Collection<String>> columns;

    /**
     * Standard constructor for the Gene class
     */
    public Gene() {
        super();
    }

    /**
     * Creates a Gene entity denoted by the format and the Gene ID
     * @param gene_id The gene identifier for the created gene
     * @param format Identifies the source database
     */
    public Gene(String gene_id, String format) {
        this.gene_id = gene_id;
        this.format = format;
    }

    /**
     * Creates a Gene entity denoted by format and gene with a passed gene information data set
     * @param gene_id The gene identifier for the created gene
     * @param columns The gene information data, stored in a map (kept by reference, not copied)
     * @param format Identifies the source database
     */
    public Gene(String gene_id, Map<String, Collection<String>> columns, String format) {
        this.gene_id = gene_id;
        this.columns = columns;
        this.format = format;
    }

    /**
     * Returns the gene identifier
     * @return The gene ID pertaining to the entity
     */
    public String getGeneID() {
        return gene_id;
    }

    /**
     * Returns the source database identifier
     * @return The source database identifier of the gene entity
     */
    public String getFormat() {
        return format;
    }

    /**
     * Returns the contained gene information
     * @return The gene information contained in the gene entity, or null if none has been set or added yet
     */
    public Map<String, Collection<String>> getInformation() {
        return columns;
    }

    /**
     * Adds or replaces complete information for the gene entity
     * This is to be replaced by a more dynamic method later
     * @param columns The data to be held by the gene entity, stored in a map
     */
    public void setGeneInfo(Map<String, Collection<String>> columns) {
        this.columns = columns;
    }

    /**
     * Sets the gene ID to a specific value
     * @param gene_id The gene ID value to be set for the gene entity
     */
    public void setGeneID(String gene_id) {
        this.gene_id = gene_id;
    }

    /**
     * Sets the format to a specific value
     * @param format The format value to be set for the gene entity
     */
    public void setFormat(String format) {
        this.format = format;
    }

    /**
     * Adds a single data column - one key plus an arbitrary number of values - to the gene entity.
     * If the column already exists, only values it does not contain yet are appended to it.
     * A null value collection is treated like an empty one: the column is created if missing,
     * but nothing is added.
     * @param column_key The key String (for example "Gene ID")
     * @param values The values to add for this key; may be null
     */
    public void addInformation(String column_key, Collection<String> values) {
        if (this.columns == null) {
            this.columns = new HashMap<>();
        }
        Collection<String> column = this.columns.get(column_key);
        if (column == null) {
            // Covers both a missing key and a key that was explicitly mapped to null
            column = new LinkedList<>();
            this.columns.put(column_key, column);
        }
        if (values == null) {
            return;
        }
        for (String new_value : values) {
            if (!column.contains(new_value)) {
                column.add(new_value);
            }
        }
    }

    /**
     * Adds multiple data columns (stored in a map) to the gene entity.
     * Each entry is merged via {@link #addInformation(String, Collection)}.
     * @param new_columns The data to be added; a null map is ignored
     */
    public void addMultipleColumns(Map<String, Collection<String>> new_columns) {
        if (new_columns == null) {
            return;
        }
        for (Map.Entry<String, Collection<String>> entry : new_columns.entrySet()) {
            this.addInformation(entry.getKey(), entry.getValue());
        }
    }
}
\ No newline at end of file
package de.unigoettingen.ibeetlebase.geneinfo.provider;
import java.io.IOException;
import javax.ws.rs.container.ContainerRequestContext;
import javax.ws.rs.container.ContainerRequestFilter;
import javax.ws.rs.ext.Provider;
import org.jboss.logging.Logger;
@Provider
public class RequestLoggingFilter implements ContainerRequestFilter {

    private static final Logger LOG = Logger.getLogger(RequestLoggingFilter.class);

    /**
     * Logs the HTTP method and the request path of each request handed to this filter at INFO level.
     * The request itself is neither modified nor aborted.
     * @param requestContext The context of the request being filtered
     * @throws IOException Declared by the filter contract; not thrown by this implementation itself
     */
    @Override
    public void filter(ContainerRequestContext requestContext) throws IOException {
        final String httpMethod = requestContext.getMethod();
        final String requestPath = requestContext.getUriInfo().getPath();
        LOG.infov("{0} - {1}", httpMethod, requestPath);
    }
}
package de.unigoettingen.ibeetlebase.geneinfo.repository;
import java.util.Collection;
import de.unigoettingen.ibeetlebase.geneinfo.model.DrosophilaGene;
/**
 * Repository of {@link DrosophilaGene} entries that, in addition to the generic
 * {@link Repository} lookups, can be queried by symbol, full name and annotation ID.
 * Each of these lookups returns a collection because several genes may share the same value.
 */
public interface DrosophilaGeneRepository extends Repository<DrosophilaGene> {
/**
 * Looks up genes by their symbol.
 * @param symbol The gene symbol to look for
 * @return The genes found for the given symbol
 */
Collection<DrosophilaGene> getBySymbol(String symbol);
/**
 * Looks up genes by their full name.
 * @param fullname The full gene name to look for
 * @return The genes found for the given full name
 */
Collection<DrosophilaGene> getByFullname(String fullname);
/**
 * Looks up genes by their annotation ID.
 * @param annotationId The annotation ID to look for
 * @return The genes found for the given annotation ID
 */
Collection<DrosophilaGene> getByAnnotationId(String annotationId);
}
\ No newline at end of file
package de.unigoettingen.ibeetlebase.geneinfo.repository;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toCollection;
import static java.util.stream.Collectors.toMap;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
import de.unigoettingen.ibeetlebase.geneinfo.model.DrosophilaGene;
import de.unigoettingen.ibeetlebase.geneinfo.model.Gene;
import de.unigoettingen.ibeetlebase.geneinfo.service.FlyBase;
import io.quarkus.runtime.Startup;
/**
 * In-memory {@link DrosophilaGeneRepository} that is filled once from the {@link FlyBase}
 * service after construction and then answers all lookups from hash maps.
 */
@Startup
@ApplicationScoped
public class InMemoryDrosophilaGeneRepository implements DrosophilaGeneRepository {

    /** Organism abbreviations as they appear in the gene data, mapped to their taxonomy IDs. */
    private static final Map<String, String> TAXONOMY_IDS = Map.of(
            "Dana", "7217",
            "Dmel", "7227",
            "Dvir", "7244",
            "Dpse", "7237",
            "Dsim", "7240");

    @Inject
    FlyBase service;

    private Map<String, DrosophilaGene> mapFromId;
    private Map<String, Collection<DrosophilaGene>> mapFromSymbol;
    private Map<String, Collection<DrosophilaGene>> mapFromFullname;
    private Map<String, Collection<DrosophilaGene>> mapFromAnnotationId;

    /**
     * Looks up a gene by its identifier.
     * @param id The gene identifier
     * @return The matching gene, or null if the identifier is unknown
     */
    @Override
    public DrosophilaGene get(String id) {
        return mapFromId.get(id);
    }

    /**
     * Returns all genes held by the repository.
     * @return All genes, as a view of the internal ID map
     */
    @Override
    public Collection<DrosophilaGene> get() {
        return mapFromId.values();
    }

    /**
     * Looks up genes by their exact symbol.
     * @param symbol The gene symbol
     * @return The genes carrying that symbol, or an empty collection if there are none
     */
    @Override
    public Collection<DrosophilaGene> getBySymbol(String symbol) {
        return mapFromSymbol.getOrDefault(symbol, Collections.emptyList());
    }

    /**
     * Looks up genes by their exact full name.
     * @param fullname The full gene name
     * @return The genes carrying that full name, or an empty collection if there are none
     */
    @Override
    public Collection<DrosophilaGene> getByFullname(String fullname) {
        return mapFromFullname.getOrDefault(fullname, Collections.emptyList());
    }

    /**
     * Looks up genes by their exact annotation ID.
     * @param annotationId The annotation ID
     * @return The genes carrying that annotation ID, or an empty collection if there are none
     */
    @Override
    public Collection<DrosophilaGene> getByAnnotationId(String annotationId) {
        return mapFromAnnotationId.getOrDefault(annotationId, Collections.emptyList());
    }

    /**
     * Fills the repository once the bean has been constructed and its dependencies injected.
     */
    @PostConstruct
    void init() {
        loadData();
    }

    /**
     * Converts every entry of the FlyBase database and builds the ID map plus the
     * secondary indexes by symbol, full name and annotation ID.
     * @throws IllegalStateException if the FlyBase service delivers no database
     */
    private void loadData() {
        Map<String, Gene> db = service.getDB();
        if (db == null) {
            // The service yields null when the gene file could not be read; fail with a clear message
            throw new IllegalStateException("FlyBase gene data could not be loaded: getDB() returned null");
        }
        mapFromId = db.entrySet()
                .stream()
                .collect(toMap(Map.Entry::getKey,
                        entry -> convertToFlybaseGene(entry.getValue())));
        mapFromSymbol = mapFromId.values()
                .stream()
                .collect(groupingBy(DrosophilaGene::getSymbol, toCollection(LinkedList::new)));
        mapFromFullname = mapFromId.values()
                .stream()
                .collect(groupingBy(DrosophilaGene::getFullname, toCollection(LinkedList::new)));
        mapFromAnnotationId = mapFromId.values()
                .stream()
                .collect(groupingBy(DrosophilaGene::getAnnotationId, toCollection(LinkedList::new)));
    }

    /**
     * Builds a {@link DrosophilaGene} from the generic column-based {@link Gene}.
     * Columns that are missing from the gene are treated as empty instead of aborting the load.
     * @param gene The generic gene as delivered by the FlyBase service
     * @return The converted gene
     */
    private DrosophilaGene convertToFlybaseGene(Gene gene) {
        Map<String, Collection<String>> info = gene.getInformation();
        if (info == null) {
            // A gene without any information yields empty fields rather than a NullPointerException
            info = Map.of();
        }
        return new DrosophilaGene(gene.getGeneID(),
                collectionToString(organismToTaxonomyId(info.get("organism"))),
                collectionToString(info.get("gene_type")),
                collectionToString(info.get("gene_symbol")),
                collectionToString(info.get("gene_fullname")),
                collectionToString(info.get("annotation_ID")),
                collectionToString(info.get("transcript_Type")),
                orEmpty(info.get("transcript_ID")).stream().collect(Collectors.toSet()));
    }

    /**
     * Translates organism abbreviations into taxonomy IDs.
     * Null entries and abbreviations without a known taxonomy ID are dropped, so that they do
     * not end up as the literal text "null" in the joined result.
     * @param organisms The organism abbreviations; may be null
     * @return The taxonomy IDs of all recognised organisms, in encounter order
     */
    private Collection<String> organismToTaxonomyId(Collection<String> organisms) {
        return orEmpty(organisms).stream()
                .filter(organism -> organism != null)
                .map(TAXONOMY_IDS::get)
                .filter(taxonomyId -> taxonomyId != null)
                .collect(Collectors.toList());
    }

    /**
     * Joins the items with ", ".
     * @param items The items to join; may be null
     * @return The joined string, or an empty string for a null or empty collection
     */
    private String collectionToString(Collection<String> items) {
        return orEmpty(items).stream().collect(Collectors.joining(", "));
    }

    /**
     * Replaces a null collection by an empty one.
     * @param items The collection to check
     * @return The given collection, or an empty collection if it is null
     */
    private Collection<String> orEmpty(Collection<String> items) {
        if (items == null) {
            return Collections.emptyList();
        }
        return items;
    }
}
package de.unigoettingen.ibeetlebase.geneinfo.repository;
import static java.util.stream.Collectors.toMap;
import java.util.Collection;
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
import de.unigoettingen.ibeetlebase.geneinfo.model.Gene;
import de.unigoettingen.ibeetlebase.geneinfo.model.TriboliumGene;
import de.unigoettingen.ibeetlebase.geneinfo.service.Tribolium;
import io.quarkus.runtime.Startup;
/**
 * In-memory {@link TriboliumGeneRepository} that is filled once from the {@link Tribolium}
 * service after construction and then answers all lookups from a hash map.
 */
@Startup
@ApplicationScoped
public class InMemoryTriboliumGeneRepository implements TriboliumGeneRepository {

    @Inject
    Tribolium service;

    private Map<String, TriboliumGene> mapFromId;

    /**
     * Looks up a gene by its identifier.
     * @param id The gene identifier
     * @return The matching gene, or null if the identifier is unknown
     */
    @Override
    public TriboliumGene get(String id) {
        return mapFromId.get(id);
    }

    /**
     * Returns all genes held by the repository.
     * @return All genes, as a view of the internal ID map
     */
    @Override
    public Collection<TriboliumGene> get() {
        return mapFromId.values();
    }

    /**
     * Fills the repository once the bean has been constructed and its dependencies injected.
     */
    @PostConstruct
    void init() {
        loadData();
    }

    /**
     * Converts every entry of the service database into a {@link TriboliumGene}, keyed by the database key.
     */
    private void loadData() {
        mapFromId = service.getDB().entrySet()
                .stream()
                .collect(toMap(Map.Entry::getKey,
                        entry -> convertToTriboliumGene(entry.getValue())));
    }

    /**
     * Builds a {@link TriboliumGene} from the generic column-based {@link Gene}.
     * Columns that are missing from the gene are treated as empty instead of aborting the load.
     * @param gene The generic gene as delivered by the Tribolium service
     * @return The converted gene
     */
    private TriboliumGene convertToTriboliumGene(Gene gene) {
        Map<String, Collection<String>> info = gene.getInformation();
        if (info == null) {
            // A gene without any information yields empty fields rather than a NullPointerException
            info = Map.of();
        }
        return new TriboliumGene(gene.getGeneID(),
                // Fixed value for every gene; presumably the Tribolium castaneum taxonomy ID - confirm against TriboliumGene
                "7070",
                collectionToString(info.get("seqname")),
                collectionToString(info.get("source")),
                collectionToString(info.get("feature")),
                collectionToString(info.get("start")),
                collectionToString(info.get("end")),
                collectionToString(info.get("score")),
                collectionToString(info.get("strand")),
                collectionToString(info.get("frame")),
                collectionToString(info.get("locus_tag")));
    }

    /**
     * Joins the items with ", ".
     * @param items The items to join; may be null
     * @return The joined string, or an empty string for a null or empty collection
     */
    private String collectionToString(Collection<String> items) {
        if (items == null) {
            return "";
        }
        return items.stream().collect(Collectors.joining(", "));
    }
}
package de.unigoettingen.ibeetlebase.geneinfo.repository;
import java.util.Collection;
/**
 * Minimal read-only repository contract: entries can be fetched one at a time by a
 * String identifier or all at once.
 * @param <T> The type of the stored entries
 */
public interface Repository<T> {
/**
 * Fetches a single entry by its identifier.
 * @param id The identifier of the requested entry
 * @return The entry stored under the identifier; the in-memory implementations of this
 *         project return null when the identifier is unknown
 */
T get(String id);
/**
 * Fetches all entries.
 * @return All entries held by the repository
 */
Collection<T> get();
}
package de.unigoettingen.ibeetlebase.geneinfo.repository;
import de.unigoettingen.ibeetlebase.geneinfo.model.TriboliumGene;
/**
 * Repository of {@link TriboliumGene} entries.
 * Declares no methods beyond the generic {@link Repository} lookups.
 */
public interface TriboliumGeneRepository extends Repository<TriboliumGene>{
}
package de.unigoettingen.ibeetlebase.geneinfo.service;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
import de.unigoettingen.ibeetlebase.geneinfo.AppServiceBean;
import de.unigoettingen.ibeetlebase.geneinfo.model.Gene;
@ApplicationScoped
public class FlyBase implements GeneSource {
private static final String SOURCE = "flybase";
private Map<String, Gene> database;
private Map<String, String> id_list = new HashMap<>();
@Inject
AppServiceBean service;
/**
 * Provides the FlyBase gene database, parsing the gene file first if no database is held yet.
 * All eight columns of the file are requested, under their descriptive names.
 * @return The database containing the information read from the FlyBase gene file
 */
public Map<String, Gene> getDB() {
    if (this.database != null) {
        return this.database;
    }
    final int[] indices = {0, 1, 2, 3, 4, 5, 6, 7};
    final String[] names = {
        "organism", "gene_type", "gene_ID", "gene_symbol", "gene_fullname",
        "annotation_ID", "transcript_Type", "transcript_ID"
    };
    this.database = readGeneFile(indices, names);
    return this.database;
}
/**
 * Looks up the gene information for the given gene identifier.
 * A fresh Gene carrying the identifier and the FlyBase source tag is created and handed to
 * {@link #search(String, Gene)}, which fills it.
 * @param fbgn The gene identifier that data is requested for
 * @return The result of the search
 */
@Override
public Gene retrieve(String fbgn) {
    final Gene query = new Gene(fbgn, SOURCE);
    return search(fbgn, query);
}
/**
 * Called by retrieve(); copies the information of the database entry stored under the given
 * identifier into the passed result object.
 * Returns null if the database holds no entry for the identifier.
 * @param fbgn The gene identifier that is to be matched with the database
 * @param resultObject A new Gene entity with the ID and source database already included
 * @return The passed Gene entity filled with the requested information, or null if no entry is found
 */
@Override
public Gene search(String fbgn, Gene resultObject) {
    final Map<String, Gene> db = getDB();
    if (!db.containsKey(fbgn)) {
        return null;
    }
    // Merge every column of the stored entry into the result object
    final Gene stored = db.get(fbgn);
    resultObject.addMultipleColumns(stored.getInformation());
    return resultObject;
}
/**
* Parses the gene database file and saves the contained information
* @param column_indices Indices of all columns in the file that are to be saved
* @param column_descriptors Column names for all columns that are to be saved, in the same order as the indices
* @return A database collection filled with information from the parsed file, or null if an exception occurs
*/
private Map<String, Gene> readGeneFile(int[] column_indices, String[] column_descriptors) {
try {
Map<String, Gene> database_flybase = new HashMap<>();
try (BufferedReader buf = new BufferedReader(
new FileReader(new File(service.getDataDir(),"fbgn_fbtr_fbpp_expanded_fb_2020_02.tsv")))) {