Commit ba56b2dc authored by cnguyen2's avatar cnguyen2
Browse files

Reduce memory usage when initializing data

parent 9a6eb2ff
Pipeline #219748 passed with stages
in 10 minutes and 10 seconds
......@@ -16,7 +16,7 @@ build native:
rules:
- if: '$BUILD_TYPE == "native" && $CI_COMMIT_REF_NAME == $CI_DEFAULT_BRANCH'
script:
- mvn -f $CI_PROJECT_DIR/pom.xml clean package -Pnative -Dquarkus.container-image.build=false
- mvn -f $CI_PROJECT_DIR/pom.xml clean package -Pnative
artifacts:
paths:
- $CI_PROJECT_DIR/target/*-runner
......@@ -27,7 +27,7 @@ build jvm:
rules:
- if: '$BUILD_TYPE == "jvm" && $CI_COMMIT_REF_NAME == $CI_DEFAULT_BRANCH'
script:
- mvn -f $CI_PROJECT_DIR/pom.xml clean package -Dquarkus.container-image.build=false
- mvn -f $CI_PROJECT_DIR/pom.xml clean package
artifacts:
paths:
- $CI_PROJECT_DIR/target/*-runner.jar
......
# Gene Info Service for the iBeetle-Base project
## Running the application in dev mode
## Development
To run in development mode (hot deployment with background compilation)
```
./mvnw quarkus:dev
```
## Packaging and containerizing
The application can be packaged and containerized by one command (docker required)
To create a jar file
```
./mvnw package
./mvnw clean package
```
To create a native app inside a container
```
./mvnw clean package -Pnative -Dquarkus.container-image.build=true
```
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.3
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.4
WORKDIR /work/
COPY target/*-runner /work/application
......
package ibb.api.geneinfo.loader;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.PostConstruct;
import javax.enterprise.context.Dependent;
import javax.transaction.Transactional;
import org.eclipse.microprofile.config.inject.ConfigProperty;
......@@ -16,6 +15,7 @@ import io.quarkus.runtime.Startup;
@Deprecated
@Startup
@Dependent
public class DrosophilaGeneLoader {
private static final Logger LOG = Logger.getLogger(DrosophilaGeneLoader.class);
......@@ -28,7 +28,6 @@ public class DrosophilaGeneLoader {
if (DrosophilaGene.count() == 0) {
LOG.info("Attempting to initialize Drosophila data...");
Set<String> idSet = new HashSet<>();
Parser.parseTSV(geneSetPath, List.of(
"organism",
......@@ -46,15 +45,16 @@ public class DrosophilaGeneLoader {
if (!"Dmel".equals(record.get("organism"))) return;
String id = record.get("gene_ID");
if (idSet.contains(id)) return;
DrosophilaGene gene = new DrosophilaGene();
DrosophilaGene gene = DrosophilaGene.findById(id);
if (gene != null) return;
gene = new DrosophilaGene();
gene.id = id;
gene.symbol = record.get("gene_symbol");
gene.fullname = record.get("gene_fullname");
gene.annotationId = record.get("annotation_ID");
gene.persist();
idSet.add(id);
});
}
......
package ibb.api.geneinfo.loader;
/**
 * Placeholder for a generic gene loader; the class currently declares no members.
 */
public class GeneLoader {
// TODO: Generic gene loader with user configurations
}
package ibb.api.geneinfo.loader;
import static java.util.stream.Collectors.toMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.PostConstruct;
import javax.enterprise.context.Dependent;
import javax.transaction.Transactional;
import org.eclipse.microprofile.config.inject.ConfigProperty;
......@@ -24,6 +19,7 @@ import io.quarkus.runtime.Startup;
@Deprecated
@Startup
@Dependent
public class TriboliumGeneLoader {
private static final Logger LOG = Logger.getLogger(TriboliumGeneLoader.class);
private static final Pattern TC_PATTERN = Pattern.compile("(TC[0-9]{6})");
......@@ -45,53 +41,70 @@ public class TriboliumGeneLoader {
/**
 * Initializes the Tribolium gene data when {@code TriboliumGene.count()} reports zero rows,
 * then logs the gene count.
 *
 * NOTE(review): this body contains both an in-memory implementation (collect every gene,
 * attach sequences, persist the whole list) and calls to loadGenes()/loadCDS()/loadMRNAs()/
 * loadProteins(), which persist the same data record by record. This looks like the removed
 * and added sides of a change shown together; confirm which statements are live.
 */
public void load() {
// Only populate when no TriboliumGene rows are counted.
if (TriboliumGene.count() == 0) {
LOG.info("Attempting to initialize Tribolium data...");
// Collects one TriboliumGene per GFF record whose feature is "gene" and whose TC number is non-null.
List<TriboliumGene> genes = new ArrayList<>();
Parser.parseGFF(gff, record -> Optional.of(record)
.filter(r -> "gene".equals(r.getFeature()))
// Optional.map turns a null TC number into an empty Optional, so such records are dropped.
.map(this::getTCNo)
.map(tc -> {
TriboliumGene gene = new TriboliumGene();
gene.id = tc;
gene.seqname = record.getSeqname();
gene.start = record.getStart();
gene.end = record.getEnd();
gene.strand = record.getStrand();
return gene;
})
.ifPresent(genes::add));
// Index the collected genes by id; Collectors.toMap throws IllegalStateException on a duplicate id.
Map<String, TriboliumGene> geneMap = genes.stream()
.collect(toMap(gene -> gene.id, Function.identity()));
// Each FASTA pass below resolves the TC number from the record header and, when a gene with
// that id was collected, stores the record's sequence on it.
Parser.parseFasta(cdsFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.CDS = record.getSequence());
});
Parser.parseFasta(mRNAFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.mRNA = record.getSequence());
});
Parser.parseFasta(proteinFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.protein = record.getSequence());
});
// Persist every collected gene in one call.
TriboliumGene.persist(genes);
// Per-record loading: genes first, then the three sequence kinds looked up by gene id.
loadGenes();
loadCDS();
loadMRNAs();
loadProteins();
}
// Logged whether or not initialization ran.
LOG.infov("Tribolium gene count: {0}", TriboliumGene.count());
}
/**
 * Persists one {@link TriboliumGene} for every GFF record whose feature is {@code "gene"}
 * and for which a TC number can be extracted. Records of other features, or without a
 * TC number, are skipped.
 *
 * <p>The method is package-private rather than private: CDI interceptors such as
 * {@code @Transactional} are not applied to private methods, so on a private method the
 * annotation is ignored (or rejected at build time, depending on configuration).
 */
@Transactional
void loadGenes() {
    Parser.parseGFF(gff, record -> {
        if (!"gene".equals(record.getFeature())) {
            return;
        }
        String id = getTCNo(record);
        if (id == null) {
            // No TC number could be extracted for this record; nothing to store.
            return;
        }
        TriboliumGene gene = new TriboliumGene();
        gene.id = id;
        gene.seqname = record.getSeqname();
        gene.start = record.getStart();
        gene.end = record.getEnd();
        gene.strand = record.getStrand();
        // Persist immediately instead of accumulating genes in a collection.
        gene.persist();
    });
}
/**
 * Reads the CDS FASTA source and stores each record's sequence in the {@code CDS} field of
 * the {@link TriboliumGene} whose id equals the TC number found in the record header.
 * Records with no header, no TC number, or no matching gene are skipped.
 *
 * <p>The method is package-private rather than private: CDI interceptors such as
 * {@code @Transactional} are not applied to private methods, so on a private method the
 * annotation is ignored (or rejected at build time, depending on configuration).
 */
@Transactional
void loadCDS() {
    Parser.parseFasta(cdsFasta, record -> Optional.of(record)
        .map(FastaRecord::getHeader)
        .map(this::getTCNo)
        // findById yields null for an unknown id, which Optional.map turns into "skip".
        .map(id -> (TriboliumGene) TriboliumGene.findById(id))
        .ifPresent(gene -> {
            gene.CDS = record.getSequence();
            gene.persist();
        }));
}
/**
 * Reads the mRNA FASTA source and stores each record's sequence in the {@code mRNA} field
 * of the {@link TriboliumGene} whose id equals the TC number found in the record header.
 * Records with no header, no TC number, or no matching gene are skipped.
 *
 * <p>The method is package-private rather than private: CDI interceptors such as
 * {@code @Transactional} are not applied to private methods, so on a private method the
 * annotation is ignored (or rejected at build time, depending on configuration).
 */
@Transactional
void loadMRNAs() {
    Parser.parseFasta(mRNAFasta, record -> Optional.of(record)
        .map(FastaRecord::getHeader)
        .map(this::getTCNo)
        // findById yields null for an unknown id, which Optional.map turns into "skip".
        .map(id -> (TriboliumGene) TriboliumGene.findById(id))
        .ifPresent(gene -> {
            gene.mRNA = record.getSequence();
            gene.persist();
        }));
}
/**
 * Reads the protein FASTA source and stores each record's sequence in the {@code protein}
 * field of the {@link TriboliumGene} whose id equals the TC number found in the record
 * header. Records with no header, no TC number, or no matching gene are skipped.
 *
 * <p>The method is package-private rather than private: CDI interceptors such as
 * {@code @Transactional} are not applied to private methods, so on a private method the
 * annotation is ignored (or rejected at build time, depending on configuration).
 */
@Transactional
void loadProteins() {
    Parser.parseFasta(proteinFasta, record -> Optional.of(record)
        .map(FastaRecord::getHeader)
        .map(this::getTCNo)
        // findById yields null for an unknown id, which Optional.map turns into "skip".
        .map(id -> (TriboliumGene) TriboliumGene.findById(id))
        .ifPresent(gene -> {
            gene.protein = record.getSequence();
            gene.persist();
        }));
}
/**
 * Extracts the TC number from the {@code locus_tag} attribute of a GFF record.
 *
 * @param record GFF record whose attributes are consulted
 * @return the value produced by the string-based {@code getTCNo} overload for the
 *         {@code locus_tag} attribute, or {@code null} when the record has no such attribute
 */
private String getTCNo(GFFRecord record) {
    // A missing locus_tag is treated like a tag without a TC number: return null rather than
    // handing null to the pattern matcher, which would throw a NullPointerException.
    // NOTE(review): assumes getAttributes().get(...) returns null for an absent key - confirm.
    return Optional.ofNullable(record.getAttributes().get("locus_tag"))
        .map(this::getTCNo)
        .orElse(null);
}
......@@ -100,5 +113,4 @@ public class TriboliumGeneLoader {
Matcher matcher = TC_PATTERN.matcher(str);
return matcher.find() ? matcher.group(1) : null;
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment