Commit 5074cf9e authored by cnguyen2
Browse files

Create db schema on start

parent 9811ac15
Pipeline #216773 passed with stages
in 3 minutes and 14 seconds
data
db
# Eclipse
.project
......
......@@ -25,38 +25,39 @@ public class DrosophilaGeneLoader {
/**
 * Creates and persists {@code DrosophilaGene} entries from the TSV file at
 * {@code geneSetPath}, then logs the resulting gene count.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers were
 * stripped (see the hunk header just above), so the removed and the added
 * version of the body appear interleaved. As shown, {@code idSet} is declared
 * twice inside the same block and the parseTSV call appears twice, once inside
 * and once after the {@code count() == 0} block. Confirm the real method body
 * against the repository before relying on this text.
 */
@PostConstruct
@Transactional
public void load() {
// Early exit: nothing is loaded when DrosophilaGene rows already exist.
if (DrosophilaGene.count() > 0) return;
// Block form of the same guard: the enclosed load only runs when the count is 0.
if (DrosophilaGene.count() == 0) {
LOG.info("Attempting to initialize Drosophila data...");
// Tracks gene ids persisted so far so that each gene_ID is stored only once.
Set<String> idSet = new HashSet<>();
LOG.info("Attempting to initialize Drosophila data...");
Set<String> idSet = new HashSet<>();
// parseTSV receives the file path, the column names and a per-record callback;
// presumably the callback is invoked once per parsed row -- confirm in Parser.
Parser.parseTSV(geneSetPath, List.of(
"organism",
"gene_type",
"gene_ID",
"gene_symbol",
"gene_fullname",
"annotation_ID",
"transcript_type",
"transcript_ID",
"transcript_symbol",
"polypeptide_ID",
"polypeptide_symbol"), record -> {
// Ignore records whose organism column is not "Dmel".
if (!"Dmel".equals(record.get("organism"))) return;
String id = record.get("gene_ID");
// Skip a gene_ID that was already persisted during this run.
if (idSet.contains(id)) return;
// Only gene_ID, gene_symbol, gene_fullname and annotation_ID are copied to the entity.
DrosophilaGene gene = new DrosophilaGene();
gene.id = id;
gene.symbol = record.get("gene_symbol");
gene.fullname = record.get("gene_fullname");
gene.annotationId = record.get("annotation_ID");
gene.persist();
idSet.add(id);
});
}
// Second copy of the same parse-and-persist call (see the NOTE at the top of the method).
Parser.parseTSV(geneSetPath, List.of(
"organism",
"gene_type",
"gene_ID",
"gene_symbol",
"gene_fullname",
"annotation_ID",
"transcript_type",
"transcript_ID",
"transcript_symbol",
"polypeptide_ID",
"polypeptide_symbol"), record -> {
if (!"Dmel".equals(record.get("organism"))) return;
String id = record.get("gene_ID");
if (idSet.contains(id)) return;
DrosophilaGene gene = new DrosophilaGene();
gene.id = id;
gene.symbol = record.get("gene_symbol");
gene.fullname = record.get("gene_fullname");
gene.annotationId = record.get("annotation_ID");
gene.persist();
idSet.add(id);
});
// Both messages report DrosophilaGene.count() as it stands at this point.
LOG.infov("Loaded {0} Drosophila genes", DrosophilaGene.count());
LOG.infov("Drosophila gene count: {0}", DrosophilaGene.count());
}
}
......@@ -14,6 +14,7 @@ import javax.annotation.PostConstruct;
import javax.transaction.Transactional;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.jboss.logging.Logger;
import ibb.api.geneinfo.model.TriboliumGene;
import ibb.api.geneinfo.parser.FastaRecord;
......@@ -24,6 +25,7 @@ import io.quarkus.runtime.Startup;
@Deprecated
@Startup
public class TriboliumGeneLoader {
private static final Logger LOG = Logger.getLogger(TriboliumGeneLoader.class);
private static final Pattern TC_PATTERN = Pattern.compile("(TC[0-9]{6})");
@ConfigProperty(name = "data.tribolium.gene.gff")
......@@ -41,49 +43,53 @@ public class TriboliumGeneLoader {
/**
 * Builds {@code TriboliumGene} entries from the GFF file, attaches CDS, mRNA
 * and protein sequences from the three FASTA files, persists the genes and
 * logs the resulting gene count.
 *
 * NOTE(review): this listing looks like a rendered diff whose +/- markers were
 * stripped (see the hunk header just above), so the removed and the added
 * version of the body appear one after the other: first an unguarded load
 * sequence, then the same sequence wrapped in a {@code count() == 0} check.
 * As shown, {@code genes} and {@code geneMap} are declared both before and
 * inside the if block. Confirm the real method body against the repository.
 */
@PostConstruct
@Transactional
public void load() {
List<TriboliumGene> genes = new ArrayList<>();
// Keep only records whose feature is "gene"; Optional.map drops the record when
// getTCNo yields null, so only records with a TC number become genes.
Parser.parseGFF(gff, record -> Optional.of(record)
.filter(r -> "gene".equals(r.getFeature()))
.map(this::getTCNo)
.map(tc -> {
TriboliumGene gene = new TriboliumGene();
gene.id = tc;
gene.seqname = record.getSeqname();
gene.start = record.getStart();
gene.end = record.getEnd();
gene.strand = record.getStrand();
return gene;
})
.ifPresent(genes::add));
// Index the genes by id for the FASTA lookups below.
// NOTE(review): if toMap is Collectors.toMap, a repeated id raises IllegalStateException -- confirm.
Map<String, TriboliumGene> geneMap = genes.stream()
.collect(toMap(gene -> gene.id, Function.identity()));
// Each FASTA pass derives a TC number from the record header and, when a gene
// with that id exists in geneMap, stores the record's sequence on that gene.
Parser.parseFasta(cdsFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.CDS = record.getSequence());
});
Parser.parseFasta(mRNAFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.mRNA = record.getSequence());
});
Parser.parseFasta(proteinFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.protein = record.getSequence());
});
// Persist all collected genes in a single call.
TriboliumGene.persist(genes);
// Guarded copy of the same sequence: it only runs when TriboliumGene.count() is 0.
if (TriboliumGene.count() == 0) {
LOG.info("Attempting to initialize Tribolium data...");
List<TriboliumGene> genes = new ArrayList<>();
Parser.parseGFF(gff, record -> Optional.of(record)
.filter(r -> "gene".equals(r.getFeature()))
.map(this::getTCNo)
.map(tc -> {
TriboliumGene gene = new TriboliumGene();
gene.id = tc;
gene.seqname = record.getSeqname();
gene.start = record.getStart();
gene.end = record.getEnd();
gene.strand = record.getStrand();
return gene;
})
.ifPresent(genes::add));
Map<String, TriboliumGene> geneMap = genes.stream()
.collect(toMap(gene -> gene.id, Function.identity()));
Parser.parseFasta(cdsFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.CDS = record.getSequence());
});
Parser.parseFasta(mRNAFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.mRNA = record.getSequence());
});
Parser.parseFasta(proteinFasta, record -> {
Optional.of(record)
.map(FastaRecord::getHeader)
.map(this::getTCNo)
.map(geneMap::get)
.ifPresent(gene -> gene.protein = record.getSequence());
});
TriboliumGene.persist(genes);
}
// Sits after the if block, so the count is logged whether or not the guarded load ran.
LOG.infov("Tribolium gene count: {0}", TriboliumGene.count());
}
private String getTCNo(GFFRecord record) {
......@@ -94,4 +100,5 @@ public class TriboliumGeneLoader {
Matcher matcher = TC_PATTERN.matcher(str);
return matcher.find() ? matcher.group(1) : null;
}
}
......@@ -2,50 +2,54 @@ quarkus:
http:
cors:
~: true
container-image:
group: ibb/api
name: geneinfoservice
tag: latest
registry: docker.gitlab.gwdg.de
build: true
datasource:
db-kind: h2
jdbc:
url: jdbc:h2:./geneinfoservice
url: jdbc:h2:./db/geneinfoservice
hibernate-orm:
database:
generation:
~: update
data:
dir: ./data
drosophila:
gene:
tsv: /data/fbgn_fbtr_fbpp_expanded_fb_2020_02.tsv.gz
tsv: ${data.dir}/fbgn_fbtr_fbpp_expanded_fb_2020_02.tsv.gz
tribolium:
gene:
gff: /data/OGS3.gff.gz
gff: ${data.dir}/OGS3.gff.gz
cds:
fasta: /data/OGS3_CDS.fasta.gz
fasta: ${data.dir}/OGS3_CDS.fasta.gz
mrna:
fasta: /data/OGS3_mRNA.fasta.gz
fasta: ${data.dir}/OGS3_mRNA.fasta.gz
protein:
fasta: /data/OGS3_proteins.fasta.gz
fasta: ${data.dir}/OGS3_proteins.fasta.gz
"%dev":
data:
dir: ../sample_data
drosophila:
gene:
tsv: ../sample_data/drosophila.gene.tsv.gz
tsv: ${data.dir}/drosophila.gene.tsv.gz
tribolium:
gene:
gff: ../sample_data/tribolium.gene.gff.gz
gff: ${data.dir}/tribolium.gene.gff.gz
cds:
fasta: ../sample_data/tribolium.cds.fasta.gz
fasta: ${data.dir}/tribolium.cds.fasta.gz
mrna:
fasta: ../sample_data/tribolium.mrna.fasta.gz
fasta: ${data.dir}/tribolium.mrna.fasta.gz
protein:
fasta: ../sample_data/tribolium.protein.fasta.gz
fasta: ${data.dir}/tribolium.protein.fasta.gz
quarkus:
datasource:
jdbc:
url: jdbc:h2:./geneinfoservice
hibernate-orm:
database:
generation:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment