Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
iBB
API
geneinfoservice
Commits
5074cf9e
Commit
5074cf9e
authored
Jul 29, 2021
by
cnguyen2
Browse files
Create db schema on start
parent
9811ac15
Pipeline
#216773
passed with stages
in 3 minutes and 14 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
5074cf9e
data
db
# Eclipse
.project
...
...
src/main/java/ibb/api/geneinfo/loader/DrosophilaGeneLoader.java
View file @
5074cf9e
...
...
@@ -25,38 +25,39 @@ public class DrosophilaGeneLoader {
@PostConstruct
@Transactional
public
void
load
()
{
if
(
DrosophilaGene
.
count
()
>
0
)
return
;
if
(
DrosophilaGene
.
count
()
==
0
)
{
LOG
.
info
(
"Attempting to initialize Drosophila data..."
);
Set
<
String
>
idSet
=
new
HashSet
<>();
LOG
.
info
(
"Attempting to initialize Drosophila data..."
);
Set
<
String
>
idSet
=
new
HashSet
<>();
Parser
.
parseTSV
(
geneSetPath
,
List
.
of
(
"organism"
,
"gene_type"
,
"gene_ID"
,
"gene_symbol"
,
"gene_fullname"
,
"annotation_ID"
,
"transcript_type"
,
"transcript_ID"
,
"transcript_symbol"
,
"polypeptide_ID"
,
"polypeptide_symbol"
),
record
->
{
if
(!
"Dmel"
.
equals
(
record
.
get
(
"organism"
)))
return
;
String
id
=
record
.
get
(
"gene_ID"
);
if
(
idSet
.
contains
(
id
))
return
;
DrosophilaGene
gene
=
new
DrosophilaGene
();
gene
.
id
=
id
;
gene
.
symbol
=
record
.
get
(
"gene_symbol"
);
gene
.
fullname
=
record
.
get
(
"gene_fullname"
);
gene
.
annotationId
=
record
.
get
(
"annotation_ID"
);
gene
.
persist
();
idSet
.
add
(
id
);
});
}
Parser
.
parseTSV
(
geneSetPath
,
List
.
of
(
"organism"
,
"gene_type"
,
"gene_ID"
,
"gene_symbol"
,
"gene_fullname"
,
"annotation_ID"
,
"transcript_type"
,
"transcript_ID"
,
"transcript_symbol"
,
"polypeptide_ID"
,
"polypeptide_symbol"
),
record
->
{
if
(!
"Dmel"
.
equals
(
record
.
get
(
"organism"
)))
return
;
String
id
=
record
.
get
(
"gene_ID"
);
if
(
idSet
.
contains
(
id
))
return
;
DrosophilaGene
gene
=
new
DrosophilaGene
();
gene
.
id
=
id
;
gene
.
symbol
=
record
.
get
(
"gene_symbol"
);
gene
.
fullname
=
record
.
get
(
"gene_fullname"
);
gene
.
annotationId
=
record
.
get
(
"annotation_ID"
);
gene
.
persist
();
idSet
.
add
(
id
);
});
LOG
.
infov
(
"Loaded {0} Drosophila genes"
,
DrosophilaGene
.
count
());
LOG
.
infov
(
"Drosophila gene count: {0}"
,
DrosophilaGene
.
count
());
}
}
src/main/java/ibb/api/geneinfo/loader/TriboliumGeneLoader.java
View file @
5074cf9e
...
...
@@ -14,6 +14,7 @@ import javax.annotation.PostConstruct;
import
javax.transaction.Transactional
;
import
org.eclipse.microprofile.config.inject.ConfigProperty
;
import
org.jboss.logging.Logger
;
import
ibb.api.geneinfo.model.TriboliumGene
;
import
ibb.api.geneinfo.parser.FastaRecord
;
...
...
@@ -24,6 +25,7 @@ import io.quarkus.runtime.Startup;
@Deprecated
@Startup
public
class
TriboliumGeneLoader
{
private
static
final
Logger
LOG
=
Logger
.
getLogger
(
TriboliumGeneLoader
.
class
);
private
static
final
Pattern
TC_PATTERN
=
Pattern
.
compile
(
"(TC[0-9]{6})"
);
@ConfigProperty
(
name
=
"data.tribolium.gene.gff"
)
...
...
@@ -41,49 +43,53 @@ public class TriboliumGeneLoader {
@PostConstruct
@Transactional
public
void
load
()
{
List
<
TriboliumGene
>
genes
=
new
ArrayList
<>();
Parser
.
parseGFF
(
gff
,
record
->
Optional
.
of
(
record
)
.
filter
(
r
->
"gene"
.
equals
(
r
.
getFeature
()))
.
map
(
this
::
getTCNo
)
.
map
(
tc
->
{
TriboliumGene
gene
=
new
TriboliumGene
();
gene
.
id
=
tc
;
gene
.
seqname
=
record
.
getSeqname
();
gene
.
start
=
record
.
getStart
();
gene
.
end
=
record
.
getEnd
();
gene
.
strand
=
record
.
getStrand
();
return
gene
;
})
.
ifPresent
(
genes:
:
add
));
Map
<
String
,
TriboliumGene
>
geneMap
=
genes
.
stream
()
.
collect
(
toMap
(
gene
->
gene
.
id
,
Function
.
identity
()));
Parser
.
parseFasta
(
cdsFasta
,
record
->
{
Optional
.
of
(
record
)
.
map
(
FastaRecord:
:
getHeader
)
.
map
(
this
::
getTCNo
)
.
map
(
geneMap:
:
get
)
.
ifPresent
(
gene
->
gene
.
CDS
=
record
.
getSequence
());
});
Parser
.
parseFasta
(
mRNAFasta
,
record
->
{
Optional
.
of
(
record
)
.
map
(
FastaRecord:
:
getHeader
)
.
map
(
this
::
getTCNo
)
.
map
(
geneMap:
:
get
)
.
ifPresent
(
gene
->
gene
.
mRNA
=
record
.
getSequence
());
});
Parser
.
parseFasta
(
proteinFasta
,
record
->
{
Optional
.
of
(
record
)
.
map
(
FastaRecord:
:
getHeader
)
.
map
(
this
::
getTCNo
)
.
map
(
geneMap:
:
get
)
.
ifPresent
(
gene
->
gene
.
protein
=
record
.
getSequence
());
});
TriboliumGene
.
persist
(
genes
);
if
(
TriboliumGene
.
count
()
==
0
)
{
LOG
.
info
(
"Attempting to initialize Tribolium data..."
);
List
<
TriboliumGene
>
genes
=
new
ArrayList
<>();
Parser
.
parseGFF
(
gff
,
record
->
Optional
.
of
(
record
)
.
filter
(
r
->
"gene"
.
equals
(
r
.
getFeature
()))
.
map
(
this
::
getTCNo
)
.
map
(
tc
->
{
TriboliumGene
gene
=
new
TriboliumGene
();
gene
.
id
=
tc
;
gene
.
seqname
=
record
.
getSeqname
();
gene
.
start
=
record
.
getStart
();
gene
.
end
=
record
.
getEnd
();
gene
.
strand
=
record
.
getStrand
();
return
gene
;
})
.
ifPresent
(
genes:
:
add
));
Map
<
String
,
TriboliumGene
>
geneMap
=
genes
.
stream
()
.
collect
(
toMap
(
gene
->
gene
.
id
,
Function
.
identity
()));
Parser
.
parseFasta
(
cdsFasta
,
record
->
{
Optional
.
of
(
record
)
.
map
(
FastaRecord:
:
getHeader
)
.
map
(
this
::
getTCNo
)
.
map
(
geneMap:
:
get
)
.
ifPresent
(
gene
->
gene
.
CDS
=
record
.
getSequence
());
});
Parser
.
parseFasta
(
mRNAFasta
,
record
->
{
Optional
.
of
(
record
)
.
map
(
FastaRecord:
:
getHeader
)
.
map
(
this
::
getTCNo
)
.
map
(
geneMap:
:
get
)
.
ifPresent
(
gene
->
gene
.
mRNA
=
record
.
getSequence
());
});
Parser
.
parseFasta
(
proteinFasta
,
record
->
{
Optional
.
of
(
record
)
.
map
(
FastaRecord:
:
getHeader
)
.
map
(
this
::
getTCNo
)
.
map
(
geneMap:
:
get
)
.
ifPresent
(
gene
->
gene
.
protein
=
record
.
getSequence
());
});
TriboliumGene
.
persist
(
genes
);
}
LOG
.
infov
(
"Tribolium gene count: {0}"
,
TriboliumGene
.
count
());
}
private
String
getTCNo
(
GFFRecord
record
)
{
...
...
@@ -94,4 +100,5 @@ public class TriboliumGeneLoader {
Matcher
matcher
=
TC_PATTERN
.
matcher
(
str
);
return
matcher
.
find
()
?
matcher
.
group
(
1
)
:
null
;
}
}
src/main/resources/application.yaml
View file @
5074cf9e
...
...
@@ -2,50 +2,54 @@ quarkus:
http
:
cors
:
~
:
true
container-image
:
group
:
ibb/api
name
:
geneinfoservice
tag
:
latest
registry
:
docker.gitlab.gwdg.de
build
:
true
datasource
:
db-kind
:
h2
jdbc
:
url
:
jdbc:h2:./geneinfoservice
url
:
jdbc:h2:./db/geneinfoservice
hibernate-orm
:
database
:
generation
:
~
:
update
data
:
dir
:
./data
drosophila
:
gene
:
tsv
:
/
data/fbgn_fbtr_fbpp_expanded_fb_2020_02.tsv.gz
tsv
:
${
data
.dir}
/fbgn_fbtr_fbpp_expanded_fb_2020_02.tsv.gz
tribolium
:
gene
:
gff
:
/
data/OGS3.gff.gz
gff
:
${
data
.dir}
/OGS3.gff.gz
cds
:
fasta
:
/
data/OGS3_CDS.fasta.gz
fasta
:
${
data
.dir}
/OGS3_CDS.fasta.gz
mrna
:
fasta
:
/
data/OGS3_mRNA.fasta.gz
fasta
:
${
data
.dir}
/OGS3_mRNA.fasta.gz
protein
:
fasta
:
/
data/OGS3_proteins.fasta.gz
fasta
:
${
data
.dir}
/OGS3_proteins.fasta.gz
"
%dev"
:
data
:
dir
:
../sample_data
drosophila
:
gene
:
tsv
:
../sample_data
/drosophila.gene.tsv.gz
tsv
:
${data.dir}
/drosophila.gene.tsv.gz
tribolium
:
gene
:
gff
:
../sample_data
/tribolium.gene.gff.gz
gff
:
${data.dir}
/tribolium.gene.gff.gz
cds
:
fasta
:
../sample_data
/tribolium.cds.fasta.gz
fasta
:
${data.dir}
/tribolium.cds.fasta.gz
mrna
:
fasta
:
../sample_data
/tribolium.mrna.fasta.gz
fasta
:
${data.dir}
/tribolium.mrna.fasta.gz
protein
:
fasta
:
../sample_data
/tribolium.protein.fasta.gz
fasta
:
${data.dir}
/tribolium.protein.fasta.gz
quarkus
:
datasource
:
jdbc
:
url
:
jdbc:h2:./geneinfoservice
hibernate-orm
:
database
:
generation
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment