Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
multicrispr
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
loosolab
software
multicrispr
Commits
b2619ace
Commit
b2619ace
authored
3 years ago
by
Rene Wiegandt
Browse files
Options
Downloads
Patches
Plain Diff
04_bed_to_granges.R: Filtering range duplicates
parent
8cc4d37e
No related branches found
No related tags found
No related merge requests found
Pipeline
#246544
passed
3 years ago
Stage: test
Stage: deploy
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
R/04_bed_to_granges.R
+209
-207
209 additions, 207 deletions
R/04_bed_to_granges.R
with
209 additions
and
207 deletions
R/04_bed_to_granges.R
+
209
−
207
View file @
b2619ace
#=============================================================================
#=============================================================================
# bed_to_granges
# bed_to_granges
#=============================================================================
#=============================================================================
annotate_granges
<-
function
(
gr
,
txdb
){
annotate_granges
<-
function
(
gr
,
txdb
){
# Assert. Import. Comply.
# Assert. Import. Comply.
assert_is_all_of
(
gr
,
'GRanges'
)
assert_is_all_of
(
gr
,
'GRanges'
)
assert_is_all_of
(
txdb
,
'TxDb'
)
assert_is_all_of
(
txdb
,
'TxDb'
)
gene_id
<-
NULL
gene_id
<-
NULL
# Align seqlevelStyle if required
# Align seqlevelStyle if required
if
(
seqlevelsStyle
(
gr
)
!=
seqlevelsStyle
(
txdb
)){
if
(
seqlevelsStyle
(
gr
)
!=
seqlevelsStyle
(
txdb
)){
message
(
"Setting seqlevelsStyle(txdb) <- seqlevelsStyle(gr)"
)
message
(
"Setting seqlevelsStyle(txdb) <- seqlevelsStyle(gr)"
)
seqlevelsStyle
(
txdb
)
<-
seqlevelsStyle
(
gr
)
seqlevelsStyle
(
txdb
)
<-
seqlevelsStyle
(
gr
)
}
}
# Drop seqinfo (to overlap smoothly)
# Drop seqinfo (to overlap smoothly)
txranges
<-
GenomicFeatures
::
genes
(
txdb
)
%>%
txranges
<-
GenomicFeatures
::
genes
(
txdb
)
%>%
as.data.table
()
%>%
as.data.table
()
%>%
extract
(
seqlevelsInUse
(
gr
),
on
=
'seqnames'
)
%>%
extract
(
seqlevelsInUse
(
gr
),
on
=
'seqnames'
)
%>%
extract
(,
c
(
'seqnames'
,
'start'
,
'end'
,
'strand'
,
'gene_id'
),
extract
(,
c
(
'seqnames'
,
'start'
,
'end'
,
'strand'
,
'gene_id'
),
with
=
FALSE
)
%>%
with
=
FALSE
)
%>%
as
(
'GRanges'
)
as
(
'GRanges'
)
# Overlap
# Overlap
granno
<-
as.data.table
(
gr
)
%>%
granno
<-
as.data.table
(
gr
)
%>%
as
(
'GRanges'
)
%>%
as
(
'GRanges'
)
%>%
plyranges
::
join_overlap_left
(
txranges
)
%>%
plyranges
::
join_overlap_left
(
txranges
)
%>%
as.data.table
()
%>%
as.data.table
()
%>%
extract
(
!
is.na
(
gene_id
)
,
extract
(
!
is.na
(
gene_id
)
,
gene_id
:=
paste0
(
gene_id
,
collapse
=
';'
),
gene_id
:=
paste0
(
gene_id
,
collapse
=
';'
),
by
=
c
(
'seqnames'
,
'start'
,
'end'
,
'strand'
))
%>%
by
=
c
(
'seqnames'
,
'start'
,
'end'
,
'strand'
))
%>%
unique
()
%>%
unique
()
%>%
as
(
'GRanges'
)
as
(
'GRanges'
)
seqlevels
(
granno
)
%<>%
setdiff
(
'.'
)
# patch plyranges bug
seqlevels
(
granno
)
%<>%
setdiff
(
'.'
)
# patch plyranges bug
seqinfo
(
granno
)
<-
seqinfo
(
gr
)
seqinfo
(
granno
)
<-
seqinfo
(
gr
)
granno
granno
}
}
#' Read bedfile into GRanges
#' Read bedfile into GRanges
#' @param bedfile file path
#' @param bedfile file path
#' @param genome string: UCSC genome name (e.g. 'mm10')
#' @param genome string: UCSC genome name (e.g. 'mm10')
#' @param txdb NULL (default) or \code{\link[GenomicFeatures]{TxDb-class}}
#' @param txdb NULL (default) or \code{\link[GenomicFeatures]{TxDb-class}}
#' (used for gene annotation)
#' (used for gene annotation)
#' @param do_order TRUE (default) or FALSE: order on seqnames and star?
#' @param do_order TRUE (default) or FALSE: order on seqnames and star?
#' @param plot TRUE (default) or FALSE: plot karyogram?
#' @param plot TRUE (default) or FALSE: plot karyogram?
#' @param verbose TRUE (default) or FALSE
#' @param verbose TRUE (default) or FALSE
#' @return \code{\link[GenomicRanges]{GRanges-class}}
#' @return \code{\link[GenomicRanges]{GRanges-class}}
#' @seealso \code{\link{char_to_granges}}, \code{\link{genes_to_granges}}
#' @seealso \code{\link{char_to_granges}}, \code{\link{genes_to_granges}}
#' @examples
#' @examples
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' (gr <- bed_to_granges(bedfile, genome='mm10'))
#' (gr <- bed_to_granges(bedfile, genome='mm10'))
#' @export
#' @export
bed_to_granges
<-
function
(
bed_to_granges
<-
function
(
bedfile
,
bedfile
,
genome
,
genome
,
txdb
=
NULL
,
txdb
=
NULL
,
do_order
=
TRUE
,
do_order
=
TRUE
,
plot
=
TRUE
,
plot
=
TRUE
,
verbose
=
TRUE
verbose
=
TRUE
){
){
.
<-
NULL
.
<-
NULL
# Assert
# Assert
assert_all_are_existing_files
(
bedfile
)
assert_all_are_existing_files
(
bedfile
)
if
(
!
is.null
(
txdb
))
assert_is_all_of
(
txdb
,
'TxDb'
)
if
(
!
is.null
(
txdb
))
assert_is_all_of
(
txdb
,
'TxDb'
)
assert_is_a_bool
(
do_order
)
assert_is_a_bool
(
do_order
)
assert_is_a_bool
(
plot
)
assert_is_a_bool
(
plot
)
assert_is_a_bool
(
verbose
)
assert_is_a_bool
(
verbose
)
# Read
# Read
if
(
verbose
)
cmessage
(
'\tRead %s into GRanges'
,
basename
(
bedfile
))
if
(
verbose
)
cmessage
(
'\tRead %s into GRanges'
,
basename
(
bedfile
))
gr
<-
rtracklayer
::
import.bed
(
bedfile
,
genome
=
genome
)
gr
<-
rtracklayer
::
import.bed
(
bedfile
,
genome
=
genome
)
if
(
verbose
)
cmessage
(
'\t\t%d ranges on %d chromosomes'
,
if
(
verbose
)
cmessage
(
'\t\t%d ranges on %d chromosomes'
,
length
(
gr
),
length
(
unique
(
seqnames
(
gr
))))
length
(
gr
),
length
(
unique
(
seqnames
(
gr
))))
# Annotate
# Annotate
if
(
!
is.null
(
txdb
)){
if
(
!
is.null
(
txdb
)){
cmessage
(
'\t\tAnnotate with txdb'
)
cmessage
(
'\t\tAnnotate with txdb'
)
gr
%<>%
annotate_granges
(
txdb
)
gr
%<>%
annotate_granges
(
txdb
)
}
}
# Plot
# Plot
genome1
<-
unique
(
genome
(
gr
))
genome1
<-
unique
(
genome
(
gr
))
assert_is_scalar
(
genome1
)
assert_is_scalar
(
genome1
)
title
<-
paste0
(
genome1
,
': '
,
basename
(
bedfile
))
title
<-
paste0
(
genome1
,
': '
,
basename
(
bedfile
))
if
(
plot
)
plot_karyogram
(
gr
,
title
)
if
(
plot
)
plot_karyogram
(
gr
,
title
)
# Order
# Order
if
(
do_order
)
gr
%<>%
sort
(
ignore.strand
=
TRUE
)
if
(
do_order
)
gr
%<>%
sort
(
ignore.strand
=
TRUE
)
#%<>% extract( order(seqnames(.), start(.)))
#%<>% extract( order(seqnames(.), start(.)))
# Filter duplicates
# Record
gr
<-
unique
(
gr
)
names
(
gr
)
<-
gr
$
targetname
<-
make_unique_names
(
gr
,
'T'
)
gr
$
targetstart
<-
GenomicRanges
::
start
(
gr
)
# Record
gr
$
targetend
<-
GenomicRanges
::
end
(
gr
)
names
(
gr
)
<-
gr
$
targetname
<-
make_unique_names
(
gr
,
'T'
)
gr
$
targetstart
<-
GenomicRanges
::
start
(
gr
)
# Return
gr
$
targetend
<-
GenomicRanges
::
end
(
gr
)
gr
}
# Return
gr
}
#' Convert character vector into GRanges
#' @param x character vector
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' Convert character vector into GRanges
#' @return \code{\link[GenomicRanges]{GRanges-class}}
#' @param x character vector
#' @examples
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' require(magrittr)
#' @return \code{\link[GenomicRanges]{GRanges-class}}
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' @examples
#' x <- c(PRNP = 'chr20:4699600:+', # snp
#' require(magrittr)
#' HBB = 'chr11:5227002:-', # snp
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' HEXA = 'chr15:72346580-72346583:-', # del
#' x <- c(PRNP = 'chr20:4699600:+', # snp
#' CFTR = 'chr7:117559593-117559595:+') # ins
#' HBB = 'chr11:5227002:-', # snp
#' gr <- char_to_granges(x, bsgenome)
#' HEXA = 'chr15:72346580-72346583:-', # del
#' plot_intervals(gr, facet_var = c('targetname', 'seqnames'))
#' CFTR = 'chr7:117559593-117559595:+') # ins
#' @seealso \code{\link{bed_to_granges}}, \code{\link{genes_to_granges}}
#' gr <- char_to_granges(x, bsgenome)
#' @export
#' plot_intervals(gr, facet_var = c('targetname', 'seqnames'))
char_to_granges
<-
function
(
x
,
bsgenome
){
#' @seealso \code{\link{bed_to_granges}}, \code{\link{genes_to_granges}}
gr
<-
GenomicRanges
::
GRanges
(
x
,
seqinfo
=
BSgenome
::
seqinfo
(
bsgenome
))
#' @export
names
(
gr
)
<-
gr
$
targetname
<-
make_unique_names
(
gr
,
'T'
)
char_to_granges
<-
function
(
x
,
bsgenome
){
gr
$
targetstart
<-
GenomicRanges
::
start
(
gr
)
gr
<-
GenomicRanges
::
GRanges
(
x
,
seqinfo
=
BSgenome
::
seqinfo
(
bsgenome
))
gr
$
targetend
<-
GenomicRanges
::
end
(
gr
)
names
(
gr
)
<-
gr
$
targetname
<-
make_unique_names
(
gr
,
'T'
)
gr
gr
$
targetstart
<-
GenomicRanges
::
start
(
gr
)
}
gr
$
targetend
<-
GenomicRanges
::
end
(
gr
)
gr
}
#' Convert geneids into GRanges
#' @param file Gene identifier file (one per row)
#' @param geneids Gene identifier vector
#' Convert geneids into GRanges
#' @param complement TRUE (default) or FALSE: add complementary strand?
#' @param file Gene identifier file (one per row)
#' @param txdb \code{\link[GenomicFeatures]{TxDb-class}} or
#' @param geneids Gene identifier vector
#' \code{\link[ensembldb]{EnsDb-class}}
#' @param complement TRUE (default) or FALSE: add complementary strand?
#' @param plot TRUE (default) or FALSE
#' @param txdb \code{\link[GenomicFeatures]{TxDb-class}} or
#' @param verbose TRUE (default) or FALSE
#' \code{\link[ensembldb]{EnsDb-class}}
#' @return \code{\link[GenomicRanges]{GRanges-class}}
#' @param plot TRUE (default) or FALSE
#' @seealso \code{\link{char_to_granges}}, \code{\link{bed_to_granges}}
#' @param verbose TRUE (default) or FALSE
#' @examples
#' @return \code{\link[GenomicRanges]{GRanges-class}}
#' # Entrez
#' @seealso \code{\link{char_to_granges}}, \code{\link{bed_to_granges}}
#' #-------
#' @examples
#' genefile <- system.file('extdata/SRF.entrez', package='multicrispr')
#' # Entrez
#' geneids <- as.character(read.table(genefile)[[1]])
#' #-------
#' txdb <- getFromNamespace('TxDb.Mmusculus.UCSC.mm10.knownGene',
#' genefile <- system.file('extdata/SRF.entrez', package='multicrispr')
#' 'TxDb.Mmusculus.UCSC.mm10.knownGene')
#' geneids <- as.character(read.table(genefile)[[1]])
#' (gr <- genes_to_granges(geneids, txdb))
#' txdb <- getFromNamespace('TxDb.Mmusculus.UCSC.mm10.knownGene',
#' (gr <- genefile_to_granges(genefile, txdb))
#' 'TxDb.Mmusculus.UCSC.mm10.knownGene')
#'
#' (gr <- genes_to_granges(geneids, txdb))
#' # Ensembl
#' (gr <- genefile_to_granges(genefile, txdb))
#' #--------
#'
#' # txdb <- AnnotationHub::AnnotationHub()[["AH75036"]]
#' # Ensembl
#' # genefile <- system.file('extdata/SRF.ensembl', package='multicrispr')
#' #--------
#' # geneids <- as.character(read.table(genefile)[[1]])
#' # txdb <- AnnotationHub::AnnotationHub()[["AH75036"]]
#' # (gr <- genes_to_granges(geneids, txdb))
#' # genefile <- system.file('extdata/SRF.ensembl', package='multicrispr')
#' # (gr <- genefile_to_granges(genefile, txdb))
#' # geneids <- as.character(read.table(genefile)[[1]])
#' @export
#' # (gr <- genes_to_granges(geneids, txdb))
genes_to_granges
<-
function
(
#' # (gr <- genefile_to_granges(genefile, txdb))
geneids
,
#' @export
txdb
,
genes_to_granges
<-
function
(
complement
=
TRUE
,
geneids
,
plot
=
TRUE
,
txdb
,
verbose
=
TRUE
complement
=
TRUE
,
){
plot
=
TRUE
,
verbose
=
TRUE
# Assert
){
assert_is_character
(
geneids
)
assert_is_any_of
(
txdb
,
c
(
'TxDb'
,
'EnsDb'
))
# Assert
assert_is_a_bool
(
complement
)
assert_is_character
(
geneids
)
assert_is_a_bool
(
plot
)
assert_is_any_of
(
txdb
,
c
(
'TxDb'
,
'EnsDb'
))
assert_is_a_bool
(
complement
)
# Convert
assert_is_a_bool
(
plot
)
gr
<-
GenomicFeatures
::
genes
(
txdb
)[
geneids
]
if
(
verbose
)
cmessage
(
'\t\tConvert %d genes to %d GRanges'
,
# Convert
length
(
geneids
),
length
(
gr
))
gr
<-
GenomicFeatures
::
genes
(
txdb
)[
geneids
]
if
(
verbose
)
cmessage
(
'\t\tConvert %d genes to %d GRanges'
,
# Add complementary strand
length
(
geneids
),
length
(
gr
))
if
(
complement
){
gr
%<>%
add_inverse_strand
(
plot
=
FALSE
,
verbose
=
verbose
)
# Add complementary strand
}
if
(
complement
){
gr
%<>%
add_inverse_strand
(
plot
=
FALSE
,
verbose
=
verbose
)
# Plot
}
if
(
plot
)
plot_karyogram
(
gr
)
# Plot
# Record
if
(
plot
)
plot_karyogram
(
gr
)
names
(
gr
)
<-
gr
$
targetname
<-
make_unique_names
(
gr
,
'T'
)
gr
$
targetstart
<-
GenomicRanges
::
start
(
gr
)
# Record
gr
$
targetend
<-
GenomicRanges
::
end
(
gr
)
names
(
gr
)
<-
gr
$
targetname
<-
make_unique_names
(
gr
,
'T'
)
gr
$
targetstart
<-
GenomicRanges
::
start
(
gr
)
# Return
gr
$
targetend
<-
GenomicRanges
::
end
(
gr
)
gr
}
# Return
gr
}
#' @rdname genes_to_granges
#' @export
genefile_to_granges
<-
function
(
file
,
txdb
,
complement
=
TRUE
,
plot
=
TRUE
){
#' @rdname genes_to_granges
assert_all_are_existing_files
(
file
)
#' @export
geneids
<-
read.table
(
file
)[[
1
]]
%>%
as.character
()
genefile_to_granges
<-
function
(
file
,
txdb
,
complement
=
TRUE
,
plot
=
TRUE
){
genes_to_granges
(
geneids
,
txdb
,
complement
=
complement
,
plot
=
plot
)
assert_all_are_existing_files
(
file
)
}
geneids
<-
read.table
(
file
)[[
1
]]
%>%
as.character
()
genes_to_granges
(
geneids
,
txdb
,
complement
=
complement
,
plot
=
plot
)
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment