Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
multicrispr
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
loosolab
software
multicrispr
Commits
f837a153
Commit
f837a153
authored
3 years ago
by
Rene Wiegandt
Browse files
Options
Downloads
Patches
Plain Diff
Fix for #1
parent
b2619ace
Branches
dev
Branches containing commit
No related tags found
1 merge request
!2
Fix for #1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
R/05_manipulate_ranges.R
+8
-0
8 additions, 0 deletions
R/05_manipulate_ranges.R
R/06_find_spacers.R
+1
-0
1 addition, 0 deletions
R/06_find_spacers.R
R/08_score_ontargets.R
+235
-234
235 additions, 234 deletions
R/08_score_ontargets.R
with
244 additions
and
234 deletions
R/05_manipulate_ranges.R
+
8
−
0
View file @
f837a153
...
...
@@ -398,4 +398,12 @@ double_flank <- function(
newgr
}
#' Filter N
#'
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}
#' @return \code{\link[GenomicRanges]{GRanges-class}}
filter_N
<-
function
(
spacers
,
col
=
"crisprspacer"
){
spacers
<-
spacers
[
!
grepl
(
"N"
,
elementMetadata
(
spacers
)[,
col
])]
spacers
[
!
grepl
(
"N"
,
elementMetadata
(
spacers
)[,
"crisprpam"
])]
}
This diff is collapsed.
Click to expand it.
R/06_find_spacers.R
+
1
−
0
View file @
f837a153
...
...
@@ -258,6 +258,7 @@ find_spacers <- function(gr, bsgenome, spacer = strrep('N', 20), pam = 'NGG',
spacers
$
crisprspacer
<-
getSeq
(
bsgenome
,
spacers
,
as.character
=
TRUE
)
spacers
$
crisprpam
<-
getSeq
(
bsgenome
,
pams
,
as.character
=
TRUE
)
if
(
verbose
)
message
(
'\tFound '
,
length
(
spacers
),
' spacers'
)
spacers
%<>%
filter_N
()
# Add on/offtargets
spacers
%<>%
count_offtargets
(
bsgenome
,
targets
=
if
(
subtract_targets
)
gr
else
NULL
,
...
...
This diff is collapsed.
Click to expand it.
R/08_score_ontargets.R
+
235
−
234
View file @
f837a153
#' Add [-4, +3] context
#'
#' Add context for Doench2016 scoring
#'
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}: spacer ranges
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' @param verbose logical(1)
#' @return character vector
#' @examples
#' # PE example
#' #-----------
#' require(magrittr)
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' gr <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp
#' HBB = 'chr11:5227002:-', # snp
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+'), # ins
#' bsgenome)
#' spacers <- find_spacers(extend_for_pe(gr), bsgenome, complement=FALSE)
#' (add_context(spacers, bsgenome))
#'
#' # TFBS example
#' #-------------
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' targets <- extend(bed_to_granges(bedfile, 'mm10'))
#' spacers <- find_spacers(targets, bsgenome)
#' (spacers %<>% add_context(bsgenome))
#' @noRd
add_context
<-
function
(
spacers
,
bsgenome
,
verbose
=
TRUE
){
# Prevent from stats::start from being used (leads to bug!)
contexts
<-
extend
(
spacers
,
-4
,
+6
)
spacers
$
crisprcontext
<-
BSgenome
::
getSeq
(
bsgenome
,
contexts
,
as.character
=
TRUE
)
if
(
verbose
)
message
(
'\t\tAdd (4-23-3) contextseqs'
)
spacers
}
doench2014
<-
function
(
contextseqs
,
python
=
NULL
,
virtualenv
=
NULL
,
condaenv
=
NULL
,
verbose
){
# Assert
assert_is_character
(
contextseqs
)
assert_all_are_true
(
nchar
(
contextseqs
)
==
30
)
assert_is_a_bool
(
verbose
)
# Message
if
(
verbose
)
message
(
'\t\tScore contextseqs with Doench2014'
)
# Read featureWeightMatrix
fwfile
<-
system.file
(
"extdata/DoenchNBT2014.csv"
,
package
=
"CRISPRseek"
)
fwmatrix
<-
read.csv
(
fwfile
,
header
=
TRUE
)
# Score and return
CRISPRseek
::
calculategRNAEfficiency
(
contextseqs
,
baseBeforegRNA
=
4
,
featureWeightMatrix
=
fwmatrix
)
%>%
magrittr
::
extract
(,
1
)
}
# Note: mclapply vs. bplapply
# I first used BiocParallel::bplapply().
# That works great on linux, but it fails on windows.
# support.bioconductor.org/p/92587/ explains likely why
# Windows doensn't allow forking (where a child process inherits parent env)
# Instead bplapply defaults to multithreading.
# In that scenario, the reticulation env is not correctly passed.
# mclapply seems like best choice: fork on linux - execute serially on win
doench2016
<-
function
(
contextseqs
,
chunksize
=
10000
,
verbose
=
TRUE
){
# Assert
is_identical_to_true
(
reticulate
::
py_module_available
(
'azimuth'
))
assert_is_character
(
contextseqs
)
assert_all_are_true
(
nchar
(
contextseqs
)
==
30
)
assert_is_a_bool
(
verbose
)
# Message
if
(
verbose
)
message
(
'\t\tScore contextseqs with Doench2016 (azimuth)'
)
start_time
<-
Sys.time
()
# Score
azi
<-
reticulate
::
import
(
'azimuth.model_comparison'
,
delay_load
=
TRUE
)
nchunks
<-
ceiling
(
length
(
contextseqs
)
/
chunksize
)
contextchunks
<-
split
(
contextseqs
,
ceiling
(
seq_along
(
contextseqs
)
/
chunksize
))
txt
<-
paste0
(
'\t\tRun Doench2016 %d times on %d-seq chunks '
,
'to preserve memory'
)
cmessage
(
txt
,
length
(
contextchunks
),
chunksize
)
mc.cores
<-
if
(
is_windows
())
1
else
max
(
1
,
parallel
::
detectCores
()
-2
)
doench2016scores
<-
unlist
(
parallel
::
mclapply
(
contextchunks
,
function
(
x
){
reticulate
::
py_suppress_warnings
(
azi
$
predict
(
reticulate
::
np_array
(
x
),
aa_cut
=
NULL
,
percent_peptide
=
NULL
,
model
=
NULL
,
model_file
=
NULL
,
pam_audit
=
TRUE
,
length_audit
=
TRUE
,
learn_options_override
=
NULL
))},
mc.cores
=
mc.cores
))
# Return
end_time
<-
Sys.time
()
if
(
verbose
)
cmessage
(
'\t\tCompleted in %s'
,
format
(
end_time
-
start_time
,
digits
=
2
))
doench2016scores
}
#' Add on-target efficiency scores
#'
#' Add Doench2014 or Doench2016 on-target efficiency scores
#'
#' \code{add_ontargets} adds efficiency scores
#' \code{filter_ontargets} adds efficiency scores and filters on them
#'
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}: spacers
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' @param ontargetmethod 'Doench2014' (default) or 'Doench2016'
#' (requires non-NULL argument python, virtualenv, or condaenv)
#' @param chunksize Doench2016 is executed in chunks of chunksize
#' @param verbose TRUE (default) or FALSE
#' @param plot TRUE (default) or FALSE
#' @param ... passed to \code{\link{plot_intervals}}
#' @return numeric vector
#' @examples
#' # Install azimuth
#' #----------------
#' ## With reticulate
#' # require(reticulate)
#' # conda_create('azienv', c('python=2.7'))
#' # use_condaenv('azienv')
#' # py_install(c('azimuth', 'scikit-learn==0.17.1', 'biopython=='1.76'),
#' # 'azienv', pip = TRUE)
#'
#' ## Directly
#' # conda create --name azienv python=2.7
#' # conda activate azienv
#' # pip install scikit-learn==0.17.1
#' # pip install biopython==1.76
#' # pip install azimuth
#'
#' # PE example
#' #-----------
#' require(magrittr)
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' targets <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp
#' HBB = 'chr11:5227002:-', # snp
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+'), # ins
#' bsgenome)
#' spacers <- find_primespacers(targets, bsgenome, ontargetmethod=NULL,
#' offtargetmethod=NULL)
#' spacers %<>% score_ontargets(bsgenome, 'Doench2014')
#' # reticulate::use_condaenv('azienv')
#' # reticulate::import('azimuth')
#' # spacers %<>% score_ontargets(bsgenome, 'Doench2016')
#'
#' # TFBS example
#' #-------------
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' targets <- extend(bed_to_granges(bedfile, 'mm10'))
#' spacers <- find_spacers(targets, bsgenome, ontargetmethod=NULL,
#' offtargetmethod=NULL)
#' spacers %<>% score_ontargets(bsgenome, 'Doench2014')
#' # reticulate::use_condaenv('azienv')
#' # reticulate::import('azimuth')
#' # spacers %>% score_ontargets(bsgenome, 'Doench2016')
#' @references
#' Doench 2014, Rational design of highly active sgRNAs for
#' CRISPR-Cas9-mediated gene inactivation. Nature Biotechnology,
#' doi: 10.1038/nbt.3026
#'
#' Doench 2016, Optimized sgRNA design to maximize activity and minimize
#' off-target effects of CRISPR-Cas9. Nature Biotechnology,
#' doi: 10.1038/nbt.3437
#'
#' Python module azimuth: github/MicrosoftResearch/azimuth
#' @export
score_ontargets
<-
function
(
spacers
,
bsgenome
,
ontargetmethod
=
c
(
'Doench2014'
,
'Doench2016'
)[
1
],
chunksize
=
10000
,
verbose
=
TRUE
,
plot
=
TRUE
,
...
){
# Assert
crisprcontext
<-
NULL
assert_is_all_of
(
spacers
,
'GRanges'
)
if
(
is.null
(
ontargetmethod
))
return
(
spacers
)
assert_is_a_string
(
ontargetmethod
)
assert_is_subset
(
ontargetmethod
,
c
(
'Doench2014'
,
'Doench2016'
))
if
(
ontargetmethod
%in%
names
(
mcols
(
spacers
))){
mcols
(
spacers
)[[
ontargetmethod
]]
<-
NULL
}
# Add contextseq
if
(
verbose
)
cmessage
(
'\tScore ontargets'
)
spacers
%<>%
add_context
(
bsgenome
,
verbose
=
verbose
)
spacerdt
<-
gr2dt
(
spacers
)
scoredt
<-
data.table
(
crisprcontext
=
unique
(
spacerdt
$
crisprcontext
))
# Score
scores
<-
switch
(
ontargetmethod
,
Doench2014
=
doench2014
(
scoredt
$
crisprcontext
,
verbose
=
verbose
),
Doench2016
=
doench2016
(
scoredt
$
crisprcontext
,
chunksize
=
chunksize
,
verbose
=
verbose
))
scoredt
[
,
(
ontargetmethod
)
:=
scores
]
# Merge back in
mergedt
<-
merge
(
spacerdt
,
scoredt
,
by
=
'crisprcontext'
,
sort
=
FALSE
,
all.x
=
TRUE
)
mergedt
[,
crisprcontext
:=
NULL
]
spacers
<-
dt2gr
(
mergedt
,
seqinfo
=
seqinfo
(
spacers
))
# Plot
if
(
plot
)
print
(
plot_intervals
(
spacers
,
...
))
# Return
spacers
}
default_alpha_var
<-
function
(
gr
){
if
(
'off'
%in%
names
(
mcols
(
gr
)))
'off'
else
NULL
#' Add [-4, +3] context
#'
#' Add context for Doench2016 scoring
#'
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}: spacer ranges
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' @param verbose logical(1)
#' @return character vector
#' @examples
#' # PE example
#' #-----------
#' require(magrittr)
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' gr <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp
#' HBB = 'chr11:5227002:-', # snp
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+'), # ins
#' bsgenome)
#' spacers <- find_spacers(extend_for_pe(gr), bsgenome, complement=FALSE)
#' (add_context(spacers, bsgenome))
#'
#' # TFBS example
#' #-------------
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' targets <- extend(bed_to_granges(bedfile, 'mm10'))
#' spacers <- find_spacers(targets, bsgenome)
#' (spacers %<>% add_context(bsgenome))
#' @noRd
add_context
<-
function
(
spacers
,
bsgenome
,
verbose
=
TRUE
){
# Prevent from stats::start from being used (leads to bug!)
contexts
<-
extend
(
spacers
,
-4
,
+6
)
spacers
$
crisprcontext
<-
BSgenome
::
getSeq
(
bsgenome
,
contexts
,
as.character
=
TRUE
)
if
(
verbose
)
message
(
'\t\tAdd (4-23-3) contextseqs'
)
spacers
}
doench2014
<-
function
(
contextseqs
,
python
=
NULL
,
virtualenv
=
NULL
,
condaenv
=
NULL
,
verbose
){
# Assert
assert_is_character
(
contextseqs
)
assert_all_are_true
(
nchar
(
contextseqs
)
==
30
)
assert_is_a_bool
(
verbose
)
# Message
if
(
verbose
)
message
(
'\t\tScore contextseqs with Doench2014'
)
# Read featureWeightMatrix
fwfile
<-
system.file
(
"extdata/DoenchNBT2014.csv"
,
package
=
"CRISPRseek"
)
fwmatrix
<-
read.csv
(
fwfile
,
header
=
TRUE
)
# Score and return
CRISPRseek
::
calculategRNAEfficiency
(
contextseqs
,
baseBeforegRNA
=
4
,
featureWeightMatrix
=
fwmatrix
)
%>%
magrittr
::
extract
(,
1
)
}
# Note: mclapply vs. bplapply
# I first used BiocParallel::bplapply().
# That works great on linux, but it fails on windows.
# support.bioconductor.org/p/92587/ explains likely why
# Windows doensn't allow forking (where a child process inherits parent env)
# Instead bplapply defaults to multithreading.
# In that scenario, the reticulation env is not correctly passed.
# mclapply seems like best choice: fork on linux - execute serially on win
doench2016
<-
function
(
contextseqs
,
chunksize
=
10000
,
verbose
=
TRUE
){
# Assert
is_identical_to_true
(
reticulate
::
py_module_available
(
'azimuth'
))
assert_is_character
(
contextseqs
)
assert_all_are_true
(
nchar
(
contextseqs
)
==
30
)
assert_is_a_bool
(
verbose
)
# Message
if
(
verbose
)
message
(
'\t\tScore contextseqs with Doench2016 (azimuth)'
)
start_time
<-
Sys.time
()
# Score
azi
<-
reticulate
::
import
(
'azimuth.model_comparison'
,
delay_load
=
TRUE
)
nchunks
<-
ceiling
(
length
(
contextseqs
)
/
chunksize
)
contextchunks
<-
split
(
contextseqs
,
ceiling
(
seq_along
(
contextseqs
)
/
chunksize
))
txt
<-
paste0
(
'\t\tRun Doench2016 %d times on %d-seq chunks '
,
'to preserve memory'
)
cmessage
(
txt
,
length
(
contextchunks
),
chunksize
)
mc.cores
<-
if
(
is_windows
())
1
else
max
(
1
,
parallel
::
detectCores
()
-2
)
doench2016scores
<-
unlist
(
parallel
::
mclapply
(
contextchunks
,
function
(
x
){
reticulate
::
py_suppress_warnings
(
azi
$
predict
(
reticulate
::
np_array
(
x
),
aa_cut
=
NULL
,
percent_peptide
=
NULL
,
model
=
NULL
,
model_file
=
NULL
,
pam_audit
=
TRUE
,
length_audit
=
TRUE
,
learn_options_override
=
NULL
))},
mc.cores
=
mc.cores
))
# Return
end_time
<-
Sys.time
()
if
(
verbose
)
cmessage
(
'\t\tCompleted in %s'
,
format
(
end_time
-
start_time
,
digits
=
2
))
doench2016scores
}
#' Add on-target efficiency scores
#'
#' Add Doench2014 or Doench2016 on-target efficiency scores
#'
#' \code{add_ontargets} adds efficiency scores
#' \code{filter_ontargets} adds efficiency scores and filters on them
#'
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}: spacers
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' @param ontargetmethod 'Doench2014' (default) or 'Doench2016'
#' (requires non-NULL argument python, virtualenv, or condaenv)
#' @param chunksize Doench2016 is executed in chunks of chunksize
#' @param verbose TRUE (default) or FALSE
#' @param plot TRUE (default) or FALSE
#' @param ... passed to \code{\link{plot_intervals}}
#' @return numeric vector
#' @examples
#' # Install azimuth
#' #----------------
#' ## With reticulate
#' # require(reticulate)
#' # conda_create('azienv', c('python=2.7'))
#' # use_condaenv('azienv')
#' # py_install(c('azimuth', 'scikit-learn==0.17.1', 'biopython=='1.76'),
#' # 'azienv', pip = TRUE)
#'
#' ## Directly
#' # conda create --name azienv python=2.7
#' # conda activate azienv
#' # pip install scikit-learn==0.17.1
#' # pip install biopython==1.76
#' # pip install azimuth
#'
#' # PE example
#' #-----------
#' require(magrittr)
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' targets <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp
#' HBB = 'chr11:5227002:-', # snp
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+'), # ins
#' bsgenome)
#' spacers <- find_primespacers(targets, bsgenome, ontargetmethod=NULL,
#' offtargetmethod=NULL)
#' spacers %<>% score_ontargets(bsgenome, 'Doench2014')
#' # reticulate::use_condaenv('azienv')
#' # reticulate::import('azimuth')
#' # spacers %<>% score_ontargets(bsgenome, 'Doench2016')
#'
#' # TFBS example
#' #-------------
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' targets <- extend(bed_to_granges(bedfile, 'mm10'))
#' spacers <- find_spacers(targets, bsgenome, ontargetmethod=NULL,
#' offtargetmethod=NULL)
#' spacers %<>% score_ontargets(bsgenome, 'Doench2014')
#' # reticulate::use_condaenv('azienv')
#' # reticulate::import('azimuth')
#' # spacers %>% score_ontargets(bsgenome, 'Doench2016')
#' @references
#' Doench 2014, Rational design of highly active sgRNAs for
#' CRISPR-Cas9-mediated gene inactivation. Nature Biotechnology,
#' doi: 10.1038/nbt.3026
#'
#' Doench 2016, Optimized sgRNA design to maximize activity and minimize
#' off-target effects of CRISPR-Cas9. Nature Biotechnology,
#' doi: 10.1038/nbt.3437
#'
#' Python module azimuth: github/MicrosoftResearch/azimuth
#' @export
score_ontargets
<-
function
(
spacers
,
bsgenome
,
ontargetmethod
=
c
(
'Doench2014'
,
'Doench2016'
)[
1
],
chunksize
=
10000
,
verbose
=
TRUE
,
plot
=
TRUE
,
...
){
# Assert
crisprcontext
<-
NULL
assert_is_all_of
(
spacers
,
'GRanges'
)
if
(
is.null
(
ontargetmethod
))
return
(
spacers
)
assert_is_a_string
(
ontargetmethod
)
assert_is_subset
(
ontargetmethod
,
c
(
'Doench2014'
,
'Doench2016'
))
if
(
ontargetmethod
%in%
names
(
mcols
(
spacers
))){
mcols
(
spacers
)[[
ontargetmethod
]]
<-
NULL
}
# Add contextseq
if
(
verbose
)
cmessage
(
'\tScore ontargets'
)
spacers
%<>%
add_context
(
bsgenome
,
verbose
=
verbose
)
spacers
%<>%
filter_N
(
col
=
"crisprcontext"
)
spacerdt
<-
gr2dt
(
spacers
)
scoredt
<-
data.table
(
crisprcontext
=
unique
(
spacerdt
$
crisprcontext
))
# Score
scores
<-
switch
(
ontargetmethod
,
Doench2014
=
doench2014
(
scoredt
$
crisprcontext
,
verbose
=
verbose
),
Doench2016
=
doench2016
(
scoredt
$
crisprcontext
,
chunksize
=
chunksize
,
verbose
=
verbose
))
scoredt
[
,
(
ontargetmethod
)
:=
scores
]
# Merge back in
mergedt
<-
merge
(
spacerdt
,
scoredt
,
by
=
'crisprcontext'
,
sort
=
FALSE
,
all.x
=
TRUE
)
mergedt
[,
crisprcontext
:=
NULL
]
spacers
<-
dt2gr
(
mergedt
,
seqinfo
=
seqinfo
(
spacers
))
# Plot
if
(
plot
)
print
(
plot_intervals
(
spacers
,
...
))
# Return
spacers
}
default_alpha_var
<-
function
(
gr
){
if
(
'off'
%in%
names
(
mcols
(
gr
)))
'off'
else
NULL
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment