Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
loosolab
software
multicrispr
Commits
4547d1fb
Commit
4547d1fb
authored
Feb 03, 2020
by
aditya.bhagwat
Browse files
Improvements in plotting, specificity, and efficiency analysis
parent
10a6e5f6
Pipeline
#120538
passed with stages
in 30 seconds
Changes
19
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitattributes
View file @
4547d1fb
...
...
@@ -2,3 +2,4 @@
*.bed filter=lfs diff=lfs merge=lfs -text
.ensembl filter=lfs diff=lfs merge=lfs -text
.entrez filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
NAMESPACE
View file @
4547d1fb
...
...
@@ -3,19 +3,23 @@
export(EnsDb.Hsapiens.v98)
export(EnsDb.Mmusculus.v98)
export(add_context)
export(add_efficiency)
export(add_genome_counts)
export(add_inverse_strand)
export(add_seq)
export(add_specificity)
export(add_target_counts)
export(bed_to_granges)
export(char_to_granges)
export(double_flank)
export(down_flank)
export(dt2gr)
export(extend)
export(extend_for_pe)
export(extend_pe_to_gg)
export(extract_matchranges)
export(extract_subranges)
export(filter_efficient)
export(filter_prime_specific)
export(filter_target_specific)
export(find_gg)
...
...
@@ -23,13 +27,13 @@ export(find_pe_spacers)
export(find_spacers)
export(genefile_to_granges)
export(genes_to_granges)
export(gr2dt)
export(index_genome)
export(index_targets)
export(match_seqs)
export(match_spacers)
export(plot_intervals)
export(plot_karyogram)
export(score_spacers)
export(up_flank)
importFrom(BSgenome,getSeq)
importFrom(BiocGenerics,"end<-")
...
...
R/00_imports.R
View file @
4547d1fb
...
...
@@ -34,4 +34,5 @@
#' @importFrom stringi stri_detect_regex stri_locate_all_fixed
#' @importFrom stringi stri_locate_all_regex
#' @importFrom stringi stri_replace_first_fixed
#' @importFrom stringi stri_startswith_fixed
NULL
R/03_plot.R
View file @
4547d1fb
...
...
@@ -106,7 +106,7 @@ to_megabase <- function(y){
#' Interval plot GRanges
#' @param gr \code{\link[GenomicRanges]{GRanges-class}}
#' @param y
by
'contig' (default) or name of gr variable
#' @param y 'contig' (default) or name of gr variable
#' @param color_var 'seqnames' (default) or other gr variable
#' @param linetype_var NULL (default) or gr variable mapped to linetype
#' @param size_var NULL (default) or gr variable mapped to size
...
...
@@ -117,51 +117,62 @@ to_megabase <- function(y){
#' @examples
#' # SRF sites
#' require(magrittr)
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' sites <- bed_to_granges(bedfile, 'mm10', plot = FALSE)
#' plot_intervals(sites)
#'
#' flanks <- up_flank(sites, stranded = FALSE)
#' sites$color <- 'sites'
#' flanks$color <- 'flanks'
#' plot_intervals(c(sites, flanks))
#' targets <- bed_to_granges(bedfile, 'mm10', plot = FALSE)
#' plot_intervals(targets)
#' targets %<>% extend(plot = TRUE)
#' spacers <- find_spacers(targets, bsgenome)
#' specific <- filter_target_specific(spacers, targets, bsgenome)
#' efficient <- filter_efficient(spacers, targets, )
#'
#' # PE targets
#' bs <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#'
sites <- GenomicRanges::GRanges(
#'
seqnames= c(PRNP = 'chr20:4699600:+
',
#'
HBB
= 'chr1
1:5227002
:-',
#'
HEXA
= 'chr
15:72346580-72346583:-',
#'
CFTR = 'chr7:117559593-117559595:+'),
#'
seqinfo = BSgenome::seqinfo(bs)
)
#'
sites$color <- names(sites
)
#'
plot_intervals(sites)
#' bs
genome
<- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#'
gr <- char_to_granges(c(PRNP = 'chr20:4699600:+',
#'
HBB = 'chr11:5227002:-
',
#'
HEXA
= 'chr1
5:72346580-72346583
:-',
#'
CFTR
= 'chr
7:117559593-117559595:+'),
#'
bsgenome)
#'
plot_intervals(gr
)
#'
plot_intervals(extend_for_pe(gr)
)
#'
#' @export
plot_intervals
<-
function
(
gr
,
yby
=
'contig'
,
color_var
=
'seqnames'
,
linetype_var
=
NULL
,
size_var
=
NULL
,
facet_var
=
'seqnames'
,
title
=
NULL
,
scales
=
'free'
gr
,
xref
=
'targetname'
,
y
=
'names'
,
nperchrom
=
1
,
color_var
=
'targetname'
,
facet_var
=
'seqnames'
,
linetype_var
=
NULL
,
size_var
=
NULL
,
alpha_var
=
NULL
,
title
=
NULL
,
scales
=
'free'
){
# Assert, Import, Comply
assert_is_all_of
(
gr
,
'GRanges'
)
assert_is_a_string
(
color_var
)
if
(
!
is.null
(
color_var
))
assert_is_a_string
(
color_var
)
assert_is_subset
(
color_var
,
names
(
as.data.table
(
gr
)))
contig
<-
group
<-
.N
<-
.SD
<-
seqnames
<-
start
<-
NULL
strand
<-
tmp
<-
width
<-
xstart
<-
xend
<-
y
<-
.
<-
NULL
contig
<-
.N
<-
.SD
<-
seqnames
<-
start
<-
NULL
strand
<-
tmp
<-
width
<-
xstart
<-
xend
<-
.
<-
NULL
# Identify contigs and order on them
gr
$
contig
<-
GenomicRanges
::
findOverlaps
(
gr
,
maxgap
=
30
,
select
=
'first'
,
ignore.strand
=
TRUE
)
gr
%<>%
extract
(
order
(
.
$
contig
))
#
gr$contig <- GenomicRanges::findOverlaps(
#
gr, maxgap = 30, select = 'first', ignore.strand = TRUE)
#
gr %<>% extract(order(.$contig))
# Prepare plotdt
plotdt
<-
as.data.table
(
gr
)
head_tail
<-
function
(
x
,
n
=
1
)
x
%in%
c
(
head
(
x
,
n
),
tail
(
x
,
n
))
plotdt
%<>%
extract
(
,
.SD
[
head_tail
(
contig
)],
by
=
c
(
'seqnames'
))
plotdt
<-
data.table
::
as.data.table
(
gr
)
%>%
cbind
(
names
=
names
(
gr
))
plotdt
%<>%
extract
(
order
(
seqnames
,
start
))
plotdt
%>%
extract
(,
y
:=
min
(
start
),
by
=
yby
)
head_tail
<-
function
(
x
,
n
=
nperchrom
)
x
[
x
%in%
c
(
head
(
x
,
n
),
tail
(
x
,
n
))
]
plotdt
%<>%
extract
(,
edge
:=
targetname
%in%
head_tail
(
unique
(
targetname
)),
by
=
'seqnames'
)
plotdt
%<>%
extract
(
edge
==
TRUE
)
#plotdt %<>% extract( , .SD[head_tail(contig)], by = c('seqnames'))
plotdt
%>%
extract
(,
y
:=
min
(
start
),
by
=
y
)
plotdt
%>%
extract
(,
y
:=
factor
(
format
(
y
,
big.mark
=
" "
)))
#plotdt %>% extract(,
y := factor(round(y*1e-6))
)
plotdt
%>%
extract
(,
xstart
:=
start
-
min
(
start
),
by
=
'contig'
)
#plotdt %>% extract(,
xstart := start-min(start), by = 'contig'
)
plotdt
%>%
extract
(,
xstart
:=
start
-
min
(
start
),
by
=
xref
)
plotdt
%>%
extract
(,
xend
:=
xstart
+
width
)
plotdt
%>%
extract
(
strand
==
'-'
,
tmp
:=
xend
)
plotdt
%>%
extract
(
strand
==
'-'
,
xend
:=
xstart
)
...
...
@@ -173,12 +184,13 @@ plot_intervals <- function(
aes_string
(
x
=
'xstart'
,
xend
=
'xend'
,
y
=
'y'
,
yend
=
'y'
,
color
=
color_var
,
linetype
=
linetype_var
,
size
=
size_var
))
+
size
=
size_var
,
alpha
=
alpha_var
))
+
facet_wrap
(
facet_var
,
scales
=
scales
)
+
geom_segment
(
arrow
=
arrow
(
length
=
unit
(
0.1
,
"inches"
)))
+
theme_bw
()
+
xlab
(
'Offset'
)
+
ylab
(
'Start'
)
+
ggtitle
(
title
)
#+
#xlab('Offset') + ylab('Start') + ggtitle(title) #+
xlab
(
NULL
)
+
ylab
(
NULL
)
+
ggtitle
(
title
)
#+
# Print and return
print
(
p
)
p
...
...
R/04_bed_to_granges.R
View file @
4547d1fb
...
...
@@ -162,7 +162,8 @@ bed_to_granges <- function(
#' HBB = 'chr11:5227002:-', # snp
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+') # ins
#' char_to_granges(x, bsgenome)
#' gr <- char_to_granges(x, bsgenome)
#' plot_intervals(gr, facet_var = c('targetname', 'seqnames'))
#' @seealso \code{\link{bed_to_granges}}, \code{\link{genes_to_granges}}
#' @export
char_to_granges
<-
function
(
x
,
bsgenome
){
...
...
R/05_manipulate_ranges.R
View file @
4547d1fb
...
...
@@ -86,7 +86,7 @@ summarize_loci <- function(gr){
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+'),# ins
#' bsgenome = bsgenome)
#' gr %>% up_flank(-22, -1, plot = TRUE)
#' gr %>% up_flank(-22, -1, plot = TRUE
, facet_var = c('targetname', 'seqnames')
)
#' gr %>% up_flank(-22, -1, plot = TRUE, strandaware = FALSE)
#' gr %>% down_flank(+1, +22, plot = TRUE)
#' gr %>% down_flank(+1, +22, plot = TRUE, strandaware = FALSE)
...
...
@@ -103,12 +103,12 @@ summarize_loci <- function(gr){
#' @export
up_flank
<-
function
(
gr
,
start
=
-200
,
end
=
-1
,
start
=
-200
,
end
=
-1
,
strandaware
=
TRUE
,
bsgenome
=
NULL
,
verbose
=
FALSE
,
plot
=
FALSE
,
bsgenome
=
NULL
,
verbose
=
FALSE
,
plot
=
FALSE
,
...
){
# Assert
...
...
@@ -215,12 +215,13 @@ down_flank <- function(
#' @export
extend
<-
function
(
gr
,
start
=
-22
,
end
=
22
,
start
=
-22
,
end
=
22
,
strandaware
=
TRUE
,
bsgenome
=
NULL
,
verbose
=
FALSE
,
plot
=
FALSE
,
bsgenome
=
NULL
,
verbose
=
FALSE
,
plot
=
FALSE
,
linetype_var
=
'set'
,
...
){
...
...
@@ -260,7 +261,7 @@ extend <- function(
newgr
$
set
<-
'extensions'
allgr
<-
c
(
gr
,
newgr
)
allgr
$
set
%<>%
factor
(
c
(
'original'
,
'extensions'
))
plot_intervals
(
allgr
,
color
_var
=
'set'
,
...
,
title
=
txt
)
plot_intervals
(
allgr
,
linetype
_var
=
'set'
,
...
,
title
=
txt
)
newgr
$
set
<-
NULL
}
if
(
verbose
)
message
(
txt
)
...
...
R/06_find_pe_spacers.R
View file @
4547d1fb
...
...
@@ -67,7 +67,7 @@ extend_pe_to_gg <- function(gr, nrt=16, plot = FALSE){
gr
$
set
<-
'targets'
fw
$
set
<-
'possible GG of fwd strand'
rv
$
set
<-
'possible GG on rev strand'
plot_intervals
(
c
(
gr
,
fw
,
rv
),
color_var
=
'set'
,
y
by
=
'set'
)
plot_intervals
(
c
(
gr
,
fw
,
rv
),
color_var
=
'set'
,
y
=
'set'
)
}
# Return
...
...
@@ -203,7 +203,7 @@ find_pe_spacers <- function(gr, bsgenome, fixes = get_plus_seq(bsgenome, gr),
ext
$
part
<-
"3' extension"
allranges
<-
c
(
spacer
,
ext
)
allranges
$
part
%<>%
factor
(
rev
(
c
(
"spacer"
,
"3' extension"
)))
plot_intervals
(
allranges
,
y
by
=
'crisprname'
,
color_var
=
'part'
,
plot_intervals
(
allranges
,
y
=
'crisprname'
,
color_var
=
'part'
,
size_var
=
'part'
,
facet_var
=
c
(
'seqnames'
,
'targetname'
))
spacer
$
part
<-
NULL
}
...
...
R/06_find_spacers.R
View file @
4547d1fb
...
...
@@ -45,7 +45,7 @@ extract_subranges <- function(gr, ir, plot = FALSE){
if
(
plot
){
mr
$
rangename
<-
names
(
mr
)
plot_intervals
(
mr
,
color_var
=
'names'
,
facet_var
=
c
(
'seqnames'
),
y
by
=
'rangename'
)
facet_var
=
c
(
'seqnames'
),
y
=
'rangename'
)
}
# Return
...
...
@@ -149,7 +149,7 @@ find_spacers <- function(
spacers
$
crisprpam
<-
BSgenome
::
getSeq
(
bsgenome
,
pams
,
as.character
=
TRUE
)
spacers
%>%
sort
(
ignore.strand
=
TRUE
)
if
(
plot
){
plot_intervals
(
spacers
,
yb
y
=
'crisprname'
)
plot_intervals
(
spacers
,
y
=
'crisprname'
)
spacers
$
sitename
<-
NULL
}
spacers
...
...
@@ -170,19 +170,22 @@ find_spacers <- function(
#' @examples
#' require(magrittr)
#' bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
#' gr <- GenomicRanges::GRanges(
#' seqnames = c(PRNP = 'chr20:4699600', # snp
#' HBB = 'chr11:5227002', # snp
#' HEXA = 'chr15:72346580-72346583', # del
#' CFTR = 'chr7:117559593-117559595'), # ins
#' strand = c(PRNP = '+', HBB = '-', HEXA = '-', CFTR = '+'),
#' seqinfo = BSgenome::seqinfo(bsgenome))
#' gr <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp
#' HBB = 'chr11:5227002:-', # snp
#' HEXA = 'chr15:72346580-72346583:-', # del
#' CFTR = 'chr7:117559593-117559595:+'), # ins
#' bsgenome = bsgenome)
#' find_pe_spacers(gr, bsgenome)
#' (grext <- extend_for_pe(gr))
#' find_spacers(grext, bsgenome, complement = FALSE)
#' @export
extend_for_pe
<-
function
(
gr
,
bsgenome
,
nrt
=
16
,
spacer
=
strrep
(
'N'
,
20
),
pam
=
'NGG'
,
plot
=
TRUE
gr
,
bsgenome
,
nrt
=
16
,
spacer
=
strrep
(
'N'
,
20
),
pam
=
'NGG'
,
plot
=
TRUE
){
fw
<-
copy
(
gr
,
start
=
end
(
gr
)
+1
-
nrt
-17
,
end
=
start
(
gr
)
-1+6
,
strand
=
'+'
)
rv
<-
copy
(
gr
,
start
=
end
(
gr
)
+1-6
,
end
=
start
(
gr
)
-1
+
nrt
+17
,
strand
=
'-'
)
...
...
@@ -190,10 +193,10 @@ extend_for_pe <- function(
names
(
rv
)
%<>%
paste0
(
'_r'
)
ext
<-
c
(
fw
,
rv
)
if
(
plot
){
gr
$
set
<-
'
original
'
fw
$
set
<-
'fw'
rv
$
set
<-
'rv'
plot_intervals
(
c
(
fw
,
gr
,
rv
),
color_var
=
'set'
,
y
by
=
'set'
)
gr
$
set
<-
'
PE target
'
fw
$
set
<-
"potential '+' spacers"
rv
$
set
<-
"potential '-' spacers"
plot_intervals
(
c
(
fw
,
gr
,
rv
),
color_var
=
'set'
,
y
=
'set'
)
}
ext
}
...
...
@@ -275,7 +278,7 @@ extend_for_pe <- function(
# original$set <- 'target'
# spacer$set <- 'spacer'
# plot_intervals(c(original, spacer), color_var = 'set',
# size_var = 'set', y
by
= 'site')
# size_var = 'set', y = 'site')
# sites$set <- NULL
# }
# if (verbose) cmessage('\t\t%d cas9 spacers across %d ranges',
...
...
@@ -344,7 +347,8 @@ extend_for_pe <- function(
# assert_is_a_number(nprimer)
# assert_is_a_number(nrt)
# assert_all_are_less_than(nprimer, 17)
#
# .
# # Extensions
# extension <- invertStrand(down_flank(spacers, -2-nprimer, -3+nrt))
# extension$seq <- get_plus_seq(bsgenome, extension) # Get "+" seq
...
...
@@ -358,7 +362,7 @@ extend_for_pe <- function(
# spacer$part<-'spacer'; primer$part<-'primer'; rtranscripts$part<-'rtranscripts'
# allranges <- c(spacer, primer, rtranscripts)
# allranges$part %<>% factor(rev(c('spacer', 'rtranscripts', 'primer')))
# plot_intervals(allranges, y
by
= 'site', color_var = 'part',
# plot_intervals(allranges, y = 'site', color_var = 'part',
# size_var = 'part', facet_var = c('seqnames', 'targetname'))
# spacer$part <- primer$part <- rtranscripts$part <- NULL
# }
...
...
R/07_score_spacers.R
View file @
4547d1fb
...
...
@@ -104,22 +104,20 @@ doench2016 <- function(
}
#'
Score spacer
s
#'
Add efficiency score
s
#'
#'
Score spacers with
Doench2014 or Doench2016
#'
Add
Doench2014 or Doench2016
efficiency scores
#'
#' Doench2014 is readily available.
#' Doench2016 is available after installing python module
#' [azimuth](https://github.com/MicrosoftResearch/Azimuth) (see examples).
#'
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}: spacers
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' @param spacers \code{\link[GenomicRanges]{GRanges-class}}: spacers
#' @param bsgenome \code{\link[BSgenome]{BSgenome-class}}
#' @param method 'Doench2014' (default) or 'Doench2016'
#' (requires non-NULL argument python, virtualenv, or condaenv)
#' @param python NULL (default) or python binary path with module azimuth
#' @param virtualenv NULL (default) or python virtualenv with module azimuth
#' @param condaenv NULL (default) or python condaenv with module azimuth
#' @param verbose TRUE (default) or FALSE
#' @param plot TRUE (default) or FALSE
#' @param alpha_var NULL or string: var mapped to alpha in plot
#' @return numeric vector
#' @examples
#' # PE example
...
...
@@ -132,22 +130,23 @@ doench2016 <- function(
#' CFTR = 'chr7:117559593-117559595:+'), # ins
#' bsgenome)
#' spacers <- find_spacers(extend_for_pe(gr), bsgenome, complement = FALSE)
#'
score_
spacers
(spacers,
bsgenome, 'Doench2014')
#'
(
spacers
%<>% add_efficiency(
bsgenome, 'Doench2014')
)
#' # conda create --name azimuthenv python=2.7
#' # conda activate azimuthenv
#' # pip install azimuth
#' # pip install scikit-learn==0.17.1
#' # s
core_spacers(spacers,
bsgenome, 'Doench2016', condaenv = 'azimuthenv')
#'
#' # s
pacers %<>% add_efficiency(
bsgenome, 'Doench2016', condaenv = 'azimuthenv')
#'
# filter_efficient(spacers, bsgenome, 'Doench2016', 0.4, condaenv='azimuthenv')
#' # TFBS example
#' #-------------
#' bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
#' bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
#' gr <- extend(bed_to_granges(bedfile, 'mm10'))
#' spacers <- find_spacers(gr, bsgenome)
#' (spacers %<>% score_spacers(bsgenome, 'Doench2014'))
#' # (spacers %<>% score_spacers(bsgenome, 'Doench2016'))
#'
#' targets <- extend(bed_to_granges(bedfile, 'mm10'))
#' spacers <- find_spacers(targets, bsgenome)
#' # (spacers %<>% add_specificity(targets, bsgenome))
#' # (spacers %>% add_efficiency(bsgenome, 'Doench2014'))
#' # (spacers %>% add_efficiency(bsgenome, 'Doench2016'))
#' # spacers %>% filter_efficient(bsgenome, 'Doench2016', 0.4)
#' @references
#' Doench 2014, Rational design of highly active sgRNAs for
#' CRISPR-Cas9-mediated gene inactivation. Nature Biotechnology,
...
...
@@ -159,14 +158,11 @@ doench2016 <- function(
#'
#' Python module azimuth: github/MicrosoftResearch/azimuth
#' @export
score_spacers
<-
function
(
spacers
,
bsgenome
,
method
=
c
(
'Doench2014'
,
'Doench2016'
)[
1
],
python
=
NULL
,
virtualenv
=
NULL
,
condaenv
=
NULL
,
verbose
=
TRUE
add_efficiency
<-
function
(
spacers
,
bsgenome
,
method
=
c
(
'Doench2014'
,
'Doench2016'
)[
1
],
python
=
NULL
,
virtualenv
=
NULL
,
condaenv
=
NULL
,
verbose
=
TRUE
,
plot
=
TRUE
,
alpha_var
=
default_alpha_var
(
spacers
)
){
# Assert
assert_is_all_of
(
spacers
,
'GRanges'
)
...
...
@@ -176,14 +172,65 @@ score_spacers <- function(
# Add contextseq
if
(
verbose
)
cmessage
(
'\tScore crispr spacers'
)
spacers
%<>%
add_context
(
bsgenome
,
verbose
=
verbose
)
spacerdt
<-
as.data.table
(
spacers
)
spacerdt
<-
gr2dt
(
spacers
)
scoredt
<-
data.table
(
crisprcontext
=
unique
(
spacerdt
$
crisprcontext
))
# Score
scorefun
<-
switch
(
method
,
Doench2014
=
doench2014
,
Doench2016
=
doench2016
)
scoredt
[
,
(
method
)
:=
scorefun
(
scoredt
$
crisprcontext
,
verbose
=
verbose
)
]
# Merge back in
and Return
# Merge back in
mergedt
<-
merge
(
spacerdt
,
scoredt
,
by
=
'crisprcontext'
,
sort
=
FALSE
,
all.x
=
TRUE
)
GRanges
(
mergedt
,
seqinfo
=
seqinfo
(
spacers
))
spacers
<-
dt2gr
(
mergedt
,
seqinfo
=
seqinfo
(
spacers
))
# Plot
if
(
plot
){
scores
<-
mcols
(
spacers
)[[
method
]]
tertiles
<-
quantile
(
scores
,
c
(
0.33
,
0.66
,
1
))
labels
<-
sprintf
(
'%s < %s (%s)'
,
method
,
as.character
(
round
(
tertiles
,
2
)),
names
(
tertiles
))
spacers
$
efficiency
<-
cut
(
scores
,
c
(
0
,
tertiles
),
labels
)
p
<-
plot_intervals
(
spacers
,
size_var
=
'efficiency'
,
alpha_var
=
'specific'
)
+
ggplot2
::
scale_size_manual
(
values
=
c
(
0.1
,
1
,
2
))
print
(
p
)
spacers
$
efficiency
<-
NULL
}
# Return
spacers
}
#' @rdname add_efficiency
#' @export
filter_efficient
<-
function
(
spacers
,
bsgenome
,
method
=
c
(
'Doench2014'
,
'Doench2016'
)[
1
],
cutoff
,
python
=
NULL
,
virtualenv
=
NULL
,
condaenv
=
NULL
,
verbose
=
TRUE
,
plot
=
TRUE
,
alpha_var
=
default_alpha_var
(
gr
)
){
spacers
%<>%
add_efficiency
(
bsgenome
=
bsgenome
,
method
=
method
,
python
=
python
,
virtualenv
=
virtualenv
,
condaenv
=
condaenv
,
verbose
=
verbose
,
plot
=
plot
,
alpha_var
=
alpha_var
)
idx
<-
mcols
(
spacers
)[[
method
]]
>
cutoff
if
(
verbose
){
width
<-
nchar
(
length
(
idx
))
cmessage
(
'\t\t%s ranges'
,
formatC
(
length
(
idx
),
width
=
width
))
cmessage
(
'\t\t%s ranges after filtering for %s > %s'
,
formatC
(
sum
(
idx
),
width
=
width
),
method
,
as.character
(
cutoff
))
}
spacers
%>%
extract
(
idx
)
}
default_alpha_var
<-
function
(
gr
){
if
(
'specific'
%in%
names
(
mcols
(
gr
)))
'specific'
else
NULL
}
\ No newline at end of file
R/08_add_genome_matches.R
View file @
4547d1fb
default_outdir
<-
function
()
paste0
(
tempdir
(),
'/multicrispr/bowtie'
)
default_indexedgenome
<-
function
(
x
){
bsgenome
<-
if
(
is
(
x
,
'BSgenome'
)){
x
}
else
if
(
is
(
x
,
'GRanges'
)){
getBSgenome
(
genome
(
x
)[
1
])
}
file.path
(
'~/.multicrispr/bowtie'
,
bsgenome
@
pkgname
)
}
#' Index genome
#'
#' Bowtie index genome
...
...
@@ -10,7 +18,7 @@
#' @export
index_genome
<-
function
(
bsgenome
,
indexedgenome
=
file.path
(
'~/.multicrispr/bowtie'
,
bsgenome
@
pkgna
me
)
indexedgenome
=
default_indexedgenome
(
bsgeno
me
)
){
# Assert
...
...
@@ -317,15 +325,19 @@ add_target_counts <- function(
spacers
,
indexedtargets
,
norc
=
TRUE
,
outfile
=
outfile
,
pam
=
pam
,
count_vars
=
count_vars
,
verbose
=
verbose
)
# Add counts to spacers. Return.
dt
<-
as.data.table
(
spacers
)
%>%
merge
(
matches
,
by
=
'crisprspacer'
,
sort
=
FALSE
)
# Add counts to spacers
dt
<-
gr2dt
(
spacers
)
%>%
merge
(
matches
,
by
=
'crisprspacer'
,
sort
=
FALSE
)
# Organize columns and return
targetvars
<-
names
(
dt
)
%>%
extract
(
stri_startswith_fixed
(
.
,
'target'
))
crisprvars
<-
c
(
'crisprname'
,
'crisprspacer'
,
'crisprpam'
,
'crisprext'
)
%>%
intersect
(
names
(
dt
))
othervars
<-
setdiff
(
names
(
dt
),
c
(
targetvars
,
crisprvars
))
dt
%<>%
extract
(,
c
(
targetvars
,
crisprvars
,
othervars
),
with
=
FALSE
)
GRanges
(
dt
,
seqinfo
=
seqinfo
(
spacers
))
dt
%>%
extract
(,
c
(
targetvars
,
crisprvars
,
othervars
),
with
=
FALSE
)
%>%
dt2gr
(
seqinfo
(
spacers
))
}
#' Add genome counts
...
...
@@ -367,10 +379,10 @@ add_target_counts <- function(
#' @export
add_genome_counts
<-
function
(
spacers
,
indexedgenome
,
outdir
=
default_outdir
(),
pam
=
'NGG'
,
verbose
=
TRUE
indexedgenome
=
default_indexedgenome
(
spacers
)
,
outdir
=
default_outdir
(),
pam
=
'NGG'
,
verbose
=
TRUE
){
# Add genome matches
if
(
verbose
)
message
(
'\tAdd genome match counts'
)
...
...
@@ -379,81 +391,128 @@ add_genome_counts <- function(
matches
<-
match_spacers
(
spacers
,
indexedgenome
,
norc
=
FALSE
,
outfile
=
outfile
,
pam
=
pam
,
count_vars
=
count_vars
,
verbose
=
verbose
)
dt
<-
spacers
%>%
as.data.table
()
%>%
merge
(
matches
,
by
=
'crisprspacer'
,
sort
=
FALSE
)
dt
<-
gr2dt
(
spacers
)
%>%
merge
(
matches
,
by
=
'crisprspacer'
,
sort
=
FALSE
)
# Organize columns
# Organize columns
and return
targetvars
<-
names
(
dt
)
%>%
extract
(
stringi
::
stri_startswith_fixed
(
.
,
'target'
))
crisprvars
<-
c
(
'crisprname'
,
'crisprspacer'
,
'crisprpam'
,
'crisprext'
)
%>%
intersect
(
names
(
dt
))
othervars
<-
setdiff
(
names
(
dt
),
c
(
targetvars
,
crisprvars
))
dt
%<>%
extract
(,
c
(
targetvars
,
crisprvars
,
othervars
),
with
=
FALSE
)
# Return GRanges
GRanges
(
dt
,
seqinfo
=
seqinfo
(
spacers
))
dt
%>%
extract
(,
c
(
targetvars
,
crisprvars
,
othervars
),
with
=
FALSE
)
%>%
dt2gr
(
seqinfo
(
spacers
))
}
default_outdir
<-
function
()
paste0
(
tempdir
(),
'/multicrispr/bowtie'
)
#' @rdname filter_target_specific
#' @export
add_specificity
<-
function
(
spacers
,
targets
,
bsgenome
,
indexedgenome
=
default_indexedgenome
(
spacers
),
outdir
=
default_outdir
(),
pam
=
'NGG'
,
plot
=
TRUE
,
verbose
=
TRUE
,
filter
=
TRUE
){
# Add target matches
spacers
%<>%
add_target_counts
(
targets
,
bsgenome
,
outdir
,
pam
=
pam
,
verbose
=
verbose
)
# Add genome matches
spacers
%<>%
add_genome_counts
(
indexedgenome
,
outdir
,
pam
=
pam
,
verbose
=
verbose
)