add_efficiency.Rd
Add Doench2014 or Doench2016 efficiency scores and filter on them
add_efficiency( spacers, bsgenome, method = c("Doench2014", "Doench2016")[1], chunksize = 10000, verbose = TRUE, plot = TRUE, alpha_var = default_alpha_var(spacers), size_var = method ) filter_efficient( spacers, bsgenome, method = c("Doench2014", "Doench2016")[1], cutoff, verbose = TRUE, plot = TRUE, alpha_var = default_alpha_var(spacers) )
spacers |
|
---|---|
bsgenome | |
method | 'Doench2014' (default) or 'Doench2016' (requires non-NULL argument python, virtualenv, or condaenv) |
chunksize | Doench2016 is executed in chunks of chunksize |
verbose | TRUE (default) or FALSE |
plot | TRUE (default) or FALSE |
alpha_var | NULL or string: var mapped to alpha in plot |
size_var | NULL or string: var mapped to size in plot |
cutoff | value to filter on |
numeric vector
add_efficiency
adds efficiency scores
filter_efficiency
adds efficiency scores and filters on them
Doench 2014, Rational design of highly active sgRNAs for CRISPR-Cas9-mediated gene inactivation. Nature Biotechnology, doi: 10.1038/nbt.3026
Doench 2016, Optimized sgRNA design to maximize activity and minimize off-target effects of CRISPR-Cas9. Nature Biotechnology, doi: 10.1038/nbt.3437
Python module azimuth: github/MicrosoftResearch/azimuth
# Install azimuth #---------------- ## With reticulate # require(reticulate) # conda_create('azienv', c('python=2.7')) # use_condaenv('azienv') # py_install(c('azimuth', 'scikit-learn==0.17.1'), 'azienv', pip = TRUE) ## Directly # conda create --name azienv python=2.7 # conda activate azienv # pip install azimuth # pip install scikit-learn==0.17.1 # PE example #----------- require(magrittr) bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38 targets <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp HBB = 'chr11:5227002:-', # snp HEXA = 'chr15:72346580-72346583:-', # del CFTR = 'chr7:117559593-117559595:+'), # ins bsgenome) spacers <- find_pe_spacers(targets, bsgenome)#spacers <- find_spacers(extend_for_pe(gr), bsgenome, complement = FALSE) (spacers %<>% add_efficiency(bsgenome, 'Doench2014'))#>#>#>#> GRanges object with 10 ranges and 11 metadata columns: #> seqnames ranges strand | crisprcontext #> <Rle> <IRanges> <Rle> | <character> #> CFTR_1 chr7 117559575-117559594 + | CACCATTAAAGAAAATATCATCTTTGGTGT #> CFTR_2 chr7 117559606-117559625 - | CGCTTCTGTATCTATATTCATCATAGGAAA #> HBB_1 chr11 5227003-5227022 - | ACACCATGGTGCATCTGACTCCTGAGGAGA #> HBB_2 chr11 5226984-5227003 + | GGCAGTAACGGCAGACTTCTCCTCAGGAGT #> HEXA_1 chr15 72346551-72346570 + | ACTATGTAGAAATCCTTCCAGTCAGGGCCA #> HEXA_2 chr15 72346558-72346577 + | AGAAATCCTTCCAGTCAGGGCCATAGGATA #> PRNP_1 chr20 4699568-4699587 + | CTGCAGCAGCTGGGGCAGTGGTGGGGGGCC #> PRNP_2 chr20 4699569-4699588 + | TGCAGCAGCTGGGGCAGTGGTGGGGGGCCT #> PRNP_3 chr20 4699575-4699594 + | AGCTGGGGCAGTGGTGGGGGGCCTTGGCGG #> PRNP_4 chr20 4699578-4699597 + | TGGGGCAGTGGTGGGGGGCCTTGGCGGCTA #> targetname targetstart targetend crisprname crisprspacer #> <character> <integer> <integer> <character> <character> #> CFTR_1 CFTR 117559593 117559595 CFTR_1 ATTAAAGAAAATATCATCTT #> CFTR_2 CFTR 117559593 117559595 CFTR_2 TCTGTATCTATATTCATCAT #> HBB_1 HBB 5227002 5227002 HBB_1 CATGGTGCATCTGACTCCTG #> HBB_2 HBB 5227002 5227002 HBB_2 GTAACGGCAGACTTCTCCTC #> HEXA_1 HEXA 72346580 72346583 HEXA_1 TGTAGAAATCCTTCCAGTCA #> HEXA_2 HEXA 72346580 72346583 HEXA_2 ATCCTTCCAGTCAGGGCCAT #> PRNP_1 PRNP 4699600 4699600 PRNP_1 AGCAGCTGGGGCAGTGGTGG #> PRNP_2 PRNP 4699600 4699600 PRNP_2 GCAGCTGGGGCAGTGGTGGG #> PRNP_3 PRNP 4699600 4699600 PRNP_3 GGGGCAGTGGTGGGGGGCCT #> PRNP_4 PRNP 4699600 4699600 PRNP_4 GCAGTGGTGGGGGGCCTTGG #> crisprpam primer revtranscript #> <character> <character> <character> #> CFTR_1 TGG AAGAAAATATCAT CTTTGGTGTTTCCTAT #> CFTR_2 AGG TATCTATATTCAT CATAGGAAACACCAAA #> HBB_1 AGG GTGCATCTGACTC CTGAGGAGAAGTCTGC #> HBB_2 AGG CGGCAGACTTCTC CTCAGGAGTCAGATGC #> HEXA_1 GGG GAAATCCTTCCAG TCAGGGCCATAGGATA #> HEXA_2 AGG TTCCAGTCAGGGC CATAGGATATACGGTT #> PRNP_1 GGG GCTGGGGCAGTGG TGGGGGGCCTTGGCGG #> PRNP_2 GGG CTGGGGCAGTGGT GGGGGGCCTTGGCGGC #> PRNP_3 TGG CAGTGGTGGGGGG CCTTGGCGGCTACATG #> PRNP_4 CGG TGGTGGGGGGCCT TGGCGGCTACATGCTG #> extension Doench2014 #> <character> <numeric> #> CFTR_1 ATAGGAAACACCAAAGATGATATTTTCTT 0.0339998970537971 #> CFTR_2 TTTGGTGTTTCCTATGATGAATATAGATA 0.223370560176775 #> HBB_1 GCAGACTTCTCCTCAGGAGTCAGATGCAC 0.13111422370078 #> HBB_2 GCATCTGACTCCTGAGGAGAAGTCTGCCG 0.413980645125447 #> HEXA_1 TATCCTATGGCCCTGACTGGAAGGATTTC 0.107283614825074 #> HEXA_2 AACCGTATATCCTATGGCCCTGACTGGAA 0.0394252780338432 #> PRNP_1 CCGCCAAGGCCCCCCACCACTGCCCCAGC 0.00994899162104746 #> PRNP_2 GCCGCCAAGGCCCCCCACCACTGCCCCAG 0.0449662673405108 #> PRNP_3 CATGTAGCCGCCAAGGCCCCCCACCACTG 0.00996253852673941 #> PRNP_4 CAGCATGTAGCCGCCAAGGCCCCCCACCA 0.20935862741641 #> ------- #> seqinfo: 455 sequences (1 circular) from hg38 genome# reticulate::use_condaenv('azienv') # reticulate::import('azimuth') # spacers %<>% add_efficiency(bsgenome, 'Doench2016') # filter_efficient(spacers, bsgenome, 'Doench2016', 0.4) # TFBS example #------------- bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr') bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10 targets <- extend(bed_to_granges(bedfile, 'mm10'))#>#>#># reticulate::use_condaenv('azienv') # reticulate::import('azienv') # (spacers %<>% add_offtargets(bsgenome, targets)) # (spacers %>% add_efficiency(bsgenome, 'Doench2014')) # (spacers %>% add_efficiency(bsgenome, 'Doench2016'))