Add context for Doench2016 scoring

add_context(spacers, bsgenome, verbose = TRUE)

Arguments

spacers

GRanges-class: spacer ranges

bsgenome

BSgenome-class

verbose

logical(1)

Value

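GRanges-class: the input spacer ranges with an added crisprcontext metadata column (character vector of context sequences)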

Examples

# PE example
#-----------
require(magrittr)
#> Lade nötiges Paket: magrittr
bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
gr <- char_to_granges(c(PRNP = 'chr20:4699600:+',             # snp
                        HBB = 'chr11:5227002:-',              # snp
                        HEXA = 'chr15:72346580-72346583:-',   # del
                        CFTR = 'chr7:117559593-117559595:+'), # ins
                      bsgenome)
spacers <- find_spacers(extend_for_pe(gr), bsgenome, complement=FALSE)
(add_context(spacers, bsgenome))
#> Add (4-23-3) contextseqs
#> GRanges object with 10 ranges and 7 metadata columns: #> seqnames ranges strand | targetname targetstart #> <Rle> <IRanges> <Rle> | <character> <integer> #> CFTR_f chr7 117559575-117559594 + | CFTR 117559593 #> CFTR_r chr7 117559606-117559625 - | CFTR 117559593 #> HBB_f chr11 5226984-5227003 + | HBB 5227002 #> HBB_r chr11 5227003-5227022 - | HBB 5227002 #> HEXA_f_1 chr15 72346551-72346570 + | HEXA 72346580 #> HEXA_f_2 chr15 72346558-72346577 + | HEXA 72346580 #> PRNP_f_1 chr20 4699568-4699587 + | PRNP 4699600 #> PRNP_f_2 chr20 4699569-4699588 + | PRNP 4699600 #> PRNP_f_3 chr20 4699575-4699594 + | PRNP 4699600 #> PRNP_f_4 chr20 4699578-4699597 + | PRNP 4699600 #> targetend crisprname crisprspacer crisprpam #> <integer> <character> <character> <character> #> CFTR_f 117559595 CFTR_f ATTAAAGAAAATATCATCTT TGG #> CFTR_r 117559595 CFTR_r TCTGTATCTATATTCATCAT AGG #> HBB_f 5227002 HBB_f GTAACGGCAGACTTCTCCTC AGG #> HBB_r 5227002 HBB_r CATGGTGCATCTGACTCCTG AGG #> HEXA_f_1 72346583 HEXA_f_1 TGTAGAAATCCTTCCAGTCA GGG #> HEXA_f_2 72346583 HEXA_f_2 ATCCTTCCAGTCAGGGCCAT AGG #> PRNP_f_1 4699600 PRNP_f_1 AGCAGCTGGGGCAGTGGTGG GGG #> PRNP_f_2 4699600 PRNP_f_2 GCAGCTGGGGCAGTGGTGGG GGG #> PRNP_f_3 4699600 PRNP_f_3 GGGGCAGTGGTGGGGGGCCT TGG #> PRNP_f_4 4699600 PRNP_f_4 GCAGTGGTGGGGGGCCTTGG CGG #> crisprcontext #> <character> #> CFTR_f CACCATTAAAGAAAATATCATCTTTGGTGT #> CFTR_r CGCTTCTGTATCTATATTCATCATAGGAAA #> HBB_f GGCAGTAACGGCAGACTTCTCCTCAGGAGT #> HBB_r ACACCATGGTGCATCTGACTCCTGAGGAGA #> HEXA_f_1 ACTATGTAGAAATCCTTCCAGTCAGGGCCA #> HEXA_f_2 AGAAATCCTTCCAGTCAGGGCCATAGGATA #> PRNP_f_1 CTGCAGCAGCTGGGGCAGTGGTGGGGGGCC #> PRNP_f_2 TGCAGCAGCTGGGGCAGTGGTGGGGGGCCT #> PRNP_f_3 AGCTGGGGCAGTGGTGGGGGGCCTTGGCGG #> PRNP_f_4 TGGGGCAGTGGTGGGGGGCCTTGGCGGCTA #> ------- #> seqinfo: 455 sequences (1 circular) from hg38 genome
# TFBS example
#-------------
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
targets <- extend(bed_to_granges(bedfile, 'mm10'))
#> Read SRF.bed into GRanges
#> #> Attache Paket: 'BiocGenerics'
#> The following objects are masked from 'package:parallel': #> #> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, #> clusterExport, clusterMap, parApply, parCapply, parLapply, #> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from 'package:stats': #> #> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base': #> #> anyDuplicated, append, as.data.frame, basename, cbind, colnames, #> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, #> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, #> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, #> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, #> union, unique, unsplit, which, which.max, which.min
#> #> Attache Paket: 'S4Vectors'
#> The following object is masked from 'package:base': #> #> expand.grid
#> #> Attache Paket: 'IRanges'
#> The following object is masked from 'package:grDevices': #> #> windows
#> #> Attache Paket: 'Biostrings'
#> The following object is masked from 'package:base': #> #> strsplit
#> 1974 ranges on 21 chromosomes
spacers <- find_spacers(targets, bsgenome)
#> 3948 ranges after adding inverse strands
(spacers %<>% add_context(bsgenome))
#> Add (4-23-3) contextseqs
#> GRanges object with 10159 ranges and 9 metadata columns: #> seqnames ranges strand | name score #> <Rle> <IRanges> <Rle> | <character> <numeric> #> T0001_r_5 chr1 4712619-4712638 - | SRF_MA0083.3 10.49542 #> T0001_r_4 chr1 4712620-4712639 - | SRF_MA0083.3 10.49542 #> T0001_r_3 chr1 4712627-4712646 - | SRF_MA0083.3 10.49542 #> T0001_r_2 chr1 4712633-4712652 - | SRF_MA0083.3 10.49542 #> T0001_r_1 chr1 4712634-4712653 - | SRF_MA0083.3 10.49542 #> ... ... ... ... . ... ... #> T1974_r_3 chrY 89126492-89126511 - | SRF_MA0083.3 4.54393 #> T1974_f_3 chrY 89126496-89126515 + | SRF_MA0083.3 4.54393 #> T1974_r_2 chrY 89126499-89126518 - | SRF_MA0083.3 4.54393 #> T1974_r_1 chrY 89126500-89126519 - | SRF_MA0083.3 4.54393 #> T1974_f_4 chrY 89126501-89126520 + | SRF_MA0083.3 4.54393 #> targetname targetstart targetend crisprname crisprspacer #> <character> <integer> <integer> <character> <character> #> T0001_r_5 T0001 4712628 4712643 T0001_r_5 ATATAAGGGCATTGGAAGAA #> T0001_r_4 T0001 4712628 4712643 T0001_r_4 AATATAAGGGCATTGGAAGA #> T0001_r_3 T0001 4712628 4712643 T0001_r_3 TGGAGACAATATAAGGGCAT #> T0001_r_2 T0001 4712628 4712643 T0001_r_2 TTCTGCTGGAGACAATATAA #> T0001_r_1 T0001 4712628 4712643 T0001_r_1 CTTCTGCTGGAGACAATATA #> ... ... ... ... ... ... #> T1974_r_3 T1974 89126494 89126509 T1974_r_3 TTTGCCCATACTAGGGAGAG #> T1974_f_3 T1974 89126494 89126509 T1974_f_3 CCCTAGTATGGGCAAATATA #> T1974_r_2 T1974 89126494 89126509 T1974_r_2 CCATATATTTGCCCATACTA #> T1974_r_1 T1974 89126494 89126509 T1974_r_1 TCCATATATTTGCCCATACT #> T1974_f_4 T1974 89126494 89126509 T1974_f_4 GTATGGGCAAATATATGGAA #> crisprpam crisprcontext #> <character> <character> #> T0001_r_5 GGG GACAATATAAGGGCATTGGAAGAAGGGAGT #> T0001_r_4 AGG AGACAATATAAGGGCATTGGAAGAAGGGAG #> T0001_r_3 TGG CTGCTGGAGACAATATAAGGGCATTGGAAG #> T0001_r_2 GGG CACCTTCTGCTGGAGACAATATAAGGGCAT #> T0001_r_1 AGG ACACCTTCTGCTGGAGACAATATAAGGGCA #> ... ... ... 
#> T1974_r_3 CGG TATATTTGCCCATACTAGGGAGAGCGGCTT #> T1974_f_3 TGG CTCTCCCTAGTATGGGCAAATATATGGAAT #> T1974_r_2 GGG CATTCCATATATTTGCCCATACTAGGGAGA #> T1974_r_1 AGG CCATTCCATATATTTGCCCATACTAGGGAG #> T1974_f_4 TGG CCTAGTATGGGCAAATATATGGAATGGAAA #> ------- #> seqinfo: 66 sequences (1 circular) from mm10 genome