Commit 1bd74215 authored by aditya.bhagwat's avatar aditya.bhagwat
Browse files

Fix pam double count

parent 40100b33
Package: BSgenome.SarsCov2.UCSC.wuhCor1
Title: Full genome sequence for SARS-Cov-2 (UCSC version wuhCor1)
Description: Contains sequence of Wuhan Coronavirus.
Version: 0.0.1
Author: Aditya Bhagwat
Maintainer: Aditya Bhagwat <aditya.bhagwat@mpi-bn.mpg.de>
Depends: BSgenome (>= 1.54.0)
Imports: BSgenome
Suggests:
License: Artistic-2.0
organism: SARS-Cov-2
common_name: Wuhan Coronavirus
provider: UCSC
provider_version: wuhCor1
release_date: Jan. 2020
release_name: wuhCor1
source_url: http://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/bigZips
biocViews: AnnotationData, Genetics, BSgenome, SarsCov2
import(BSgenome)
### Don't export BSgenome.SarsCov2.UCSC.wuhCor1 or SarsCov2 (the new and old names of the
### BSgenome object defined in this package): the object is created and its 2
### names are dynamically exported at load time (refer to R/zzz.R for the
### details).
#export(BSgenome.SarsCov2.UCSC.wuhCor1)
#export(SarsCov2)
###
###
## Name under which this package must be loaded; .onLoad() stops for any
## other name.
.pkgname <- "BSgenome.SarsCov2.UCSC.wuhCor1"

## Handed to BSgenome() as seqnames / circ_seqs / mseqnames; all NULL here.
.seqnames <- NULL
.circ_seqs <- NULL
.mseqnames <- NULL

## Load hook.
## Builds the BSgenome object from the package's installed 'extdata'
## directory and exports it from the package namespace under two names:
## the package name itself and "SarsCov2".
##   libname: library location the package is loaded from
##   pkgname: name of the package being loaded
.onLoad <- function(libname, pkgname)
{
    if (pkgname != .pkgname)
        stop("package name (", pkgname, ") is not ",
             "the expected name (", .pkgname, ")")

    ## mustWork=TRUE: error out when the 'extdata' directory cannot be found.
    seqs_dir <- system.file("extdata", package=pkgname,
                            lib.loc=libname, mustWork=TRUE)

    ## Make the BSgenome object.
    genome_obj <- BSgenome(
        organism="SARS-Cov-2",
        common_name="Wuhan Coronavirus",
        provider="UCSC",
        provider_version="wuhCor1",
        release_date="Jan. 2020",
        release_name="wuhCor1",
        source_url="http://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/bigZips",
        seqnames=.seqnames,
        circ_seqs=.circ_seqs,
        mseqnames=.mseqnames,
        seqs_pkgname=pkgname,
        seqs_dirpath=seqs_dir
    )

    ## Bind the object in the namespace under 'export_name' and export it.
    pkg_ns <- asNamespace(pkgname)
    export_as <- function(export_name) {
        assign(export_name, genome_obj, envir=pkg_ns)
        namespaceExport(pkg_ns, export_name)
    }
    export_as(pkgname)     # new name: same as the package
    export_as("SarsCov2")  # old name
}
\name{BSgenome.SarsCov2.UCSC.wuhCor1}
\docType{package}
\alias{BSgenome.SarsCov2.UCSC.wuhCor1-package}
\alias{BSgenome.SarsCov2.UCSC.wuhCor1}
\alias{SarsCov2}
\title{Full genome sequence for SARS-Cov-2 (UCSC version wuhCor1)}
\description{
Contains sequence of Wuhan Coronavirus.
}
\details{
}
\note{
This BSgenome data package was made from the following source data files:
\preformatted{
wuhCor1.2bit from http://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/bigZips
}
See \code{?\link[BSgenome]{BSgenomeForge}} and the BSgenomeForge
vignette (\code{vignette("BSgenomeForge")}) in the \pkg{BSgenome}
software package for how to make a BSgenome data package.
}
\author{Aditya Bhagwat}
\seealso{
\itemize{
\item \link[BSgenome]{BSgenome} objects and the
\code{\link[BSgenome]{available.genomes}} function
in the \pkg{BSgenome} software package.
\item \link[Biostrings]{DNAString} objects in the \pkg{Biostrings}
package.
\item The BSgenomeForge vignette (\code{vignette("BSgenomeForge")})
in the \pkg{BSgenome} software package for how to make a BSgenome
data package.
}
}
\examples{
BSgenome.SarsCov2.UCSC.wuhCor1
genome <- BSgenome.SarsCov2.UCSC.wuhCor1
head(seqlengths(genome))
## ---------------------------------------------------------------------
## Genome-wide motif searching
## ---------------------------------------------------------------------
## See the GenomeSearching vignette in the BSgenome software
## package for some examples of genome-wide motif searching using
## Biostrings and the BSgenome data packages:
if (interactive())
vignette("GenomeSearching", package="BSgenome")
}
\keyword{package}
\keyword{data}
......@@ -6,6 +6,7 @@ export(add_context)
export(add_efficiency)
export(add_genome_counts)
export(add_inverse_strand)
export(add_match_counts)
export(add_seq)
export(add_specificity)
export(add_target_counts)
......@@ -31,8 +32,6 @@ export(gr2dt)
export(has_been_indexed)
export(index_genome)
export(index_targets)
export(match_seqs)
export(match_spacers)
export(plot_intervals)
export(plot_karyogram)
export(up_flank)
......@@ -64,10 +63,13 @@ importFrom(GenomeInfoDb,standardChromosomes)
importFrom(GenomicRanges,"mcols<-")
importFrom(GenomicRanges,GRanges)
importFrom(GenomicRanges,mcols)
importFrom(Rbowtie,bowtie)
importFrom(assertive.base,assert_all_are_false)
importFrom(assertive.base,assert_all_are_true)
importFrom(assertive.base,is_identical_to_true)
importFrom(assertive.files,assert_all_are_dirs)
importFrom(assertive.files,assert_all_are_existing_files)
importFrom(assertive.numbers,assert_all_are_greater_than_or_equal_to)
importFrom(assertive.numbers,assert_all_are_less_than)
importFrom(assertive.properties,assert_has_names)
importFrom(assertive.properties,has_names)
......@@ -84,9 +86,11 @@ importFrom(data.table,":=")
importFrom(data.table,.SD)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(data.table,fread)
importFrom(data.table,setnafill)
importFrom(data.table,setnames)
importFrom(data.table,setorderv)
importFrom(data.table,tstrsplit)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,facet_wrap)
......@@ -107,6 +111,7 @@ importFrom(magrittr,extract)
importFrom(magrittr,extract2)
importFrom(magrittr,set_names)
importFrom(methods,as)
importFrom(stringi,stri_count_fixed)
importFrom(stringi,stri_detect_regex)
importFrom(stringi,stri_locate_all_fixed)
importFrom(stringi,stri_locate_all_regex)
......
#' @importFrom assertive.base assert_all_are_true is_identical_to_true
#' @importFrom assertive.base assert_all_are_false
#' @importFrom assertive.files assert_all_are_dirs
#' @importFrom assertive.files assert_all_are_existing_files
#' @importFrom assertive.numbers assert_all_are_greater_than_or_equal_to
#' @importFrom assertive.numbers assert_all_are_less_than
#' @importFrom assertive.properties has_names assert_has_names
#' @importFrom assertive.reflection is_windows
......@@ -16,6 +18,7 @@
#' @importFrom BSgenome getSeq getBSgenome
#' @importFrom data.table := data.table as.data.table setnames
#' @importFrom data.table setnames setorderv setnafill .SD
#' @importFrom data.table tstrsplit fread
#' @importFrom GenomeInfoDb genome
#' @importFrom GenomeInfoDb seqinfo seqinfo<-
#' @importFrom GenomeInfoDb seqlevels seqlevels<- seqlevelsInUse
......@@ -31,9 +34,11 @@
#' @importFrom magrittr %>% %<>% and
#' @importFrom magrittr extract extract2 set_names
#' @importFrom methods as
#' @importFrom Rbowtie bowtie
#' @importFrom tidyr separate_rows
#' @importFrom utils getFromNamespace head tail
#' @importFrom utils read.csv read.table
#' @importFrom stringi stri_count_fixed
#' @importFrom stringi stri_detect_regex stri_locate_all_fixed
#' @importFrom stringi stri_locate_all_regex
#' @importFrom stringi stri_replace_first_fixed
......
This diff is collapsed.
# Exploratory script: forge a BSgenome data package for SarsCov2 (UCSC
# wuhCor1) and try multicrispr on it. Meant to be stepped through
# interactively; several lines only print intermediate results.
library(magrittr)

# AnnotationHub has 2bit files for 224 organisms
ah <- AnnotationHub::AnnotationHub()
ah %<>% extract(.$rdataclass == 'TwoBitFile')
length(unique(ah$species))
table(ah$species)

# Create a BSgenome for SarsCov2
library(BSgenome)
genomedir <- '../multicrisprout/indexedgenomes/SarsCov2'
url <- 'http://hgdownload.soe.ucsc.edu/goldenPath/wuhCor1/bigZips/wuhCor1.2bit'
# file.path() puts the 2bit file inside genomedir (paste0() glued the dir
# name and the file name together without a separator).
# mode = 'wb' because the 2bit file is binary.
download.file(url, file.path(genomedir, basename(url)), mode = 'wb')
forgeBSgenomeDataPkg(
    file.path(genomedir, 'BSgenome.SarsCov2.UCSC.wuhCor1-seed'),
    destdir = file.path(genomedir, 'SarsCov2'))
bspackage <- file.path(genomedir, 'SarsCov2', 'BSgenome.SarsCov2.UCSC.wuhCor1')
devtools::check(bspackage)
devtools::install(bspackage)
library(BSgenome.SarsCov2.UCSC.wuhCor1)
bsgenome <- BSgenome.SarsCov2.UCSC.wuhCor1::BSgenome.SarsCov2.UCSC.wuhCor1
#devtools::build(bspackage)

# Multicrispr
# Suppose we want to study the role of a SarsCov2 protein through a Crispr-based
# technology. One such protein is the protein ORF3, for which we see that the
# UCSC genome browser currently has no known function
library(multicrispr)
gr <- char_to_granges('NC_045512v2:25393-26220:+', bsgenome)
BSgenome::getSeq(bsgenome, gr)
spacers <- multicrispr::find_spacers(gr, bsgenome, complement = FALSE)
index_genome(bsgenome)
spacers %<>% add_genome_counts(bsgenome)
# Is this correct?
# Suppose we
# TODO: unfinished step. The dangling pipe below is commented out because it
# continued onto the next line and consumed the `nsp5 <- ...` assignment.
# spacers %<>%

# nsp5 sequence, written in lowercase with 'u'; it is upper-cased and
# U is replaced by T before it is searched for in the genome sequence.
nsp5 <- 'agugguuuuagaaaaauggcauucccaucugguaaaguugaggguuguaugguacaaguaacuugugguacaacuacacuuaacggucuuuggcuugaugacguaguuuacuguccaagacaugugaucugcaccucugaagacaugcuuaacccuaauuaugaagauuuacucauucguaagucuaaucauaauuucuugguacaggcugguaauguucaacucaggguuauuggacauucuaugcaaaauuguguacuuaagcuuaagguugauacagccaauccuaagacaccuaaguauaaguuuguucgcauucaaccaggacagacuuuuucaguguuagcuuguuacaaugguucaccaucugguguuuaccaaugugcuaugaggcccaauuucacuauuaaggguucauuccuuaaugguucaugugguaguguugguuuuaacauagauuaugacugugucucuuuuuguuacaugcaccauauggaauuaccaacuggaguucaugcuggcacagacuuagaagguaacuuuuauggaccuuuuguugacaggcaaacagcacaagcagcugguacggacacaacuauuacaguuaauguuuuagcuugguuguacgcugcuguuauaaauggagacaggugguuucucaaucgauuuaccacaacucuuaaugacuuuaaccuuguggcuaugaaguacaauuaugaaccucuaacacaagaccauguugacauacuaggaccucuuucugcucaaacuggaauugccguuuuagauaugugugcuucauuaaaagaauuacugcaaaaugguaugaauggacguaccauauuggguagugcuuuauuagaagaugaauuuacaccuuuugauguuguuagacaaugcucagguguuacuuuccaa'
nsp5 %<>% toupper()
nsp5 %<>% stringi::stri_replace_all_fixed('U', 'T')
# as.character(): hand stringi a plain string rather than the sequence object
# (presumably a Biostrings sequence -- confirm).
stringi::stri_detect_fixed(as.character(bsgenome$NC_045512v2), nsp5)
bsgenome$NC_045512v2
\ No newline at end of file
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/07_filter_specific.R
\name{add_genome_counts}
\name{add_match_counts}
\alias{add_match_counts}
\alias{add_target_counts}
\alias{add_genome_counts}
\title{Add genome counts}
\title{Add genome/targetset match counts}
\usage{
add_match_counts(
spacers,
indexdir,
norc,
mismatches = 2,
outdir = OUTDIR,
pam = "NGG",
verbose = TRUE
)
add_target_counts(
spacers,
targets,
bsgenome,
mismatches = 2,
pam = "NGG",
outdir = OUTDIR,
verbose = TRUE
)
add_genome_counts(
spacers,
bsgenome = getBSgenome(genome(spacers)[1]),
......@@ -17,23 +39,25 @@ add_genome_counts(
\arguments{
\item{spacers}{spacer \code{\link[GenomicRanges]{GRanges-class}}}
\item{bsgenome}{\code{\link[BSgenome]{BSgenome-class}}}
\item{mismatches}{number (default 2): max number of mismatches to consider}
\item{pam}{string (default 'NGG') pam pattern to expand}
\item{outdir}{dir where output is written to}
\item{indexedgenomesdir}{string: dir with indexed genomes}
\item{pam}{string (default 'NGG') pam pattern to expand}
\item{verbose}{TRUE (default) or FALSE}
\item{targets}{target \code{\link[GenomicRanges]{GRanges-class}}}
\item{bsgenome}{\code{\link[BSgenome]{BSgenome-class}}}
\item{indexedgenomesdir}{string: dir with indexed genomes}
}
\value{
spacer GRanges with additional mcols
}
\description{
Count spacer matches to genome and add to GRanges
Count spacer matches to targetset/genome and add to GRanges
}
\details{
Expands IUPAC ambiguities in the pam sequence.
......@@ -41,28 +65,27 @@ Matches all resulting sequences against (indexes) target and genome.
Adds match counts to GRanges object, and then returns it.
}
\examples{
# TFBS example
#-------------
bs <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
spacers <- find_spacers(targets, bs)
add_target_counts(spacers, targets, bs)
add_match_counts( spacers, index_targets(targets, bs), norc=FALSE)
# add_genome_counts(spacers, bs, indexedgenomesdir=index_genome(bs))
# PE example
#-----------
require(magrittr)
bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
bs <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
gr <- char_to_granges(c(PRNP = 'chr20:4699600:+', # snp
HBB = 'chr11:5227002:-', # snp
HEXA = 'chr15:72346580-72346583:-', # del
CFTR = 'chr7:117559593-117559595:+'), # ins
bsgenome)
spacers <- find_pe_spacers(gr, bsgenome)
# index_genome(bsgenome)
# add_genome_counts(spacers, bsgenome, mismatches=1)
# TFBS example
#-------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
spacers <- find_spacers(targets, bsgenome)
# index_genome(bsgenome)
# add_genome_counts(spacers, bsgenome)
# add_genome_counts(spacers, bsgenome, mismatches=3)
bs)
spacers <- find_pe_spacers(gr, bs)
# add_genome_counts(spacers, bs, indexedgenomesdir = index_genome(bs))
}
\seealso{
\code{\link{index_genome}}, \code{\link{index_targets}}
......
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/07_filter_specific.R
\name{add_target_counts}
\alias{add_target_counts}
\title{Add target counts}
\usage{
add_target_counts(
spacers,
targets,
bsgenome,
mismatches = 2,
pam = "NGG",
outdir = OUTDIR,
verbose = TRUE
)
}
\arguments{
\item{spacers}{spacer \code{\link[GenomicRanges]{GRanges-class}}}
\item{targets}{target \code{\link[GenomicRanges]{GRanges-class}}}
\item{bsgenome}{\code{\link[BSgenome]{BSgenome-class}}}
\item{mismatches}{number (default 2): max number of mismatches to consider}
\item{pam}{string (default 'NGG') pam pattern to expand}
\item{outdir}{dir where output is written to}
\item{verbose}{TRUE (default) or FALSE}
}
\value{
updated spacer \code{\link[GenomicRanges]{GRanges-class}}
}
\description{
Count spacer matches among targets
}
\details{
Expands IUPAC ambiguities in the pam sequence.
Matches all resulting sequences against (indexes) target and genome.
Adds match counts to GRanges object, and then returns it.
}
\examples{
# TFBS example
#-------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
spacers <- find_spacers(targets, bsgenome)
add_target_counts(spacers, targets, bsgenome)
}
\seealso{
\code{\link{index_genome}}, \code{\link{index_targets}}
}
......@@ -25,7 +25,7 @@ Bowtie index genome
}
\examples{
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
index_genome(bsgenome)
#index_genome(bsgenome)
bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
index_genome(bsgenome)
#index_genome(bsgenome)
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/07_filter_specific.R
\name{match_seqs}
\alias{match_seqs}
\title{Match spacer sequences}
\usage{
match_seqs(
seqs,
indexdir,
norc,
mismatches = 2,
outdir = OUTDIR,
verbose = TRUE
)
}
\arguments{
\item{seqs}{character vector: sequences to match against indexed ref}
\item{indexdir}{string: dir containing indexed reference.
This can be an indexed genome (\code{\link{index_genome}}).
It can also be indexed targets (\code{\link{index_targets}}).}
\item{norc}{TRUE or FALSE: whether to run bowtie also with revcompls
Generally TRUE for genome and FALSE for target matches,
because target ranges generally include both strands.}
\item{mismatches}{max number of mismatches to consider}
\item{outdir}{string: multicrispr output directory}
\item{verbose}{TRUE (default) or FALSE}
}
\value{
data.table
}
\description{
Count matches to indexed target/genome
}
\examples{
# TFBS example
#-------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
indexdir <- index_targets(targets, bsgenome)
spacers <- find_spacers(targets, bsgenome)
seqs <- unique(paste0(spacers$crisprspacer, spacers$crisprpam))
match_seqs(seqs, indexdir, norc=FALSE)
match_seqs(seqs, indexdir, norc=FALSE, mismatches=3)
}
\seealso{
\code{\link{index_genome}}, \code{\link{index_targets}}
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/07_filter_specific.R
\name{match_spacers}
\alias{match_spacers}
\title{Match spacers}
\usage{
match_spacers(
spacers,
indexdir,
norc,
mismatches = 2,
outdir = OUTDIR,
pam = "NGG",
verbose = TRUE
)
}
\arguments{
\item{spacers}{spacer \code{\link[GenomicRanges]{GRanges-class}}}
\item{indexdir}{string: dir containing indexed reference.
This can be an indexed genome (\code{\link{index_genome}}).
It can also be indexed targets (\code{\link{index_targets}}).}
\item{norc}{TRUE or FALSE: whether to run bowtie also with revcompls
Generally TRUE for genome and FALSE for target matches,
because target ranges generally include both strands.}
\item{mismatches}{number (default 2): max number of mismatches to consider}
\item{outdir}{string: file where to output bowtie results}
\item{pam}{string (default 'NGG') pam pattern to expand}
\item{verbose}{TRUE (default) or FALSE}
}
\value{
data.table
}
\description{
Count matches to indexed target/genome and add to GRanges
}
\details{
Expands IUPAC ambiguities in the pam sequence.
Matches all resulting sequences against (indexes) target and genome.
Adds match counts to GRanges object, and then returns it.
}
\examples{
# TFBS example
#-------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
indexdir <- index_targets(targets, bsgenome)
spacers <- find_spacers(targets, bsgenome)
match_spacers(spacers, indexdir, norc=FALSE, mismatches = 1)
}
\seealso{
\code{\link{index_genome}}, \code{\link{index_targets}}
}
......@@ -18,8 +18,8 @@ The task of designing a Crispr/Cas9 gRNA library to target a set of genomic loci
1. Define genomic targets
2. Extend or flank targets (e.g. extend to ensure 23-bp width, flank to target promoters/enhancers)
3. Find potential N20NGG Cas9 sites within target loci
4. Predict targeting efficiency and filter for expected efficiency.
5. Find offtarget (mis)matches and filter for offtarget-free Cas9
4. Find offtarget (mis)matches and filter for offtarget-free Cas9
5. Predict targeting efficiency and filter for expected efficiency.
6. Return offtarget-free, expectedly efficient Crispr/Cas9 gRNA site/sequence library
......@@ -27,6 +27,30 @@ The task of designing a Crispr/Cas9 gRNA library to target a set of genomic loci
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE)
```
# Install and load package azimuth (for on-target scoring)
To enable Doench2016 computation, first install the (python) package azimuth from Doench et al. (2016). This can easily be done using reticulate:
```{r, eval = FALSE}
# Install azimuth
# Important: run R(Studio) with admin privileges for this to work
# Installation is skipped unless install_azimuth is set to TRUE
install_azimuth <- FALSE
if (install_azimuth){
reticulate::conda_create('azienv', 'python=2.7') # Create condaenv 'azienv' with python 2.7
reticulate::conda_install('azienv', 'azimuth', pip = TRUE) # Install azimuth into 'azienv' via pip
reticulate::conda_install('azienv', 'scikit-learn==0.17.1', pip = TRUE) # Install scikit-learn 0.17.1 into 'azienv' via pip
}
```
Once installed, make sure that this R session uses this conda environment:
```{r}
library(reticulate)
# conda_list() returns a data.frame of conda environments, so the environment
# name is looked up in its 'name' column rather than in the data.frame itself.
if ('azienv' %in% conda_list()$name){
    use_condaenv('azienv')
}
```
# Define targets
As a first step, genomic targets are defined as `GRanges` (R/BioC class to represent genomic ranges).
......@@ -69,21 +93,21 @@ targets0 <- bed_to_granges(bedfile, genome = 'mm10')
As a second step, extension or flanking may be required. The functions `left_flank`, `right_flank`, and `double_flank` flank target ranges, e.g. in order to target promoters or enhancers rather than TSS:
```{r}
# Left flank
targets <- left_flank( targets0, leftstart =-200, leftend = -1)
# Upstream flank
targets <- up_flank( targets0, -200, -1, plot = TRUE)
# Right flank
targets <- right_flank( targets0, rightstart= 1, rightend=200)
# Downstream flank
targets <- down_flank( targets0, +1, +200, plot = TRUE)
# Double flank
targets <- double_flank(targets0, leftstart = -200, leftend=-1, rightstart=1, rightend=200)
targets <- double_flank(targets0, -200, -1, +1, +200, plot = TRUE)
```
The function `extend` expands target ranges in either (or both) direction(s), ensuring proper width to contain 23 base Cas9 sites.
```{r}
targets <- extend(targets0, leftstart=-22, rightend=22)
targets <- extend(targets0, -22, 22, plot = TRUE)
```
......@@ -96,8 +120,20 @@ The associated plot shows that (nearly) all targets have Cas9 sites.
```{r}
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
targets <- add_seq(targets, bsgenome)
cas9s <- find_crisprsites(targets)
spacers <- find_spacers(targets, bsgenome)
```
# Prevent offtarget effects
We can restrict the Cas9 sites to only those that have no offtarget effects.
For purposes of demonstration, let us restrict ourselves to Y-chromosome targets.
```{r}
# Run only when has_been_indexed() reports TRUE for the genome:
# add specificity info to the spacers, then keep only the specific ones.
if (has_been_indexed(bsgenome)){
    spacers <- add_specificity(spacers, targets, bsgenome)
    spacers <- subset(spacers, specific==TRUE)
}
```
# Predict targeting efficiency
......@@ -105,8 +141,8 @@ cas9s <- find_crisprsites(targets)
Not all N~20~NGG gRNA sequences target equally well (even when matching sequence perfectly). For each position in the 23-bp gRNA sequence, the nucleotide present in current, previous and next position has an effect on targeting