Count spacer matches among targets

add_target_counts(
  spacers,
  targets,
  bsgenome,
  mismatches = 2,
  pam = "NGG",
  outdir = OUTDIR,
  verbose = TRUE
)

Arguments

spacers

spacer GRanges-class

targets

target GRanges-class

bsgenome

BSgenome-class

mismatches

number (default 2): max number of mismatches to consider

pam

string (default 'NGG'): PAM pattern to expand

outdir

directory to which output is written

verbose

TRUE (default) or FALSE

Value

updated spacer GRanges-class

Details

Expands IUPAC ambiguities in the PAM sequence. Matches all resulting sequences against the (indexed) targets and genome. Adds the match counts to the GRanges object and then returns it.

See also

Examples

# TFBS example
#-------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
#> Read SRF.bed into GRanges
#> 1974 ranges on 21 chromosomes
spacers <- find_spacers(targets, bsgenome)
#> 3948 ranges after adding inverse strands
add_target_counts(spacers, targets, bsgenome)
#> Index target sequences
#> 1974 target ranges
#> 3948 ranges after adding inverse strands
#> 3932 ranges after merging overlaps
#> Write seqs to ~/multicrisprout/targets/targets.fa
#> Write index to ~/multicrisprout/targets/targets
#> Add target counts
#> Expand iupac ambiguities in pam
#> Write reads to ~/multicrisprout/spacers.fa
#> Map reads: ~/multicrisprout/spacers/spacers_to_targets.txt
#> Load results
#> Count matches
#> GRanges object with 10159 ranges and 11 metadata columns:
#> seqnames ranges strand | targetname targetstart
#> <Rle> <IRanges> <Rle> | <character> <integer>
#> T0001_r_5 chr1 4712619-4712638 - | T0001 4712628
#> T0001_r_4 chr1 4712620-4712639 - | T0001 4712628
#> T0001_r_3 chr1 4712627-4712646 - | T0001 4712628
#> T0001_r_2 chr1 4712633-4712652 - | T0001 4712628
#> T0001_r_1 chr1 4712634-4712653 - | T0001 4712628
#> ... ... ... ... . ... ...
#> T1974_r_3 chrY 89126492-89126511 - | T1974 89126494
#> T1974_f_3 chrY 89126496-89126515 + | T1974 89126494
#> T1974_r_2 chrY 89126499-89126518 - | T1974 89126494
#> T1974_r_1 chrY 89126500-89126519 - | T1974 89126494
#> T1974_f_4 chrY 89126501-89126520 + | T1974 89126494
#> targetend crisprname crisprspacer crisprpam name
#> <integer> <character> <character> <character> <character>
#> T0001_r_5 4712643 T0001_r_5 ATATAAGGGCATTGGAAGAA GGG SRF_MA0083.3
#> T0001_r_4 4712643 T0001_r_4 AATATAAGGGCATTGGAAGA AGG SRF_MA0083.3
#> T0001_r_3 4712643 T0001_r_3 TGGAGACAATATAAGGGCAT TGG SRF_MA0083.3
#> T0001_r_2 4712643 T0001_r_2 TTCTGCTGGAGACAATATAA GGG SRF_MA0083.3
#> T0001_r_1 4712643 T0001_r_1 CTTCTGCTGGAGACAATATA AGG SRF_MA0083.3
#> ... ... ... ... ... ...
#> T1974_r_3 89126509 T1974_r_3 TTTGCCCATACTAGGGAGAG CGG SRF_MA0083.3
#> T1974_f_3 89126509 T1974_f_3 CCCTAGTATGGGCAAATATA TGG SRF_MA0083.3
#> T1974_r_2 89126509 T1974_r_2 CCATATATTTGCCCATACTA GGG SRF_MA0083.3
#> T1974_r_1 89126509 T1974_r_1 TCCATATATTTGCCCATACT AGG SRF_MA0083.3
#> T1974_f_4 89126509 T1974_f_4 GTATGGGCAAATATATGGAA TGG SRF_MA0083.3
#> score T0 T1 T2
#> <numeric> <integer> <integer> <integer>
#> T0001_r_5 10.49542 1 973 97
#> T0001_r_4 10.49542 1 960 105
#> T0001_r_3 10.49542 1 975 95
#> T0001_r_2 10.49542 2 827 237
#> T0001_r_1 10.49542 825 242 19
#> ... ... ... ... ...
#> T1974_r_3 4.54393 5 3 7
#> T1974_f_3 4.54393 5 1 9
#> T1974_r_2 4.54393 5 1 9
#> T1974_r_1 4.54393 6 0 9
#> T1974_f_4 4.54393 6 0 9
#> -------
#> seqinfo: 66 sequences (1 circular) from mm10 genome