Count matches to indexed target/genome

match_seqs(
  seqs,
  indexdir,
  norc,
  mismatches = 2,
  outdir = OUTDIR,
  verbose = TRUE
)

Arguments

seqs

character vector: sequences to match against indexed ref

indexdir

string: dir containing indexed reference. This can be an indexed genome (index_genome). It can also be indexed targets (index_targets).

norc

TRUE or FALSE: whether to run bowtie also with reverse complements. Generally TRUE for genome and FALSE for target matches, because target ranges generally include both strands.

mismatches

max number of mismatches to consider

outdir

string: multicrispr output directory

verbose

TRUE (default) or FALSE

Value

data.table

See also

Examples

# PE example
#-----------
require(magrittr)
bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
gr <- char_to_granges(c(PRNP = 'chr20:4699600:+',             # snp
                        HBB  = 'chr11:5227002:-',             # snp
                        HEXA = 'chr15:72346580-72346583:-',   # del
                        CFTR = 'chr7:117559593-117559595:+'), # ins
                      bsgenome)
spacers <- find_pe_spacers(gr, bsgenome)
# indexdir <- genome_dir(indexedgenomesdir = INDEXEDGENOMESDIR, bsgenome)
# match_seqs(spacers$crisprspacer, indexdir, norc=TRUE, mismatches = 1)
# TFBS example
#-------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package = 'multicrispr')
targets <- extend(bed_to_granges(bedfile, genome = 'mm10'))
#> Read SRF.bed into GRanges
#> 1974 ranges on 21 chromosomes
indexdir <- index_targets(targets, bsgenome)
#> Index target sequences
#> 1974 target ranges
#> 3948 ranges after adding inverse strands
#> 3932 ranges after merging overlaps
#> Write seqs to ~/multicrisprout/targets.fa
#> Write index to ~/multicrisprout/targets
spacers <- find_spacers(targets, bsgenome)
#> 3948 ranges after adding inverse strands
seqs <- unique(paste0(spacers$crisprspacer, spacers$crisprpam))
match_seqs(seqs, indexdir, norc=FALSE)
#> Write reads to ~/multicrisprout/spacers.fa
#> Map reads: ~/multicrisprout/spacers/spacers_to_targets.txt
#> Load results
#> Count matches
#>                       readseq  MM0  MM1 MM2
#>    1: ATATAAGGGCATTGGAAGAAGGG    2 1910 190
#>    2: AATATAAGGGCATTGGAAGAAGG    2 1908 210
#>    3: TGGAGACAATATAAGGGCATTGG    2 1948 188
#>    4: TTCTGCTGGAGACAATATAAGGG    4 1646 474
#>    5: CTTCTGCTGGAGACAATATAAGG 1644  480  38
#>   ---
#> 3040: AGATGAGGAATATGCAAATAAGG    2    0   0
#> 3041: TTGCATATTCCTCATCTGATTGG    2    0   0
#> 3042: AGTGTGCTTATAAGGGGGGAAGG    2    0   0
#> 3043: AGAGAGTGTGCTTATAAGGGGGG    2    4   6
#> 3044: AGCACACTCTCTTAGTAAATTGG    2    6   4
match_seqs(seqs, indexdir, norc=FALSE, mismatches=3)
#> Write reads to ~/multicrisprout/spacers.fa
#> Map reads: ~/multicrisprout/spacers/spacers_to_targets.txt
#> Load results
#> Count matches
#>                       readseq  MM0  MM1 MM2 MM3
#>    1: ATATAAGGGCATTGGAAGAAGGG    2 1910 190  18
#>    2: AATATAAGGGCATTGGAAGAAGG    2 1908 210  22
#>    3: TGGAGACAATATAAGGGCATTGG    2 1948 188  22
#>    4: TTCTGCTGGAGACAATATAAGGG    4 1646 474  40
#>    5: CTTCTGCTGGAGACAATATAAGG 1644  480  38  12
#>   ---
#> 3040: AGATGAGGAATATGCAAATAAGG    2    0   0   0
#> 3041: TTGCATATTCCTCATCTGATTGG    2    0   0   0
#> 3042: AGTGTGCTTATAAGGGGGGAAGG    2    0   0   0
#> 3043: AGAGAGTGTGCTTATAAGGGGGG    2    4   6   2
#> 3044: AGCACACTCTCTTAGTAAATTGG    2    6   4   0