Commit cb24872f authored by aditya.bhagwat's avatar aditya.bhagwat
Browse files

Split run_bowtie and aggregate_bowtie_results

parent e88030f1
Pipeline #130852 passed with stages
in 24 seconds
......@@ -125,18 +125,6 @@ index_targets <- function(
}
rm_pam_mismatches <- function(dt, pam){
mismatches <- NULL
if (pam=='NGG'){
pattern <- '20:[ACGT][>][ACGT]'
dt %<>% extract(!stri_detect_regex(mismatches, pattern))
} else {
message('\t\tOfftarget analysis requires additional work ',
'for pam patterns other than NGG ')
}
return(dt)
}
run_bowtie <- function(
crisprfasta, indexdir, outfile, norc, mismatches = 2, pam = 'NGG'
){
......@@ -160,16 +148,24 @@ run_bowtie <- function(
norc = norc, # no reverse complement
outfile = outfile,
force = TRUE)
}
# Read results
aggregate_bowtie_results <- function(outfile, pam){
dt <- fread(outfile,
col.names = c('crisprname', 'strand', 'target', 'position',
'crisprseq', 'quality', 'matches', 'mismatches'))
dt[, spacername := crisprname %>% tstrsplit('.', fixed=TRUE) %>% extract(1)]
dt[, pam := crisprname %>% tstrsplit('.', fixed=TRUE) %>% extract(2)]
# Rm pam mismatches
if (pam=='NGG'){
pattern <- '20:[ACGT][>][ACGT]'
dt %<>% extract(!stri_detect_regex(mismatches, pattern))
} else {
message('\t\tOfftarget analysis requires additional work ',
'for pam patterns other than NGG ')
}
# Count matches
dt %<>% rm_pam_mismatches(pam) # dont double count pams!
dt[ is.na(mismatches), mismatch := 0]
dt[!is.na(mismatches), mismatch := stri_count_fixed(mismatches, '>')]
results <- dt[ , .N, keyby = .(spacername, mismatch)] %>%
......@@ -233,8 +229,10 @@ add_match_counts <- function(spacers, indexdir, norc, mismatches = 2,
outfile <- crisprseq_matchfile(outdir, indexdir)
if (verbose) cmessage('\t\tBowtie crisprseqs to %s: %s',
basename(indexdir), outfile)
matches <- run_bowtie(
crisprfasta, indexdir, outfile, norc = norc, mismatches = mismatches, pam = pam)
run_bowtie( crisprfasta, indexdir, outfile, norc = norc,
mismatches = mismatches, pam = pam)
if (verbose) cmessage('\t\tAggregate Bowtie results')
matches <- aggregate_bowtie_results(outfile, pam)
spacerdt %<>% merge(matches, by='spacername', all=TRUE, sort=FALSE)
mcols(spacers) %<>% merge(spacerdt[, -1], by = 'crisprspacer', all = TRUE,
sort = FALSE)
......
......@@ -10,22 +10,18 @@ reticulate::use_condaenv('azienv')
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package='multicrispr')
targets <- multicrispr::bed_to_granges(bedfile, genome='mm10', plot = FALSE)
png('graphs/srf_karyogram.png')
multicrispr::plot_karyogram(targets, title = NULL)
dev.off()
#png('graphs/srf_karyogram.png')
#multicrispr::plot_karyogram(targets, title = NULL)
#dev.off()
extended <- extend(targets, -22, +22)
targets['T0151'] %>% plot_intervals()
targets['T0151'] %>% extend() %>% plot_intervals()
targets['T0151'] %>% up_flank() %>% plot_intervals()
targets['T0151'] %>% down_flank() %>% plot_intervals()
spacers <- extended %>%
find_spacers(bsgenome) %>%
add_specificity(extended, bsgenome) %>%
add_efficiency(bsgenome, method = 'Doench2016')
#targets['T0151'] %>% plot_intervals()
#targets['T0151'] %>% extend() %>% plot_intervals()
#targets['T0151'] %>% up_flank() %>% plot_intervals()
#targets['T0151'] %>% down_flank() %>% plot_intervals()
spacers <- extended %>% find_spacers(bsgenome)
spacers %<>% add_specificity(extended, bsgenome)
spacers %<>% add_efficiency(bsgenome, method = 'Doench2016')
spacers %>% subset(seqnames == 'chr1') %>%
subset(specific == TRUE) %>%
gr2dt() %>%
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment