Commit 0f2d719d authored by Joerg Buescher's avatar Joerg Buescher
Browse files

removing need to change working directory to the path where the .mzML files are

parent e51e8320
# Generated by roxygen2: do not edit by hand
export(initialize_prm)
export(qqq_auto_integrate)
export(qqq_model_train)
export(process_batch)
export(train_model)
......@@ -62,7 +62,7 @@ for (id in 1:prm$nsmpl) {
# fine-adjust rt shift only for really polar
if (prm$polarmets && !prm$sosomets){
if (prm$verbose >=2 ){
pdf(file = "shiftplots.pdf", height = 6, width = 6, family = "Helvetica")
pdf(file = file.path(prm$batchdir, "shiftplots.pdf"), height = 6, width = 6, family = "Helvetica")
}
fitqual <- numeric()
for (id in 1:prm$nsmpl) {
......@@ -340,7 +340,7 @@ for (im in 1:prm$nmet) {
if (prm$verbose >= 1) {
# write log of quality scores for training to tsv file
write.table(qslog, file = 'qslog.tsv', sep = '\t', row.names = FALSE)
write.table(qslog, file = file.path(prm$batchdir, 'qslog.tsv'), sep = '\t', row.names = FALSE)
# write template for training solution to xlsx file
wb <- openxlsx::createWorkbook()
......@@ -357,7 +357,7 @@ if (prm$verbose >= 1) {
openxlsx::writeData(wb, 'Sheet1', prm$unirt[pstartmat[ ,im]] , startCol = nowcol+1, startRow = 4, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', prm$unirt[pendmat[ ,im]] , startCol = nowcol+2, startRow = 4, rowNames = FALSE, colNames = FALSE)
}
openxlsx::saveWorkbook(wb, "manual_peakcheck_template.xlsx", overwrite = TRUE)
openxlsx::saveWorkbook(wb, file.path(prm$batchdir, "manual_peakcheck_template.xlsx"), overwrite = TRUE)
}
msd # return msd
......
......@@ -31,9 +31,6 @@ initialize_prm <- function() {
prm$pathprefix <- '/'
}
# Set up logging (log to file only when running on server) ------------------------------
prm$log_con <- file("R_messages.log",open="a")
# Set global parameters for peak detection
prm$timerange <- c(0,5) # time range in minutes --> will be re-determined based on first sample further down
prm$samplingfrequency <- 2 # samplingfrequency (time resolution of analysis) in Hz
......
......@@ -8,8 +8,8 @@ peak_detection <- function(metab, smpl, prm) {
cat('Now integrating peaks in from all files. \n Counting files...', file=prm$log_con)
print(' ')
qslog_i <- 1
# initialize varibiables
# initialize varibiables
# outer list: metabolites
# inner lists: samples
msdelement <- list('x1' = rep(0,prm$nscan) , # quantifier chromatogram
......@@ -52,7 +52,7 @@ peak_detection <- function(metab, smpl, prm) {
cat(paste(as.character(id),'-'), file=prm$log_con)
print(paste0(" Peak detection of file: ", id, ": ", smpl$samplenames[id], ' '))
}
#loop over metabolites (quantifier ids)
for(im in 1:prm$nmet){
# im <- 1
......@@ -62,7 +62,7 @@ peak_detection <- function(metab, smpl, prm) {
if (prm$verbose > 0) {
cat('\r', paste(" Working on: ", metab[[im]]$name ," - ","ID: ",im, ' '))
}
# Set default values and continue if metabolite WAS NOT MEASURED in sample
if (!metab[[im]]$measured) {
if (prm$verbose >= 3) {
......@@ -77,17 +77,17 @@ peak_detection <- function(metab, smpl, prm) {
msd[[im]][[id]]$height13 <- 0
msd[[im]][[id]]$RT <- NA
msd[[im]][[id]]$qs <- -1
next
next
} # not measured
# start new metabolite parameter set
candidates <- list()
candqs <- numeric()
candrt <- numeric()
anypeak <- FALSE
x1 <- x2 <- x3 <- rep(0,prm$nscan)
# Get rollmean of quantifier
if (length(smpl$chroms[[id]][[ metab[[im]]$quant$MRM ]] ) > prm$smoothnum ) {
nonanpos <- !is.na(smpl$chroms[[id]][[metab[[im]]$quant$MRM]])
......@@ -100,7 +100,7 @@ peak_detection <- function(metab, smpl, prm) {
}
}
nowdata <- x1
# Get rollmean 12c qualifiers
if (length(smpl$chroms[[id]][[ metab[[im]]$qual12c$MRM[1] ]] ) > prm$smoothnum ) {
nonanpos <- !is.na(smpl$chroms[[id]][[metab[[im]]$qual12c$MRM[1]]])
......@@ -111,7 +111,7 @@ peak_detection <- function(metab, smpl, prm) {
nowdata <- cbind(nowdata,x2)
}
} else {
if (msd[[im]][[id]]$foundchrom[2]) {
print(paste('12C qualifier not found for', metab[[im]]$name, 'in', smpl$samplenames[id], ' '))
} else {
......@@ -120,7 +120,7 @@ peak_detection <- function(metab, smpl, prm) {
}
}
}
# Get 13c qualifier
if (length(smpl$chroms[[id]][[ metab[[im]]$qual13c$MRM[1] ]] ) > prm$smoothnum ) {
nonanpos <- !is.na(smpl$chroms[[id]][[metab[[im]]$qual13c$MRM[1]]])
......@@ -131,7 +131,7 @@ peak_detection <- function(metab, smpl, prm) {
nowdata <- cbind(nowdata,x3)
}
} else {
if (msd[[im]][[id]]$foundchrom[3]) {
print(paste('13C qualifier not found for', metab[[im]]$name, 'in', smpl$samplenames[id], ' '))
} else {
......@@ -146,31 +146,31 @@ peak_detection <- function(metab, smpl, prm) {
msd[[im]][[id]]$x2 <- x2
msd[[im]][[id]]$x3 <- x3
msd[[im]][[id]]$combidata <- nowdata
# !-------
# GET PEAK CANDIDATES
# GET PEAK CANDIDATES
peak_list <- qqq_mrm_integrate(metab, smpl, prm, msd, id, im)
# !-------
# ToDo: move this for loop to end of qqq_mrm_integrate
# alternatively: get logic from qqq_mrm_integrate and place here
#
for (p in 1:length(peak_list)){
if (!is.list(peak_list[[p]]) ) {
candqs[p] <- -1
candrt[p] <- -1
next
}
}
# To check: maybe no need to write peak_list to msd[[im]][[id]]$peaks
candqs[p] <- peak_list[[p]]$qs
candrt[p] <- peak_list[[p]]$RT
anypeak <- TRUE
} # endfor peaklist
# Check peak candidates qs scores against treshold
if (prm$verbos >= 2){
cat('\r', paste(metab[[im]]$name , " --> Peaks' QS:", paste(sapply(candqs,round,2),collapse=" ")))
......@@ -193,16 +193,16 @@ peak_detection <- function(metab, smpl, prm) {
msd[[im]][[id]]$RT <- NA
msd[[im]][[id]]$qs <- -1
msd[[im]][[id]]$qs <- numeric(length(peak_list$qs_list))
next
}
# POLARMETS
# for polar mets take best peak or if there is no convincing peak
if ( (prm$polarmets) || (length(nowgoodpeak) == 0) ) {
# Fill matrices of area, height, RT, qs
if ( (prm$polarmets) || (length(nowgoodpeak) == 0) ) {
# Fill matrices of area, height, RT, qs
bestpeak <- which(candqs == max(candqs))[1]
# add bestpeak to metabolite-sample-data
msd[[im]][[id]]$bestpeak <- bestpeak # for_cleanup: msd[[im]][[id]]$bestoverallpeak <-
if (prm$sum12c) {
......@@ -224,9 +224,9 @@ peak_detection <- function(metab, smpl, prm) {
msd[[im]][[id]]$ylim <- peak_list[[bestpeak]]$ylim
msd[[im]][[id]]$label1 <- peak_list[[bestpeak]]$label1
msd[[im]][[id]]$label2 <- peak_list[[bestpeak]]$label2
qsnames <- names( peak_list[[bestpeak]]$qs_list )
# append qslog info for training (last 2 cols are x12_13c_flags for qs median values)
for(p in 1:length(peak_list)){
metab_name <- metab[[im]]$name
......@@ -235,7 +235,7 @@ peak_detection <- function(metab, smpl, prm) {
peak_RT <- peak_list[[p]]$RT
qs_scores <- peak_list[[p]]$qs_list
x12_13c_flags <- msd[[im]][[id]]$foundchrom[2:3]
if (qslog_i == 1) {
qslog <- c(metab_name,sample_name,peak_id,peak_RT,as.numeric(qs_scores),x12_13c_flags)
} else {
......@@ -243,24 +243,24 @@ peak_detection <- function(metab, smpl, prm) {
}
qslog_i <- qslog_i + 1
}
# Write training data to tsv (quality scores for 1. model)
cat('\r', paste(' --> found Metabolite at RT: ', as.character(round(peak_list[[bestpeak]]$RT,2)) ) )
} # -> else for lipids comes here <-
} # loop over metabolites
}# loop over samples
colnames(qslog) = c("Metabolite", "id", "Peak", "RT", qsnames , "12C", "13C")
if (prm$verbose >= 1) {
# tsv is saver in case of comma in metabolite or sample name, like fructore 1,6 bisphosphate
write.table(qslog, file = 'qslog_initial.tsv', sep = '\t', row.names = FALSE)
write.table(qslog, file = file.path(prm$batchdir, 'qslog_initial.tsv'), sep = '\t', row.names = FALSE)
}
msd
}# function end
plot_peaks <- function(metab, smpl, msd, prm) {
# plots of MRMs (quantifier, 12c&13c-qualifier) (n: samples x metabolites)
# plots of MRMs (quantifier, 12c&13c-qualifier) (n: samples x metabolites)
# plots peakarea, RT(expected and measured)
# adds quality scores to labels (more qs in expertmode)
cat('Creating peakoverview.pdf ', file=prm$log_con)
print(' ')
if(prm$train_path != ""){
pdf_file <- paste0("peakoverview_", prm$train_path, ".pdf")
}else{
pdf_file <- "peakoverview.pdf"
}
pdf(file = pdf_file, height = 2*prm$nsmpl, width = 5* prm$nmet, family = "Helvetica")
pdf(file = file.path(prm$batchdir, pdf_file), height = 2*prm$nsmpl, width = 5* prm$nmet, family = "Helvetica")
par(mai = c(0.5, 0.5, 0.8, 0.5))
layout(matrix(c(1:(prm$nmet*prm$nsmpl)), prm$nsmpl, prm$nmet, byrow = TRUE)) # initiate subplots
# Reverse looping and indexing according to Jörg plotting logic (samples first, then metabolites)
for (id in 1:prm$nsmpl){
for (im in 1:prm$nmet){
......@@ -25,11 +25,11 @@ plot_peaks <- function(metab, smpl, msd, prm) {
nd <- get_ref_chromatograms(metab, msd, smpl, id, id, im)
nd$x1 <- nd$x1 - prm$bgoffset
nd$x1[nd$x1 < 0] <- 0
nowy <- compressplot(nd$x1)
nowgoody <- !is.na(nowy)
textxpos <- min(prm$unirt[nowgoody], na.rm = TRUE)
if (is.na(msd[[im]][[id]]$peakstart ) || is.na(msd[[im]][[id]]$peakend) ) {
nowgoodplot <- FALSE
msd[[im]][[id]]$ylim <- 1.1 * max(c(nd$x1, nd$x2, nd$x3), na.rm = TRUE)
......@@ -37,33 +37,33 @@ plot_peaks <- function(metab, smpl, msd, prm) {
cat('\r', paste('No peak to plot for', metab[[im]]$name, 'in', smpl$samplenames[id],'. '))
}
}
if (sum(nowgoody) < 2) {
nowgoodplot <- FALSE
plot(0)
} else {
if (msd[[im]][[id]]$report) {
if (msd[[im]][[id]]$report) {
main_col <- "black"
} else {
} else {
main_col <- "red"
}
if (prm$expertmode) {
plot(prm$unirt[nowgoody], nowy[nowgoody] ,
type = 'l',
type = 'l',
main = msd[[im]][[id]]$label2, col.main = main_col,
ylim = msd[[im]][[id]]$ylim,
xlab='', ylab='',
xlab='', ylab='',
cex.main=0.7)
} else {
plot(prm$unirt[nowgoody], nowy[nowgoody] ,
type = 'l',
type = 'l',
main = msd[[im]][[id]]$label1, col.main = main_col,
ylim = msd[[im]][[id]]$ylim,
xlab='', ylab='',
xlab='', ylab='',
cex.main=0.7)
}
} # endif there is data to plot
if (nowgoodplot) {
# mark inegrated range
nowxa <- msd[[im]][[id]]$peakstart : msd[[im]][[id]]$peakend
......@@ -81,14 +81,14 @@ plot_peaks <- function(metab, smpl, msd, prm) {
polygon(x=c(prm$unirt[nowxa[1]], prm$unirt[nowxa], prm$unirt[nowxa[length(nowxa)]]) ,
y=c(msd[[im]][[id]]$ylim[2] , nowya, msd[[im]][[id]]$ylim[2]) ,
col='#DDDDDD', border=NA)
# draw quantifier line again because it looks nicer
lines(prm$unirt[nowgoody],nowy[nowgoody],lwd=1.2)
}
# legend for quantifier
text(textxpos,0.95*msd[[im]][[id]]$ylim[2], metab[[im]]$quant$MRM , cex=0.5, pos=4)
# draw vertical line at peaktop
lines(prm$unirt[rep(msd[[im]][[id]]$peaktop,2)],
msd[[im]][[id]]$ylim,
......@@ -98,14 +98,14 @@ plot_peaks <- function(metab, smpl, msd, prm) {
lines(rep( metab[[im]]$RT + (msd[[im]][[id]]$predshift / (prm$samplingfrequency * 60)) , 2),
c(0,-100000),
lwd=5, col="blue")
# draw vertical line at exp RT
lines(rep(metab[[im]]$RT ,2),
c(0,-100000),
lwd=3, col="red")
}
# plot 12C qualifier
if (msd[[im]][[id]]$foundchrom[2]) {
nd$x2 <- nd$x2 - prm$bgoffset
......@@ -119,7 +119,7 @@ plot_peaks <- function(metab, smpl, msd, prm) {
}
text(textxpos,0.85*msd[[im]][[id]]$ylim[2], metab[[im]]$qual12c$MRM , col = 'blue', cex=0.5, pos=4)
}
# plot 13c qualifier
if (msd[[im]][[id]]$foundchrom[3]) {
nd$x3 <- nd$x3 - prm$bgoffset
......@@ -133,7 +133,7 @@ plot_peaks <- function(metab, smpl, msd, prm) {
}
text(textxpos,0.75*msd[[im]][[id]]$ylim[2], metab[[im]]$qual13c$MRM , col = 'green', cex=0.5, pos=4)
}
}
}
dev.off()
......
......@@ -2,6 +2,7 @@
#'
#' This function processes all .mzML files in nowfolder (current folder if empty)
#' using metabolite definitions and sample information provided in the same folder.
#' Output files are written to nowfolder.
#'
#' @param nowfolder folder that contains .mzML files, sample.info and optionally update_prm.R. Default is current working directory
#' @param parameters list of parameters that overwrites defaults defined in initialize_prm()
......@@ -15,7 +16,7 @@ process_batch <- function(nowfolder = "", parameters = list()) {
ptm <- proc.time()
pardefault <- par(no.readonly = TRUE)
options("scipen"=100, "digits"=4)
originalwd <- getwd()
# originalwd <- getwd()
# Set working directory (in Testmode)
if (nowfolder == '') {
......@@ -23,8 +24,8 @@ process_batch <- function(nowfolder = "", parameters = list()) {
nowfolder <- getwd()
}else{
if(dir.exists(nowfolder)){
print('setting nowfolder')
setwd(nowfolder)
# print('setting nowfolder')
# setwd(nowfolder)
}else{
cat(paste0(nowfolder," directory doesn't exist!"))
return(NULL)
......@@ -32,14 +33,18 @@ process_batch <- function(nowfolder = "", parameters = list()) {
}
# Remove existing log file
system("rm R_messages.log")
# Remove existing log file and create new one
system(paste0("rm ", file.path(nowfolder, "R_messages.log")))
# Initialize prm (global settings)
prm <- initialize_prm()
# create new log file
prm$log_con <- file(file.path(nowfolder, "R_messages.log"),open="a")
# update prm from local settings in mzML folder
update_prm_path <- paste0(nowfolder,"/update_prm.xlsx")
update_prm_path <- file.path(nowfolder,"update_prm.xlsx")
if (file.exists(update_prm_path)){
updatedata <- openxlsx::read.xlsx(update_prm_path)
if (('Variable.in.prm' %in% colnames(updatedata)) && ('Value' %in% colnames(updatedata)) ) {
......@@ -65,6 +70,9 @@ process_batch <- function(nowfolder = "", parameters = list()) {
}
}
# save nowfolder to prm
prm$batchdir <- nowfolder
if(!prm$runninglocal){ # if running on server
sink(prm$log_con, append=TRUE) # output errors and messages to log file
}
......@@ -114,15 +122,15 @@ process_batch <- function(nowfolder = "", parameters = list()) {
}
# calculate unified RT scale
prm <- get_unirt(list.files(pattern='.mzML')[1], prm)
prm <- get_unirt(file.path(prm$batchdir, list.files(path = prm$batchdir, pattern='.mzML')[1]) , prm)
# Read mzML-files for unirt, chromdata and samplenames (from files or Rdata-file, if present)-----------
if (!file.exists("mzML.rds")) {
smpl <- read_mzmlfiles(list.files(pattern='.mzML'), prm)
saveRDS(smpl, "mzML.rds") # ! NEEDED for testing, because data structure has been changed!
}else{
smpl <- readRDS("mzML.rds")
}
# if (!file.exists("mzML.rds")) {
smpl <- read_mzmlfiles( list.files(path = prm$batchdir, pattern='.mzML'), prm)
# saveRDS(smpl, "mzML.rds") # ! NEEDED for testing, because data structure has been changed!
# }else{
# smpl <- readRDS("mzML.rds")
# }
prm$nsmpl <- length(smpl$chroms)
# get additional info from sample.info
......@@ -140,7 +148,7 @@ process_batch <- function(nowfolder = "", parameters = list()) {
msd <- peak_detection(metab, smpl, prm)
if (prm$verbose >= 2) {
save(metab, smpl, msd, prm, file='troubleshoot.RData')
save(metab, smpl, msd, prm, file = file.path(prm$batchdir, 'troubleshoot.RData') )
}
# do not proceed in case of initial training
......@@ -185,14 +193,14 @@ process_batch <- function(nowfolder = "", parameters = list()) {
cat('\n Total R-time for this dataset: ', file=prm$log_con)
cat(proc.time() - ptm , file=prm$log_con)
ptime <- proc.time() - ptm
write(as.numeric(ptime[3]/60),file='R.time')
write(as.numeric(ptime[3]/60),file = file.path(prm$batchdir, 'R.time'))
# for documentation purposes append session info to log. do this at the end because it's a messy read.
cat(as.character(sessionInfo()), file=prm$log_con)
tryCatch({
# Call sample postprocessing if in pipeline mode
if (!(as.character(Sys.info()[["user"]]) == 'rstudio')){
if (!(as.character(Sys.info()[["user"]]) == 'rstudio') && !prm$runninglocal ){
rm(list = ls()) # empty workspace to avoid out of memory options
gc()
# system("chmod 777 overviewheatmap.pdf", intern=FALSE, wait=FALSE)
......@@ -203,7 +211,7 @@ process_batch <- function(nowfolder = "", parameters = list()) {
print ('qqq_auto_integrate complete.')
# tidy up
setwd(originalwd)
# setwd(originalwd)
par(pardefault)
}, error = function(err) {
......
read_mzmlfiles <- function(filelist,prm){
# reads .mzML files using mzR package
# sets up unirt (unified prm$timerange) by interpolation
# reads out chroms (samples x measured m/z ranges)
# reads out samplenames
# preallocate, some info will be added by read_sampleinfo
nsmpl <- length(filelist)
chroms <- list()
samplenames <- t(rep('unknown',nsmpl))
smpl <- list("chroms" = chroms,
"samplenames" = samplenames,
"filenames" = samplenames,
samplenames <- t(rep('unknown',nsmpl))
smpl <- list("chroms" = chroms,
"samplenames" = samplenames,
"filenames" = samplenames,
"unirt" = numeric(),
"type1" = character(nsmpl),
"type2" = character(nsmpl),
......@@ -23,41 +23,41 @@ read_mzmlfiles <- function(filelist,prm){
"measureddate" = character(nsmpl),
"plateposition" = character(nsmpl),
"nowtype" = character(nsmpl),
"sorttype" = character(nsmpl)
"sorttype" = character(nsmpl)
)
if (nsmpl < 1 ) {
print(paste('ERROR: no mzML files found'))
stop()
} else {
for (filenum in 1:length(filelist)){
if (prm$verbose >=2) {
cat('\r', paste('read_mzmlfiles:', filenum ,'of', nsmpl, ':', filelist[filenum]))
}
cat( paste('read_mzmlfiles:', filenum ,'of', nsmpl, ':', filelist[filenum]), file=prm$log_con)
chroms[[filenum]] <- list()
options(warn=-1)
# read file from mzML
mz <- mzR::openMSfile(filelist[filenum], backend = "pwiz")
mz <- mzR::openMSfile(file.path(prm$batchdir,filelist[filenum]), backend = "pwiz")
options(warn=0)
nowlabel <- character(mzR::nChrom(mz)-1)
for (it in 1:mzR::nChrom(mz)){
for (it in 1:mzR::nChrom(mz)){
ch <- mzR::chromatogramHeader(mz)
nowpolarity <- c('0', '-', '+')[ch[it,'polarity']+2]
nowlabel[it] <- paste0(nowpolarity ,
as.character(round(as.numeric(ch[it,'precursorIsolationWindowTargetMZ'] ),1)), '>',
as.character(round(as.numeric(ch[it,'precursorIsolationWindowTargetMZ'] ),1)), '>',
as.character(round(as.numeric(ch[it,'productIsolationWindowTargetMZ'] ),1)) )
}
# if there are multiple time segments for the same transition, they will have the same label
uniquelabel <- unique(nowlabel)
# interpolate intensities to unified time steps
# stitch time segments toghether
for (uit in 1:length(uniquelabel)) {
......@@ -79,16 +79,16 @@ read_mzmlfiles <- function(filelist,prm){
nowtime <- nowurtime
nowint <- nowurint
}
nowtimeorder <- order(nowtime)
nowtime <- nowtime[nowtimeorder]
nowint <- nowint[nowtimeorder]
# weed out possible NAs and add time point 0
goodpoints <- which(!is.na(nowtime + nowint))
nowtime <- c(0, nowtime[goodpoints], 9999999)
nowint <- c(nowint[goodpoints[1]], nowint[goodpoints],nowint[goodpoints[length(goodpoints)]])
# interpolate nowint to unified time scale
nowuint <- rep(0,prm$nscan)
maxtimediff <- 1.9*median(diff(nowtime),na.rm = TRUE) #?
......@@ -102,7 +102,7 @@ read_mzmlfiles <- function(filelist,prm){
}
chroms[[filenum]][[uit]] <- nowuint # [2:length(nowuint)]
}