Commit 70f6f055 authored by joerg.buescher

Merge branch 'test' into 'master'

tidy up after merge mess

See merge request !18
parents fe4cf45f 07b26dd9
^automRm\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
......@@ -10,8 +10,7 @@ Authors@R:
person(given = "Daniel",
family = "Eilertz",
role = c("aut", "cre"),
email = "first.last@example.com",
comment = c(ORCID = "YOUR-ORCID-ID"))
email = "eilertz@ie-freiburg.mpg.de")
Description: automRm processes LC-QQQ raw data in the open .mzML format and produces a user-friendly output of signal intensities for every analyte and every sample in .xlsx format. In addition to the .mzML files, a matching list of target metabolites must be available. automRm then uses two random forest models: the first decides which chromatographic peak most likely represents an analyte, and the second decides whether the data quality is sufficient to be given to a person with little metabolomics experience. Both random forest models can easily be trained to match one's LC-MS methods and quality standards.
License: MIT + file LICENSE
Encoding: UTF-8
......
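A minimal usage sketch of the workflow described in the Description above; initialize_prm() and batch_process() are the entry points named in this package, but the exact batch_process() arguments are not shown in this diff, so the call is left commented and without arguments.

library(automRm)
prm <- initialize_prm()    # default parameter list (see initialize_prm() further down)
prm$verbose <- 2           # example override of a default parameter
# batch_process(...)       # assumed entry point: processes the .mzML files in the working
#                          # folder using the two random forest models described above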
#' compressplot for faster loading of overview plots
compressplot <- function(y) {
# function to reduce the amount of data to be plotted for faster loading of overview plots
if (length(y) < 10) {
z <- y
} else {
......@@ -35,5 +35,5 @@ compressplot <- function(y) {
z[napos] <- NA
} # endif length(y) < 10
z
} # endfunction compressplot
} # endfunction
......@@ -3,7 +3,7 @@ consensus_peaks_update <- function(metab, smpl, msd, prm) {
print(' ')
# find ref sample
pstartmat <- pendmat <- rtshiftwheight <- rtshift <- rtshift2 <- matrix(nrow = prm$nsmpl, ncol=prm$nmet) # topcand <- candrt <- bestqs
pstartmat <- pendmat <- rtshiftwheight <- rtshift <- rtshift2 <- matrix(nrow = prm$nsmpl, ncol=prm$nmet)
# number of quality scores (taken from the first peak of the first metabolite in the first sample)
n_qs <- length(msd[[1]][[1]]$qs_list)
......@@ -23,7 +23,6 @@ refsmpl <- which.max(bestqs)[1]
# find shift relative to refsmpl
corweight <- list()
for (id in 1:prm$nsmpl) {
#id <- 17
corweight[[id]] <- list()
for (im in 1:prm$nmet) {
if (prm$verbose >= 2){
......@@ -110,7 +109,7 @@ if (prm$polarmets && !prm$sosomets){
nowtshift <- predshift + prm$tshiftfine
}
# smaller time window for second shift
tr <- which(abs(prm$unirt - metab[[im]]$RT ) < (prm$timewindow/3))
tr <- which(abs(prm$unirt - metab[[im]]$RT ) < (prm$timewindow*0.5))
shiftmatch <- numeric(length(nowtshift))
for (it in 1:length(nowtshift)) {
goodt <- which(tr+nowtshift[it] > 0 )
......@@ -123,6 +122,10 @@ if (prm$polarmets && !prm$sosomets){
shiftmatch[it] <- 0
}
}
# just in case there are multiple optima, prefer the one that is closest to the predicted shift
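# (nowtshift = predshift + prm$tshiftfine, so the Gaussian profile below is centred on the
# predicted shift and down-weights candidate optima that lie far away from it)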
var <- 20*(max(prm$tshiftfine) - min(prm$tshiftfine))
profile <- (exp(-(prm$tshiftfine)^2/(2*var)) / sqrt(2*pi*var))
shiftmatch <- (shiftmatch-min(shiftmatch))*profile
rtshift2[id,im] <- nowtshift[which.max(shiftmatch)]
} # for all goodmets
} else {
......@@ -177,7 +180,6 @@ for (im in 1:prm$nmet) {
pmat3[id, goodt] <- nd$x3[tr[goodt]+rtshift2[id,im]]
}
}
# print(foundtrace)
} # endfor all samples
# take average of all aligned chromatograms for each trace as prototype chromatogram
......@@ -190,13 +192,12 @@ for (im in 1:prm$nmet) {
p3[is.na(p3)] <- 0
# peak detection
# # peaknum <- 3
# determine candidates and their qs scores
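# p0: 3-point rolling mean of the three traces, each normalised to its own maximum and summed
# peak-top candidates are the local maxima of p0, i.e. sign changes of its first derivative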
p0 <- zoo::rollmean(rowSums(cbind(p1/max(p1,na.rm = TRUE), p2/max(p2,na.rm = TRUE), p3/max(p3,na.rm = TRUE)), na.rm = TRUE), 3, fill=0, align='center')
sp0 <- diff(p0)
maxsp0 <- max(abs(sp0))
local_max <- which(diff(sign(sp0))==-2)
peaktops <- local_max[order(p0[local_max], decreasing=T)] #[1:peaknum]
peaktops <- local_max[order(p0[local_max], decreasing=T)]
peaknum <- length(peaktops)
peakstart <- peakend <- numeric(peaknum)
for (ip in 1:peaknum){
......@@ -210,7 +211,7 @@ for (im in 1:prm$nmet) {
sp0 <- diff(p0)
maxsp0 <- max(abs(sp0))
local_max <- which(diff(sign(sp0))==-2)
peaktops <- c(peaktops, local_max[order(p0[local_max], decreasing=T)]) #[1:peaknum])
peaktops <- c(peaktops, local_max[order(p0[local_max], decreasing=T)])
peaknum2 <- length(peaktops) - peaknum
for (ip in 1:peaknum2){
if (!is.na(peaktops[peaknum+ip])) {
......@@ -220,14 +221,7 @@ for (im in 1:prm$nmet) {
}
peaktops <- peaktops[!is.na(peaktops)]
# # determine quality scores on original p0
# qsmat <- as.data.frame(matrix(nrow = (2*peaknum), ncol = n_qs))
# colnames(qsmat) <- names(msd[[im]][[id]]$qs_list)
# for (ip in 1:length(peaktops)) {
# qsmat[ip, ] <- get_quality_scores(p1, p2, p3, peakstart[ip], peaktops[ip], peakend[ip], nowusetrace, im, metab, prm, rangestart = tr[1])
# }
# alternative idea: get quality scores generated by peak_detection and
# get quality scores generated by peak_detection and
# find the consensus peak that represents the highest average quality score
originalrt <- unlist(sapply(msd[[im]], '[[', 'RT'))
originalqs <- unlist(sapply(msd[[im]], '[[', 'qs'))
......@@ -239,7 +233,7 @@ for (im in 1:prm$nmet) {
nowpend <- prm$unirt[max(c(3, min(c(prm$nscan, peakend[ip] + tr[1])) )) ]
peakoldqs[ip] <- sum(originalqs[intersect(which((originalrt - metabshift) > nowpstart),
which((originalrt - metabshift) < nowpend))]) /
(peakend[ip] - peakstart[ip])
sqrt(peakend[ip] - peakstart[ip])
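# note: dividing by sqrt(peak width) penalises wide peaks less strongly than dividing by the full width would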
}
}
peakoldqs[is.na(peakoldqs)] <- 0
......@@ -248,24 +242,11 @@ for (im in 1:prm$nmet) {
bestcand <- which(peakoldqs == max(peakoldqs))
bestcand <- bestcand[1]
# if (prm$ml_type == 'mltrain_initial'){
# # select best candidate by RF model
# pred_qs <- rowMeans(qsmat)
# bestcand <- which(pred_qs == max(pred_qs, na.rm = TRUE))[1]
# }else{
# # Transform input data
# trans <- predict(prm$train_preprocessing, as.data.frame(qsmat) )
# # select best candidate by RF model
# pred_qs <- predict(prm$model, trans, type="prob")
# bestcand <- which(pred_qs$H == max(pred_qs$H ))[1] # take first candidate if two bestcands are present
# }
pstart <- peakstart[bestcand]
pend <- peakend[bestcand]
# propagate peak start and end to all measurements
for (id in 1:prm$nsmpl) {
# id <- 1
nowpstart <- min(max(c(0, pstart + rtshift2[id,im] + tr[1])), (prm$nscan-10))
nowpend <- max(c(3, min(c(prm$nscan, pend + rtshift2[id,im] + tr[1])) ))
......
evaluate_peaks <- function(msd, prm){
# Function to evaluate peaks based on final random forest model
# In Trainig mode: all peak candiates are reported
# In Training mode: all peak candidates are reported
# Quantiles of the RF scores are calculated for each batch
# The final random forest model predicts peak scores from each peak's QS plus the RF score quantiles
# Updates msd:
......@@ -11,14 +10,14 @@ evaluate_peaks <- function(msd, prm){
print(' ')
trainingflag <- (substr(prm$ml_type,1,7) == 'mltrain') # only compute once for speed
if ((prm$verbose >=2) && trainingflag ) {#(prm$ml_train && prm$verbose >=2) {
if ((prm$verbose >=2) && trainingflag ) {
cat('\r', 'evaluate_peaks: Setting default values (report <- TRUE) in training mode')
}
for (im in 1:prm$nmet){
# calculate quantiles for each batch
ml_quantile <- quantile(sapply(msd[[im]], '[[', 'qs')) # quantile(rf_vec)
ml_quantile <- quantile(sapply(msd[[im]], '[[', 'qs'))
names(ml_quantile) <- paste0("RF",seq(0,100,25))
for (id in 1:prm$nsmpl){
......@@ -35,10 +34,8 @@ evaluate_peaks <- function(msd, prm){
nowquantile <- c(data.frame('Output_H' = msd[[im]][[id]]$qs), ml_quantile)
# msd[[im]][[id]]$ml_quantile <- ml_quantile # do we really ever need this again? -> Good question! -> if yes, it should be saved to metab
# predict final peak score
final_data <- as.data.frame(c(msd[[im]][[id]]$qs_list, nowquantile)) # as.data.frame(t(c(peaks_by_metab_temp[[im]][[id]][["peaks"]][[peak]]$qs_list,ml_quantile)))
final_data <- as.data.frame(c(msd[[im]][[id]]$qs_list, nowquantile))
final_data_prep <- predict(prm$train_preprocessing_final,final_data)
final_pred <- predict(prm$model_final,final_data_prep,type="prob")$H
......
evaluate_peaks_rf <- function(msd, prm){
# Function to evaluate peaks based on final random forest model
# In Training mode: all peak candidates are reported
# Quantiles of the RF scores are calculated for each batch
# The final random forest model predicts peak scores from each peak's QS plus the RF score quantiles
# Updates msd:
# - new label with report flag 'rep'
# - new variable per sample*metabolite 'report' with score probability
print(' ')
trainingflag <- (substr(prm$ml_type,1,7) == 'mltrain') # only compute once for speed
if ((prm$verbose >=2) && trainingflag ) {
cat('\r', 'evaluate_peaks_rf: Setting default values (report <- TRUE) in training mode')
}
for (im in 1:prm$nmet){
# calculate quantiles for each batch
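# quantile() with default probs gives the 0/25/50/75/100 % quantiles of the per-sample RF scores
# for this metabolite; they are labelled RF0, RF25, RF50, RF75 and RF100 below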
rf_quantile <- quantile(sapply(msd[[im]], '[[', 'qs'))
names(rf_quantile) <- paste0("RF",seq(0,100,25))
for (id in 1:prm$nsmpl){
# Set report flag to default if in ml training mode
if (trainingflag) {
msd[[im]][[id]]$report <- TRUE
next
}
if (prm$verbose >=2) {
cat('\r', paste('evaluate_peaks_rf: metab', im, 'sample', id, ' '))
}
nowrfquantile <- c(data.frame('Output_H' = msd[[im]][[id]]$qs), rf_quantile)
# predict final peak score
final_data <- as.data.frame(c(msd[[im]][[id]]$qs_list, nowrfquantile))
final_data_prep <- predict(prm$train_preprocessing_final,final_data)
final_pred <- predict(prm$model_rf_final,final_data_prep,type="prob")$H
# update plotdata label
quantile_label <- paste0(names(rf_quantile),": ",rf_quantile,collapse=" ")
new_label <- paste0(paste("rep: ",final_pred),"\n",quantile_label)
msd[[im]][[id]]$label2 <- paste(msd[[im]][[id]]$label2, new_label)
nowchecksum <- sum(msd[[im]][[id]]$foundchrom)
# Report the peak only if the predicted probability is at least 0.5 and more than one chromatogram trace was found
msd[[im]][[id]]$report <- ((final_pred >= 0.5) && (nowchecksum > 1))
}
}
# Return updated data
msd
} # endfunction
......@@ -48,7 +48,7 @@ generate_Xy_data_peaks_final <- function(xlsx_path,tsv_path){
# Get index of sample*metabolites and scrape qs
row_index <- which(rownames(my_data) == sample_metab)
qs_scores<- tsv_td[row, tsvqscols ] # 5:ncol(tsv_td)-1]
qs_scores<- tsv_td[row, tsvqscols ]
# get indices of the current metab to aggregate the rf hit probability
metab_indices <- which(tsv_td[,2] == tsv_td[row,2])
......@@ -57,10 +57,6 @@ generate_Xy_data_peaks_final <- function(xlsx_path,tsv_path){
my_data[row_index,2:ncol(my_data)] <- c(qs_scores, ml_quantile)
}
# Only use subset of mamatrix where peak classification is 2 or 0 (1's are maybe peaks)
#my_data <- my_data[my_data[,1] != 1,]
#my_data[my_data[,1] == 1,1] <- 0
# Set y to factor
my_data$y <- as.factor(my_data$y)
......
generate_ml_training_data <- function(tsv_path, xlsx_path){
# tsv_path <- paste0(nnwd, 'ccm/mzML','/qslog_initial.tsv')
# xlsx_path <- paste0(nnwd,'ccm/trainingSolution.xlsx')
# Read peak candidate quality scores
tsv_td <- read.table(file = tsv_path, sep = '\t', stringsAsFactors = FALSE)
......@@ -15,9 +13,6 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
x12_13c_flags <- peaks_candidate_Xy[ , c('12C','13C')]
peaks_candidate_Xy <- peaks_candidate_Xy[ , 1:(which(colnames(peaks_candidate_Xy) %in% c('12C', '13C') )[1] -1)]
# head(peaks_candidate_Xy)
# head(x12_13c_flags)
n_extra_col = 4
# set dummies for training data
......@@ -33,15 +28,15 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
stop()
}
#sample names
# sample names
data_files <- unlist(strsplit(td[3:nrow(td), which(td[2,]=='Data File')],"[.]"))
if(any(data_files=="d")){
if (any(data_files=="d")) {
train$id <- data_files[seq(1,length(data_files),2)]
}else{
} else {
train$id <- data_files
}
#Metabolite names
# Metabolite names
train$mets <- setdiff( unique(as.character(td[1,])), c('id', NA) )
train$mets <- train$mets[!substr(train$mets,1,9) == "Qualifier"]
# remove " Results" for better string matching results
......@@ -51,9 +46,8 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
}
}
# Only take numeric values as matrix for Xall
Xall <- peaks_candidate_Xy[ , grep('QS_', colnames(peaks_candidate_Xy))] #(n_extra_col+1):ncol(peaks_candidate_Xy)]
Xall <- peaks_candidate_Xy[ , grep('QS_', colnames(peaks_candidate_Xy))]
Xall <- matrix(as.numeric(unlist(Xall)),nrow=dim(Xall)[1],ncol=dim(Xall)[2])
colnames(Xall) <- colnames(peaks_candidate_Xy[ , grep('QS_', colnames(peaks_candidate_Xy))] )
......@@ -63,17 +57,16 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
metabs <- unique(as.character(peaks_candidate_Xy$Metabolite))
samples <- unique(as.character(peaks_candidate_Xy$id))
# Loop over metabolites in peaks_by_metab
# Loop over metabolites
for (met in metabs){
#met="Uridine"
cat('\r', paste('generate_Xy_data_peaks:', met, ' '))
rt <- rep(0,length(3:dim(td)[1]))
start <- rep(0,length(3:dim(td)[1]))
end <- rep(0,length(3:dim(td)[1]))
# Go trhough metabolites in training excel sheet (colums)
# Find matching metabolite in peaks_by_metab
# Go through metabolites in training excel sheet (columns)
# Find matching metabolite
met_index <- which(tolower(td[1,]) == tolower(met))
if (length(met_index) == 0) {
......@@ -81,48 +74,36 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
next
}
# get column by column name rather than assuming column order
rtcol <- which(tolower(td[2, ]) == 'rt')
rtcol <- min(rtcol[rtcol > met_index], na.rm = TRUE)
startcol <- which(tolower(td[2, ]) == 'int. start')
startcol <- min(startcol[startcol > met_index], na.rm = TRUE)
endcol <- which(tolower(td[2, ]) == 'int. end')
endcol <- min(endcol[endcol > met_index], na.rm = TRUE)
# Get rt, peakstart, peakend from training data for peak evaluation
rt <- as.numeric(td[3:dim(td)[1], rtcol])
start <- as.numeric(td[3:dim(td)[1], startcol])
end <- as.numeric(td[3:dim(td)[1], endcol])
rt <- as.numeric(td[3:dim(td)[1],met_index + 1 ])
start <- as.numeric(td[3:dim(td)[1],met_index + 2])
end <- as.numeric(td[3:dim(td)[1],met_index + 3])
goodpos <- which((as.numeric(!is.na(rt)) * as.numeric(!is.na(start)) * as.numeric(!is.na(end)) ) == 1)
# If end == start, metabolite wasn't detected (take care of NAs)
sample_detected <- end > start
sample_detected[is.na(sample_detected)] <- FALSE
# Loop over samples
# Loop over samples in peaks_by_metab
for (sample in samples){
# index of sample row in training data (different tables around)
smp_index_train <- which(train$id==sample)
smp_index_train <- which(train$id==sample) # - 2 # minus two header rows
if ( !(smp_index_train %in% goodpos) ) {
print(paste('skipping', td$id[smp_index_train]))
next
}
#index of sample row in measured data
# index of sample row in measured data
smp_index_mes <- which(peaks_candidate_Xy$id==sample & peaks_candidate_Xy$Metabolite==met)
#measured rt
# measured rt
mes_rt <- as.numeric(peaks_candidate_Xy[smp_index_mes,"RT"])
#training rt
# training rt
rt[smp_index_train]
#initialize hitlist 0 (peak not detected)
# initialize hitlist 0 (peak not detected)
hit_list <- rep(0,5)
# If not measured, set hitlist to 2 (not measured)
......@@ -133,14 +114,10 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
# check if rt is within the training peak range and choose the nearest measured peak candidate
peak_in_range <- which((start[smp_index_train] < mes_rt) & (mes_rt < end[smp_index_train]))
if (length(peak_in_range) > 1) {
peak_in_range <- peak_in_range[ which.max(as.numeric(peaks_candidate_Xy[smp_index_mes[peak_in_range], 'QS_height']))[1] ]
}
hit_list[peak_in_range] <- 1
clostest_peak <- which(abs(rt[smp_index_train] - mes_rt) == min(abs(rt[smp_index_train] - mes_rt)))
# clostest_peak <- which(abs(rt[smp_index_train] - mes_rt) == min(abs(rt[smp_index_train] - mes_rt)))
# Set hitlist to 1 (peak detected)
# hit_list[intersect(peak_in_range,clostest_peak)] <- 1
hit_list[intersect(peak_in_range,clostest_peak)] <- 1
}
# Add hitlist to yall
......@@ -152,8 +129,6 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
# Set rownames
x_y_names <- paste(peaks_candidate_Xy$Metabolite,peaks_candidate_Xy$id,peaks_candidate_Xy$Peak,sep=" ")
rownames(Xall) <- names(yall) <- x_y_names
#head(Xall)
#head(peaks_candidate_Xy)
# Remove metabolites that were not measured from Xall and yall (y[ ,1] == y[ , 2])
non_measured <- c(which(yall == 2), which(is.na(yall)))
......@@ -162,7 +137,7 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
Xall <- Xall[-non_measured, ]
x12_13c_flags <- x12_13c_flags[-non_measured,]
#Restructure data structures for further processing
# Restructure data structures for further processing
Xall_stack <- as.data.frame(Xall)
yall_stack <- yall
x12_13c_stack <- x12_13c_flags
......@@ -175,5 +150,4 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
xy_data <- list("training_data"=training_data,"x12_13c_flags"=x12_13c_flags)
xy_data
}
} # endfunction
get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetrace, im, metab, prm, rtshift = 0, rangestart = 0) {
# peakstart <- peakstart[ip]
# peaktop <- peaktops[ip]
# peakend <- peakend[ip]
......@@ -53,7 +54,6 @@ get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetr
# QS5 deviation from expected RT
# algo in mrm_integrate: max(c(1/(1 + (0.02*abs(peaktops[i] - metparam$peaktopa) / (1+metparam$peaktopadelta) ) )^2,0))
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency)
dRT_value <- (1/(1+abs(prm$unirt[peaktop + rangestart] - prm$unirt[peaktopa]) ))^2
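# worked example (assuming prm$unirt is in minutes): a peak top found 0.5 min away from the
# expected RT gives dRT_value = (1/(1+0.5))^2, i.e. about 0.44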
qs_list$QS_dRT <- check_qs_value(dRT_value)
......@@ -75,7 +75,6 @@ get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetr
qs_list$QS_w <- check_qs_value(widthscore)
# QS7 | QS8 | QS9 average correlation (quant, qual12c, qual13c)
#Check if nowusetrace is 0
if (nowusetrace[1] == FALSE | nowusetrace[2] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
......
#' get chromatograms for consensus_peaks
#'
get_ref_chromatograms <- function(metab, msd, smpl, id, refsmpl, im){
refx1 <- smpl$chroms[[refsmpl]][[metab[[im]]$quant$MRM]]
x1 <- smpl$chroms[[id]][[metab[[im]]$quant$MRM]]
# get chromatograms for consensus_peaks
refx1 <- smpl$chroms[[refsmpl]][[metab[[im]]$quant$MRM]]
x1 <- smpl$chroms[[id]][[metab[[im]]$quant$MRM]]
if ((is.null(refx1) || is.null(x1))) {
refx1 <- x1 <- rep(0,length(refx1))
}
if (msd[[im]][[id]]$foundchrom[2]) {
refx2 <- smpl$chroms[[refsmpl]][[metab[[im]]$qual12c$MRM[1] ]]
x2 <- smpl$chroms[[id]][[metab[[im]]$qual12c$MRM[1] ]]
if (msd[[im]][[id]]$foundchrom[2]) {
refx2 <- smpl$chroms[[refsmpl]][[metab[[im]]$qual12c$MRM[1] ]]
x2 <- smpl$chroms[[id]][[metab[[im]]$qual12c$MRM[1] ]]
if ((is.null(refx2) || is.null(x2))) {
refx2 <- x2 <- rep(0,length(refx1))
}
} else {
refx2 <- x2 <- rep(0,length(refx1))
}
if (msd[[im]][[id]]$foundchrom[3]) {
refx3 <- smpl$chroms[[refsmpl]][[metab[[im]]$qual13c$MRM[1] ]]
x3 <- smpl$chroms[[id]][[metab[[im]]$qual13c$MRM[1] ]]
if (msd[[im]][[id]]$foundchrom[3]) {
refx3 <- smpl$chroms[[refsmpl]][[metab[[im]]$qual13c$MRM[1] ]]
x3 <- smpl$chroms[[id]][[metab[[im]]$qual13c$MRM[1] ]]
if ((is.null(refx3) || is.null(x3))) {
refx3 <- x3 <- rep(0,length(refx1))
}
} else {
refx3 <- x3 <- rep(0,length(refx1))
}
d <- list("x1" = x1, "x2" = x2, "x3" = x3,
d <- list("x1" = x1, "x2" = x2, "x3" = x3,
"refx1" = refx1, "refx2" = refx2, "refx3" = refx3 )
} # endfuction
\ No newline at end of file
} # endfunction
#' Initialize list of parameters with default values.
#' This function does not use any input parameters.
#' You will have to overwrite some of these values. To do this, define the new
#' values in a file called update_prm.tsv located in the same folder as the
#' .mzML files. Alternatively, you can define the new values when calling
#' batch_process().
#'
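#' A minimal example of overriding defaults in code (values are illustrative;
#' verbose and fancyformat are parameters defined in this function):
#' @examples
#' \dontrun{
#' prm <- initialize_prm()
#' prm$verbose <- 2
#' prm$fancyformat <- TRUE
#' }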
#' @export
initialize_prm <- function() {
# initialize list of global parameters
prm <- list()
# Model type
#prm$ml_type <- 'mltrain_initial'
#prm$ml_type <- 'mltrain_pcand'
prm$ml_type <- 'mlprod'
# Comand line args
# Command line args
prm$polarmets <- TRUE
prm$sosomets <- FALSE
prm$expertmode <- FALSE
prm$fancyformat <- FALSE
prm$expertmode <- FALSE # additional debugging outputs
prm$fancyformat <- FALSE # add conditional formatting in the Excel output. CAUTION: incompatible with recent versions of MS Excel; works well with LibreOffice.
prm$verbose <- 1
# Check if local or on server
# Check if local or on server ------------------------------
if (as.character(Sys.info()[["user"]]) == 'rstudio'){
prm$runninglocal <- TRUE
prm$pathprefix <- '/home/rstudio/'
} else {
# Set testing environemt
# Set testing environment
prm$runninglocal <- FALSE
prm$pathprefix <- '/'
}
# Set up logging (log to file only when running on server) ------------------------------
prm$log_con <- file("R_messages.log",open="a")
# Set global parameters for peak detection
prm$timerange <- c(0,5) # time range in minutes --> will be re-determined based on first sample further down
prm$samplingfrequency <- 2 # samplingfrequency (time resolution of analysis) in Hz
......@@ -45,8 +50,8 @@ initialize_prm <- function() {
prm$groupmetsearchwindow <- 90 # search window in seconds
prm$minNormArea <- 10000 # minimum median area of 13C ISTD to use for normalization
prm$minwidth <- 4 # minimum peak width in seconds
prm$qscutoff <- c(0.8,0.6) # 0.72 - minimum quality score to report a peak [1] used for outliers [2] used for peaks within RT range
prm$quartile_range_factor <- 3 # factor to spread interquartile range for peak's RT outlier detection
prm$qscutoff <- c(0.8,0.6) #0.72 #minimum quality score to report a peak [1] used for outliers [2] used for peaks within RT range
prm$quartile_range_factor <- 3 #factor to spread interquartile range for peak's RT outlier detection
prm$initial_candidates <- 5 # to be used in qqq_mrm_integrage
prm$timewindow <- 1.5 # time window for correlating
prm$tshiftcrude <- seq(-60,60,2) # steps for time shift test
......@@ -54,19 +59,19 @@ initialize_prm <- function() {
prm$microsearch <- seq(-6,6,1) # steps for microsearch of minimum when propagating consensus peak to single chromatograms
prm$nmet <- 0 # number of metabolites, will be updated later
prm$nsmpl <- 0 # number of samples, will be updated later
prm$polarities <- list('Positive'= '+','Negative'= '-','-'='Negative','+'='Positive')
prm$polarities = list('Positive'= '+','Negative'= '-','-'='Negative','+'='Positive')
# Paths and columns
# prm$RTcolumn <- 'LunaNH2.RT.QQQ'
# prm$excelfile <- paste0(prm$pathprefix, 'data/helperfiles/metabolite_databases/AgilentQQQdb.xlsx') # polar is default
prm$model_path <- paste0(prm$pathprefix, 'data/helperfiles/testdata/LC_machine_learning/model_rf5_pcand.Rdata')
prm$model_final_path <- paste0(prm$pathprefix, 'data/helperfiles/testdata/LC_machine_learning/model_rf9_finalp.Rdata')
prm$model_rf_path <- paste0(prm$pathprefix, 'data/helperfiles/testdata/LC_machine_learning/model_pcand.Rdata')
prm$model_rf_final_path <- paste0(prm$pathprefix, 'data/helperfiles/testdata/LC_machine_learning/model_finalp.Rdata')
#Rouse?
prm$train_path <- "" #must be set until intermediate pipeline is replaced
#?
prm$sum12c <- FALSE # sum 12c qual and quant area/height #
# option to use sum of quantifier and 12C qualifier as intensity value instead of only quantifier
prm$sum12c <- FALSE # sum 12c qual and quant area/height
# Polarmets as default