Commit c8d2f75e authored by daniel.eilertz

Merge branch 'dev' into 'master'

Adding quality scores for Gauss shape and quantifier ratio, and dynamic handling of...

See merge request !10
parents 425dbe56 37adc58d
@@ -21,10 +21,13 @@ RoxygenNote: 7.1.1
Imports:
caret (>= 6.0-86),
e1071 (>= 1.7-3),
kernlab (>= 0.9-27),
mgcv (>= 1.8-28),
mzR (>= 2.20.0),
neuralnet (>= 1.44.2),
nnls (>= 1.4),
openxlsx (>= 4.1.4),
randomForest (>= 4.6-14),
robustbase(>= 0.93-6),
robustbase (>= 0.93-6),
xgboost (>= 0.82.1),
zoo (>= 1.8-7)
@@ -71,7 +71,7 @@ if (prm$polarmets && !prm$sosomets){
}
if (id != refsmpl){
# fit shift for sample as robust quadratic
nowfitdata <- data.frame(shift = rtshift[id, ], rt = sapply(metab,'[[', 'RT')) # metabdb_data$metdb$RT[goodmets])
nowfitdata <- data.frame(shift = rtshift[id, ], rt = sapply(metab,'[[', 'RT'))
if (prm$sosomets) {
# poor man's robust spline fit
noww <- (rtshiftwheight[id, ]+1) * 1/(abs(nowfitdata$shift)+3)
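# Illustrative sketch (hypothetical numbers, not the package code): the "poor man's
# robust spline" idea is a weighted smooth fit in which points with large apparent
# shifts get small weights, so outliers barely bend the fitted curve.
shift <- c(0.10, 0.18, 3.00, 0.15, 0.22, 0.30)   # hypothetical RT shifts, one outlier
rt    <- c(1, 2, 3, 4, 5, 6)                     # expected retention times
w     <- 1 / (abs(shift) + 3)                    # downweight large (probably wrong) shifts
fit   <- smooth.spline(rt, shift, w = w, df = 3) # weighted spline through the shifts
predict(fit, x = 3.5)$y                          # predicted shift at an unseen RT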
@@ -109,14 +109,16 @@ if (prm$polarmets && !prm$sosomets){
} else {
nowtshift <- predshift + prm$tshiftfine
}
tr <- which(abs(prm$unirt - metab[[im]]$RT ) < prm$timewindow)
# smaller time window for second shift
tr <- which(abs(prm$unirt - metab[[im]]$RT ) < (prm$timewindow/3))
shiftmatch <- numeric(length(nowtshift))
for (it in 1:length(nowtshift)) {
goodt <- which(tr+nowtshift[it] > 0 )
if (sum(!is.na(nd$refx1[tr[goodt]] * nd$x1[tr[goodt]+nowtshift[it]])) > 10) { # need at least 10 points to compute correlation
shiftmatch[it] <- sum(c(corweight[[id]][[im]][1] * cor(nd$refx1[tr[goodt]], nd$x1[tr[goodt]+nowtshift[it]], use="pairwise.complete.obs"),#"complete.obs"),
corweight[[id]][[im]][2] * cor(nd$refx2[tr[goodt]], nd$x2[tr[goodt]+nowtshift[it]], use="pairwise.complete.obs"),#"complete.obs"),
corweight[[id]][[im]][3] * cor(nd$refx3[tr[goodt]], nd$x3[tr[goodt]+nowtshift[it]], use="pairwise.complete.obs")), na.rm=TRUE)#"complete.obs")), na.rm = TRUE)
shiftmatch[it] <- sum(c(corweight[[id]][[im]][1] * cor(nd$refx1[tr[goodt]], nd$x1[tr[goodt]+nowtshift[it]], use="pairwise.complete.obs"),
corweight[[id]][[im]][2] * cor(nd$refx2[tr[goodt]], nd$x2[tr[goodt]+nowtshift[it]], use="pairwise.complete.obs"),
corweight[[id]][[im]][3] * cor(nd$refx3[tr[goodt]], nd$x3[tr[goodt]+nowtshift[it]], use="pairwise.complete.obs")), na.rm=TRUE)
} else {
shiftmatch[it] <- 0
}
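# Illustrative sketch (hypothetical traces, not the package objects): the shift search
# correlates the reference trace with shifted copies of the sample trace and keeps the
# shift giving the highest correlation; with three traces the correlations are weighted
# by corweight and summed, as above.
ref    <- dnorm(1:100, mean = 50, sd = 5)   # reference chromatogram window
smp    <- dnorm(1:100, mean = 57, sd = 5)   # same peak in a sample, shifted by 7 scans
shifts <- -15:15
score  <- sapply(shifts, function(s) {
  idx <- 1:100 + s
  ok  <- idx >= 1 & idx <= 100
  if (sum(ok) > 10) cor(ref[ok], smp[idx[ok]], use = "pairwise.complete.obs") else 0
})
shifts[which.max(score)]                    # ~7, the shift that best aligns the traces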
@@ -142,12 +144,15 @@ for (im in 1:prm$nmet) {
tr <- which(abs(prm$unirt - metab[[im]]$RT ) < prm$timewindow)
pmat1 <- pmat2 <- pmat3 <- matrix(nrow = prm$nsmpl, ncol = length(tr))
refx1 <- smpl$chroms[[refsmpl]][[ metab[[im]]$quant$MRM ]] # mzml_data$chromdata[[refsmpl]][[metabdb_data$quantifier[[im]]$MRM]]
if (!is.null(refx1)) {
# currently we only use the timewindow around the expexted RT that we used for rtshift adjustment
# for the creation of the prototype. Consequently, only peaks within this timewindow can be found
# One could test to use the whole chromatogram instead. However the alignment of parts outside the
# timewindow will probably be bad.
refx1 <- smpl$chroms[[refsmpl]][[ metab[[im]]$quant$MRM ]]
if ((!is.null(refx1)) && (sum(!is.na(refx1[tr])) > 20)) {
# currently we only use the time window around the expected RT
# that we used for rtshift adjustment for the creation of the
# prototype. Consequently, only peaks within this time window
# can be found. One could test to use the whole chromatogram
# instead. However the alignment of parts outside the
# time window will probably be bad.
for (id in 1:prm$nsmpl) {
if (prm$verbose >= 2){
cat('\r', paste('consensus_peaks: metab', im, 'sample', id, ' ')) # much faster than print(id)
@@ -185,13 +190,14 @@ for (im in 1:prm$nmet) {
p3[is.na(p3)] <- 0
# peak detection
peaknum <- 3
# # peaknum <- 3
# determine candidates and their qs scores
p0 <- zoo::rollmean(rowSums(cbind(p1/max(p1,na.rm = TRUE), p2/max(p2,na.rm = TRUE), p3/max(p3,na.rm = TRUE)), na.rm = TRUE), 3, fill=0, align='center')
sp0 <- diff(p0)
maxsp0 <- max(abs(sp0))
local_max <- which(diff(sign(sp0))==-2)
peaktops <- local_max[order(p0[local_max], decreasing=T)][1:peaknum]
peaktops <- local_max[order(p0[local_max], decreasing=T)] #[1:peaknum]
peaknum <- length(peaktops)
peakstart <- peakend <- numeric(peaknum)
for (ip in 1:peaknum){
if (!is.na(peaktops[ip])){
@@ -204,8 +210,9 @@ for (im in 1:prm$nmet) {
sp0 <- diff(p0)
maxsp0 <- max(abs(sp0))
local_max <- which(diff(sign(sp0))==-2)
peaktops <- c(peaktops, local_max[order(p0[local_max], decreasing=T)][1:peaknum])
for(ip in 1:peaknum){
peaktops <- c(peaktops, local_max[order(p0[local_max], decreasing=T)]) #[1:peaknum])
peaknum2 <- length(peaktops) - peaknum
for (ip in 1:peaknum2){
if (!is.na(peaktops[peaknum+ip])) {
peakstart[peaknum+ip] <- max(c(which(sp0[1: max(c(1,(peaktops[peaknum+ip]-3))) ] < 0.01*maxsp0),1))
peakend[peaknum+ip] <- peaktops[peaknum+ip]+3+min(which(-sp0[(peaktops[peaknum+ip]+3):length(sp0)] < 0.01*maxsp0), length(p0))
@@ -213,25 +220,45 @@ for (im in 1:prm$nmet) {
}
peaktops <- peaktops[!is.na(peaktops)]
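# Illustrative sketch (synthetic trace, not the package data): peak candidates are the
# local maxima of the smoothed summed trace, i.e. the positions where the sign of the
# first derivative flips from + to - (diff(sign(.)) == -2), ranked by their height.
x   <- dnorm(1:200, 60, 8) + 0.6 * dnorm(1:200, 140, 10)  # two synthetic peaks
p0  <- zoo::rollmean(x, 3, fill = 0, align = 'center')    # light smoothing
sp0 <- diff(p0)                                           # first derivative
local_max <- which(diff(sign(sp0)) == -2)                 # + to - sign changes
local_max[order(p0[local_max], decreasing = TRUE)]        # candidates, highest first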
# determine quality scores on original p0
qsmat <- as.data.frame(matrix(nrow = (2*peaknum), ncol = n_qs))
colnames(qsmat) <- names(msd[[im]][[id]]$qs_list)
# # determine quality scores on original p0
# qsmat <- as.data.frame(matrix(nrow = (2*peaknum), ncol = n_qs))
# colnames(qsmat) <- names(msd[[im]][[id]]$qs_list)
# for (ip in 1:length(peaktops)) {
# qsmat[ip, ] <- get_quality_scores(p1, p2, p3, peakstart[ip], peaktops[ip], peakend[ip], nowusetrace, im, metab, prm, rangestart = tr[1])
# }
# alternative idea: get quality scores generated by peak_detection and
# find the consensus peak that represents the highest average quality score
originalrt <- unlist(sapply(msd[[im]], '[[', 'RT'))
originalqs <- unlist(sapply(msd[[im]], '[[', 'qs'))
metabshift <- 1/(prm$samplingfrequency * 60) * rtshift2[ ,im]
peakoldqs <- numeric(length(peaktops))
for (ip in 1:length(peaktops)) {
qsmat[ip, ] <- get_quality_scores(p1, p2, p3, peakstart[ip], peaktops[ip], peakend[ip], nowusetrace, im, metab, prm, rangestart = tr[1])
if (!is.na(peakstart[ip]) && !is.na(peakend[ip])) {
nowpstart <- prm$unirt[min(max(c(0, peakstart[ip] + tr[1])), (prm$nscan-10))]
nowpend <- prm$unirt[max(c(3, min(c(prm$nscan, peakend[ip] + tr[1])) )) ]
peakoldqs[ip] <- sum(originalqs[intersect(which((originalrt - metabshift) > nowpstart),
which((originalrt - metabshift) < nowpend))]) /
(peakend[ip] - peakstart[ip])
}
}
peakoldqs[is.na(peakoldqs)] <- 0
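# Illustrative sketch (made-up numbers, not the package objects): each consensus
# candidate is scored by summing the quality scores of the per-sample peaks whose
# shift-corrected RT falls inside the candidate window, normalised by the window
# width; the window supported by the most / best original peaks wins.
originalrt <- c(5.02, 5.05, 7.90)   # RTs of peaks found earlier in individual samples
originalqs <- c(0.9, 0.8, 0.3)      # their quality scores
cand_start <- c(4.9, 7.8)           # two consensus candidate windows
cand_end   <- c(5.2, 8.1)
score <- sapply(seq_along(cand_start), function(ip) {
  inside <- originalrt > cand_start[ip] & originalrt < cand_end[ip]
  sum(originalqs[inside]) / (cand_end[ip] - cand_start[ip])
})
which.max(score)                    # 1: the first window collects the better evidence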
# Get best peak
if (prm$ml_type == 'mltrain_initial'){
# select best candidate by RF model
pred_qs <- rowMeans(qsmat)
bestcand <- which(pred_qs == max(pred_qs))
}else{
# Transform input data
trans <- predict(prm$train_preprocessing, as.data.frame(qsmat) )
# select best candidate by RF model
pred_qs <- predict(prm$model_rf, trans, type="prob")
bestcand <- which(pred_qs$H == max(pred_qs$H ))[1] # take first candidate if two bestcands are present
}
bestcand <- which(peakoldqs == max(peakoldqs))
bestcand <- bestcand[1]
# if (prm$ml_type == 'mltrain_initial'){
# # select best candidate by RF model
# pred_qs <- rowMeans(qsmat)
# bestcand <- which(pred_qs == max(pred_qs, na.rm = TRUE))[1]
# }else{
# # Transform input data
# trans <- predict(prm$train_preprocessing, as.data.frame(qsmat) )
# # select best candidate by RF model
# pred_qs <- predict(prm$model, trans, type="prob")
# bestcand <- which(pred_qs$H == max(pred_qs$H ))[1] # take first candidate if two bestcands are present
# }
pstart <- peakstart[bestcand]
pend <- peakend[bestcand]
@@ -240,7 +267,7 @@ for (im in 1:prm$nmet) {
for (id in 1:prm$nsmpl) {
# id <- 1
nowpstart <- min(max(c(0, pstart + rtshift2[id,im] + tr[1])), (prm$nscan-10))
nowpend <- min(c(prm$nscan, pend + rtshift2[id,im] + tr[1]))
nowpend <- max(c(3, min(c(prm$nscan, pend + rtshift2[id,im] + tr[1])) ))
# micro-adjust start and end
nowmicrorangestart <- nowpstart+prm$microsearch
@@ -287,7 +314,7 @@ for (im in 1:prm$nmet) {
final_peak_class$H <- mean(as.numeric(nowqs))
}else{
trans <- predict(prm$train_preprocessing, as.data.frame(nowqs) )
final_peak_class <- predict(prm$model_rf, trans,type="prob")
final_peak_class <- predict(prm$model, trans,type="prob")
}
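# Self-contained sketch of the caret pattern used here (toy data and toy quality-score
# columns; prm$train_preprocessing and prm$model are assumed to be a caret preProcess
# object and a caret train() classifier with classes H / L, which the sketch mimics):
library(caret)
set.seed(1)
X <- data.frame(QS_a = runif(60), QS_b = runif(60))          # toy quality scores
y <- factor(ifelse(X$QS_a + X$QS_b > 1, 'H', 'L'))           # toy peak labels
pp    <- preProcess(X, method = c('center', 'scale'))        # like prm$train_preprocessing
model <- train(predict(pp, X), y, method = 'rf',
               trControl = trainControl(classProbs = TRUE))  # like prm$model
newqs <- data.frame(QS_a = 0.9, QS_b = 0.8)                  # one new candidate
predict(model, predict(pp, newqs), type = 'prob')$H          # probability of a good peak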
# log quality scores for ML training
@@ -313,25 +340,23 @@ for (im in 1:prm$nmet) {
msd[[im]][[id]]$label1 <- paste0(metab[[im]]$name, '\n',
smpl$samplenames[[id]], '\n',
'QS_pcand = ', round(final_peak_class$H*100) )
msd[[im]][[id]]$label2 <- paste0(metab[[im]]$name, '\n',
smpl$samplenames[[id]], '\n',
'Cor:', round(nowqs$QS_cor123, 2),
' T/O:', round(nowqs$QS_T.o, 2),
' T/Bmax:', round(nowqs$QS_T.h, 2),
' T/Bmin:', round(nowqs$QS_T.l, 2),
' dRT:', round(nowqs$QS_dRT, 2),
' width:', round(nowqs$QS_w, 2),
' Cor 12c:', round(nowqs$QS_cor12, 2),
' Cor 13c:', round(nowqs$QS_cor13, 2),
' Cor 12c/13c: ', round(nowqs$QS_cor23, 2),
'\n',
' PInt: ', round(nowqs$QS_height, 2),
' relRT:' , round(nowqs$QS_relRT, 2),
' T/O 12c:' , round(nowqs$QS_T.o2, 2),
' T/O 13c:' , round(nowqs$QS_T.o3, 2),
' dRT_shift:' , round(nowqs$QS_dRTs, 2),
label2 <- paste0(metab[[im]]$name, '\n',
smpl$samplenames[[id]], '\n')
qsnames <- names(nowqs)
nowcount <- 0
for (iq in 1:length(nowqs) ) {
nowcount <- nowcount + 1
label2 <- paste0( label2, substr(qsnames[iq], 4, nchar(qsnames[iq]) ) , ': ', as.character(round(nowqs[[qsnames[iq]]],2) ), ' ')
if ((nowcount > 7) && (iq < length(nowqs))) {
label2 <- paste0(label2, '\n')
nowcount <- 0
}
}
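# (the loop above strips the leading 'QS_' from every score name and wraps the
#  label onto a new line after every 8 quality scores)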
msd[[im]][[id]]$label2 <- paste0(label2,
'\n',
' rf_pcand:' , final_peak_class$H)
' ML_pcand: ' , round(final_peak_class$H,2))
} # endfor all samples
} # endif metabdb_data$quantifier was measured in refsample
@@ -342,20 +367,43 @@ if (prm$verbose >= 1) {
# write log of quality scores for training to tsv file
write.table(qslog, file = file.path(prm$batchdir, 'qslog.tsv'), sep = '\t', row.names = FALSE)
nowscore <- matrix(nrow = prm$nsmpl, ncol = prm$nmet, data = NA)
# use old scores if available and peaks match
if (file.exists(file.path(prm$batchdir, "manual_peakcheck.xlsx"))) {
oldcheck <- openxlsx::read.xlsx(file.path(prm$batchdir, "manual_peakcheck.xlsx"), colNames = FALSE)
for (id in 1:prm$nsmpl) {
spos <- which(oldcheck[ ,1] == smpl$filenames[id])
if (length(spos) == 1) {
for (im in 1:prm$nmet) {
mpos <- which(oldcheck[1, ] == metab[[im]]$name)
if (length(mpos) == 1) {
if (!any(is.na(c(pstartmat[id ,im],pendmat[id ,im] )))) {
if ( (abs(as.numeric(oldcheck[spos, mpos+1]) - prm$unirt[pstartmat[id ,im]] ) < 0.1) &&
(abs(as.numeric(oldcheck[spos, mpos+2]) - prm$unirt[pendmat[id ,im]] ) < 0.1) ) {
nowscore[id,im] <- oldcheck[spos, mpos]
}
}
}
}
}
}
}
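# Expected layout of manual_peakcheck(.xlsx), as read above and written below:
# row 1 holds the metabolite name at column 3*im, row 2 the 'Score'/'Start'/'End'
# headers, column 1 holds the sample file names from row 4, and each metabolite
# occupies one Score/Start/End column triplet.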
# write template for training solution to xlsx file
wb <- openxlsx::createWorkbook()
openxlsx::addWorksheet(wb, 'Sheet1')
openxlsx::writeData(wb, 'Sheet1', smpl$filenames, startCol = 1, startRow = 4, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', smpl$filenames[prm$typeorder], startCol = 1, startRow = 4, rowNames = FALSE, colNames = FALSE)
for (im in 1:prm$nmet) {
nowcol <- 3*im
openxlsx::writeData(wb, 'Sheet1', metab[[im]]$name, startCol = nowcol, startRow = 1, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', t(c('Score', 'Start', 'End')) , startCol = nowcol, startRow = 2, rowNames = FALSE, colNames = FALSE)
options("openxlsx.numFmt" = "0")
openxlsx::writeData(wb, 'Sheet1', rep(1,prm$nsmpl) , startCol = nowcol, startRow = 4, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', nowscore[prm$typeorder, im] , startCol = nowcol, startRow = 4, rowNames = FALSE, colNames = FALSE)
options("openxlsx.numFmt" = "0.00")
openxlsx::writeData(wb, 'Sheet1', prm$unirt[pstartmat[ ,im]] , startCol = nowcol+1, startRow = 4, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', prm$unirt[pendmat[ ,im]] , startCol = nowcol+2, startRow = 4, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', prm$unirt[pstartmat[prm$typeorder ,im]] , startCol = nowcol+1, startRow = 4, rowNames = FALSE, colNames = FALSE)
openxlsx::writeData(wb, 'Sheet1', prm$unirt[pendmat[prm$typeorder ,im]] , startCol = nowcol+2, startRow = 4, rowNames = FALSE, colNames = FALSE)
}
openxlsx::saveWorkbook(wb, file.path(prm$batchdir, "manual_peakcheck_template.xlsx"), overwrite = TRUE)
}
evaluate_peaks_rf <- function(msd, prm){
evaluate_peaks <- function(msd, prm){
# Function to evaluate peaks based on the final random forest model
# In training mode, all peak candidates are reported
@@ -18,8 +18,8 @@ evaluate_peaks_rf <- function(msd, prm){
for (im in 1:prm$nmet){
# calculate quantiles for each batch
rf_quantile <- quantile(sapply(msd[[im]], '[[', 'qs')) # quantile(rf_vec)
names(rf_quantile) <- paste0("RF",seq(0,100,25))
ml_quantile <- quantile(sapply(msd[[im]], '[[', 'qs')) # quantile(rf_vec)
names(ml_quantile) <- paste0("RF",seq(0,100,25))
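# Illustrative sketch (toy vector): quantile() gives the five-number summary, which is
# renamed RF0..RF100 and later appended to each candidate's feature vector.
q <- quantile(c(0.1, 0.4, 0.5, 0.8, 0.9))
names(q) <- paste0('RF', seq(0, 100, 25))
q        # RF0 RF25 RF50 RF75 RF100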
for (id in 1:prm$nsmpl){
@@ -33,22 +33,22 @@ evaluate_peaks_rf <- function(msd, prm){
cat('\r', paste('evaluate_peaks: metab', im, 'sample', id, ' '))
}
nowrfquantile <- c(data.frame('Output_H' = msd[[im]][[id]]$qs), rf_quantile)
nowquantile <- c(data.frame('Output_H' = msd[[im]][[id]]$qs), ml_quantile)
# msd[[im]][[id]]$rf_quantile <- rf_quantile # do we really ever need this again? -> Good question! -> if yes, it should be saved to metab
# msd[[im]][[id]]$ml_quantile <- ml_quantile # do we really ever need this again? -> Good question! -> if yes, it should be saved to metab
# predict final peak score
final_data <- as.data.frame(c(msd[[im]][[id]]$qs_list, nowrfquantile)) # as.data.frame(t(c(peaks_by_metab_temp[[im]][[id]][["peaks"]][[peak]]$qs_list,rf_quantile)))
final_data <- as.data.frame(c(msd[[im]][[id]]$qs_list, nowquantile)) # as.data.frame(t(c(peaks_by_metab_temp[[im]][[id]][["peaks"]][[peak]]$qs_list,ml_quantile)))
final_data_prep <- predict(prm$train_preprocessing_final,final_data)
final_pred <- predict(prm$model_rf_final,final_data_prep,type="prob")$H
final_pred <- predict(prm$model_final,final_data_prep,type="prob")$H
#update plotdata label1
nowstartpos <- gregexpr('\n', msd[[im]][[id]]$label1)[[1]][2]
msd[[im]][[id]]$label1 <- paste0(substr(msd[[im]][[id]]$label1, 1, nowstartpos), 'QS: ', round(100*final_pred,0) )
#update plotdata label2
quantile_label <- paste0(names(rf_quantile),": ",rf_quantile,collapse=" ")
new_label <- paste0(paste("rep: ",final_pred),"\n",quantile_label)
quantile_label <- paste0(names(ml_quantile),": ", round(ml_quantile,2), collapse=" ")
new_label <- paste0(paste(" rep: ", round(final_pred,3),"\n",quantile_label) )
msd[[im]][[id]]$label2 <- paste(msd[[im]][[id]]$label2, new_label)
nowchecksum <- sum(msd[[im]][[id]]$foundchrom)
@@ -32,10 +32,14 @@ generate_Xy_data_peaks_final <- function(xlsx_path,tsv_path){
sample <- substr(xlsx_td[row,1], 1, nchar(xlsx_td[row,1])-5)
}
rownames(my_data)[count] <- paste0(metnames[im]," ",sample)
my_data[count,1] <- xlsx_td[row,col]
count <- count + 1
nowscore <- as.numeric(xlsx_td[row, col])
if (nowscore %in% c(0, 1, 2)) {
my_data[count,1] <- nowscore
count <- count + 1
}
}
}
my_data <- my_data[1:(count-1), ]
# parse quality scores from tsv file
for (row in 1 : nrow(tsv_td)){
@@ -48,9 +52,9 @@ generate_Xy_data_peaks_final <- function(xlsx_path,tsv_path){
# get indices of the current metab to aggregate the rf hit probability
metab_indices <- which(tsv_td[,2] == tsv_td[row,2])
rf_quantiles <- quantile(tsv_td[metab_indices, pcandcol])
ml_quantile <- quantile(tsv_td[metab_indices, pcandcol])
my_data[row_index,2:ncol(my_data)] <- c(qs_scores, rf_quantiles)
my_data[row_index,2:ncol(my_data)] <- c(qs_scores, ml_quantile)
}
# Only use the subset of the data matrix where the peak classification is 2 or 0 (1's are 'maybe' peaks)
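# Illustrative sketch (hypothetical my_data, the actual filtering code is not shown in
# this hunk): rows labelled 1 ('maybe' peaks) are dropped so the model is trained only
# on clear positives (2) and clear negatives (0), e.g.:
# my_data <- my_data[my_data[, 1] %in% c(0, 2), ]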
@@ -81,10 +81,20 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
next
}
# get column by column name rather than assuming column order
rtcol <- which(tolower(td[2, ]) == 'rt')
rtcol <- min(rtcol[rtcol > met_index], na.rm = TRUE)
startcol <- which(tolower(td[2, ]) == 'int. start')
startcol <- min(startcol[startcol > met_index], na.rm = TRUE)
endcol <- which(tolower(td[2, ]) == 'int. end')
endcol <- min(endcol[endcol > met_index], na.rm = TRUE)
# Get rt, peakstart, peakend from training data for peak evaluation
rt <- as.numeric(td[3:dim(td)[1],met_index + 1 ])
start <- as.numeric(td[3:dim(td)[1],met_index + 2])
end <- as.numeric(td[3:dim(td)[1],met_index + 3])
rt <- as.numeric(td[3:dim(td)[1], rtcol])
start <- as.numeric(td[3:dim(td)[1], startcol])
end <- as.numeric(td[3:dim(td)[1], endcol])
goodpos <- which((as.numeric(!is.na(rt)) * as.numeric(!is.na(start)) * as.numeric(!is.na(end)) ) == 1)
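# (goodpos keeps the sample rows where rt, start and end are all non-NA)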
@@ -93,14 +103,11 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
sample_detected <- end > start
sample_detected[is.na(sample_detected)] <- FALSE
# Loop over samples in peaks_by_metab
# Loop over samples
for (sample in samples){
# index of the sample row in the training data (row indices differ between the tables involved)
# if (!is.na(td[length(td[,2]),2])) {
smp_index_train <- which(train$id==sample) # - 2 # minus two header rows
# }else{
# smp_index_train <- which(train$id==sample) # - 2 # minus two header rows
# }
smp_index_train <- which(train$id==sample)
if ( !(smp_index_train %in% goodpos) ) {
print(paste('skipping', td$id[smp_index_train]))
next
@@ -124,12 +131,16 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
#print("outsch!")
} else{
#check if rt is within training peak range and choose nearest measured peak candicated
# check if rt is within the training peak range and choose the nearest measured peak candidate
peak_in_range <- which((start[smp_index_train] < mes_rt) & (mes_rt < end[smp_index_train]))
clostest_peak <- which(abs(rt[smp_index_train] - mes_rt) == min(abs(rt[smp_index_train] - mes_rt)))
if (length(peak_in_range) > 1) {
peak_in_range <- peak_in_range[ which.max(as.numeric(peaks_candidate_Xy[smp_index_mes[peak_in_range], 'QS_height']))[1] ]
}
hit_list[peak_in_range] <- 1
# clostest_peak <- which(abs(rt[smp_index_train] - mes_rt) == min(abs(rt[smp_index_train] - mes_rt)))
# Set hitlist to 1 (peak detected)
hit_list[intersect(peak_in_range,clostest_peak)] <- 1
# hit_list[intersect(peak_in_range,clostest_peak)] <- 1
}
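# Illustrative sketch (toy numbers, not the package objects): of all measured candidates
# whose RT lies inside the manually annotated window, the highest one (QS_height) is
# marked as the hit.
mes_rt    <- c(4.80, 5.05, 5.10, 6.20)   # candidate RTs for one sample/metabolite
qs_height <- c(2.1, 4.0, 3.5, 1.0)       # their height scores
start <- 5.0; end <- 5.2                 # annotated integration window
hit_list <- numeric(length(mes_rt))
in_range <- which(start < mes_rt & mes_rt < end)
hit_list[in_range[which.max(qs_height[in_range])]] <- 1
hit_list                                 # 0 1 0 0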
# Add hitlist to yall
get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetrace, im, metab, prm, rtshift = 0, rangestart = 0) {
# peakstart <- peakstart[ip]
# peaktop <- peaktops[ip]
# peakend <- peakend[ip]
# ideas for future work:
# add QS: the fit quality (residual of the fit, same for all samples)
# alternatively: for each sample/metabolite: residual of shift fit
# add QS: average score of next-best peak candidate: the idea is that
# if there are multiple good candidates, more weight should be given to dRT,
# add QS: average score of next-best peak candidate: the idea is that
# if there are multiple good candidates, more weight should be given to dRT,
# however if there is only 1 good candidate we can be more relaxed concerning dRT
# add QS: Measure how well the ratio of quantifier and 12c qualifier matches the ratio from metdb
# names must start with "QS" in order for the training script to work properly
qs_list <- list( 'QS_cor123' = 0,
'QS_T.o' = 0,
'QS_T.h' = 0,
'QS_T.l' = 0,
'QS_dRT' = 0,
'QS_w' = 0,
'QS_cor12' = 0,
'QS_cor13' = 0,
'QS_cor23' = 0,
'QS_height' = 0,
'QS_relRT' = 0,
'QS_T.o' = 0,
'QS_T.h' = 0,
'QS_T.l' = 0,
'QS_dRT' = 0,
'QS_w' = 0,
'QS_cor12' = 0,
'QS_cor13' = 0,
'QS_cor23' = 0,
'QS_height' = 0,
'QS_relRT' = 0,
'QS_T.o2' = 0,
'QS_T.o3' = 0,
'QS_gauss' = 0,
'QS_1vs2' = 0 ,
'QS_dRTs' = 0)
# QS1: average correlation (quant, qual12c, qual13c)
nowcor <- suppressWarnings(cor(cbind(x1[peakstart:peakend],x2[peakstart:peakend],x3[peakstart:peakend]),use="everything") ) #use="complete.obs"
cor_value <- 0.5*(mean(c(nowcor[1, 2:3], nowcor[2,3]), na.rm = TRUE)+1)
qs_list$QS_cor123 <- check_qs_value(cor_value)
# QS2 ratio peak top to highest point outside of series
t_o_value <- max(c(x1[peaktop] / (x1[peaktop] + max(x1[c(1:(max(c(peakstart-1,1))),(min(c(peakend+1,length(x1)))):length(x1))], na.rm = TRUE)),0))
qs_list$QS_T.o <- check_qs_value(t_o_value)
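# (e.g. with no comparable signal outside the peak this ratio approaches 1, while an
#  outside point as high as the peak top gives T/(T+O) = 0.5)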
# QS3 ratio peak top/max(peakend,peakstart)
t_b_h_value <- ((2 * x1[peaktop])/ (x1[peaktop] + max(x1[peakstart],x1[peakend])) ) -1
qs_list$QS_T.h <- check_qs_value(t_b_h_value)
# QS4 ratio peak top/min(peakend,peakstart)
t_b_l_value <- ((2 * x1[peaktop])/ (x1[peaktop] + min(x1[peakstart],x1[peakend])) ) -1
qs_list$QS_T.l <- check_qs_value(t_b_l_value)
# QS5 deviation from expected RT
# algo in mrm_integrate: max(c(1/(1 + (0.02*abs(peaktops[i] - metparam$peaktopa) / (1+metparam$peaktopadelta) ) )^2,0))
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency)
dRT_value <- (1/(1+abs(prm$unirt[peaktop + rangestart] - prm$unirt[peaktopa]) ))^2
qs_list$QS_dRT <- check_qs_value(dRT_value)
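# (e.g. a deviation of 0.5 from the expected RT gives (1/(1+0.5))^2 ~ 0.44, an exact
#  match gives 1; the score falls off rapidly with increasing deviation)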
# deviation from predicted RT
if (prm$ml_type != 'mltrain_initial') {
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency) + rtshift
peaktopa <- min(max(c(1, peaktopa + rangestart)) , prm$nscan)
dRT_value <- (1/(1+abs(prm$unirt[peaktop] - prm$unirt[peaktopa]) ))^2
qs_list$QS_dRTs <- check_qs_value(dRT_value)
}
# QS6 widthscore
if (peakstart != peakend){
widthscore <- 1/ (1+(prm$unirt[peakend] - prm$unirt[peakstart]))
}else{
widthscore <- 0
}
qs_list$QS_w <- check_qs_value(widthscore)
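# (e.g. a peak width of 0.25 in prm$unirt units gives 1/(1 + 0.25) = 0.8; very broad peaks approach 0)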
# QS7 | QS8 | QS9 average correlation (quant, qual12c, qual13c)
# Check if one of the required traces is not used
if (nowusetrace[1] == FALSE || nowusetrace[2] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
}else{
cor_value <- prm$rf_median_values[["median_qs7"]]
}
}else{
cor_value <- cor(x1[peakstart:peakend],x2[peakstart:peakend],use="everything") * 0.5 + 0.5
}
qs_list$QS_cor12 <- check_qs_value(cor_value)
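# (cor * 0.5 + 0.5 maps the correlation from [-1, 1] onto [0, 1])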
# Check if one of the required traces is not used
if (nowusetrace[1] == FALSE || nowusetrace[3] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
}else{
cor_value <- prm$rf_median_values[["median_qs8"]]
}
}else{
cor_value <- cor(x1[peakstart:peakend],x3[peakstart:peakend],use="everything") * 0.5 + 0.5
}
qs_list$QS_cor13 <- check_qs_value(cor_value)
# Check if one of the required traces is not used
if (nowusetrace[2] == FALSE || nowusetrace[3] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
}else{
cor_value <- prm$rf_median_values[["median_qs9"]]
}
}else{
cor_value <- cor(x2[peakstart:peakend],x3[peakstart:peakend],use="everything") * 0.5 + 0.5
}
qs_list$QS_cor23 <- check_qs_value(cor_value)
# QS10 Peak intensity
peak_int_value <- log10(x1[peaktop])
qs_list$QS_height <- check_qs_value(peak_int_value)
# QS11 #retention time relative to method length
RT_value <- prm$unirt[peaktop] / max(prm$unirt, na.rm = TRUE)
qs_list$QS_relRT <- check_qs_value(RT_value)
# QS12 | QS13 (12c & 13c qualifier): ratio peak top to highest point outside
if (is.null(x2) || nowusetrace[2] == FALSE || (peakstart == peakend)){
if(prm$ml_type == "mltrain_initial"){
qual_t_o_value <- 0
}else{
qual_t_o_value <- prm$rf_median_values[["median_qs12"]]
}
}else{
qual_t_o_value <- max((max(x2[peakstart:peakend],na.rm=TRUE) / ( (max(x2[peakstart:peakend],na.rm=TRUE) + max(c(x2[1:max(c(peakstart-1),1)],x2[min(c(peakend+1,length(x2))):length(x2)]),na.rm=TRUE)) )),0)
}
qs_list$QS_T.o2 <- check_qs_value(qual_t_o_value)
if (is.null(x3) || nowusetrace[3] == FALSE || (peakstart == peakend)){