Dear Gitlab users, due to maintenance reasons, Gitlab will not be available on Thursday 30.09.2021 from 5:00 pm to approximately 5:30 pm.

Commit 48a3d2c4 authored by joerg.buescher's avatar joerg.buescher
Browse files

more robust handling in case of errors in metabdb

parent 6c9add10
......@@ -143,7 +143,8 @@ for (im in 1:prm$nmet) {
pmat1 <- pmat2 <- pmat3 <- matrix(nrow = prm$nsmpl, ncol = length(tr))
refx1 <- smpl$chroms[[refsmpl]][[ metab[[im]]$quant$MRM ]] # mzml_data$chromdata[[refsmpl]][[metabdb_data$quantifier[[im]]$MRM]]
if (!is.null(refx1)) {
if ((!is.null(refx1)) && (sum(!is.na(refx1[tr])) > 20)) {
# currently we only use the timewindow around the expexted RT that we used for rtshift adjustment
# for the creation of the prototype. Consequently, only peaks within this timewindow can be found
# One could test to use the whole chromatogram instead. However the alignment of parts outside the
......@@ -224,7 +225,7 @@ for (im in 1:prm$nmet) {
if (prm$ml_type == 'mltrain_initial'){
# select best candidate by RF model
pred_qs <- rowMeans(qsmat)
bestcand <- which(pred_qs == max(pred_qs))
bestcand <- which(pred_qs == max(pred_qs, na.rm = TRUE))[1]
}else{
# Transform input data
trans <- predict(prm$train_preprocessing, as.data.frame(qsmat) )
......@@ -240,7 +241,7 @@ for (im in 1:prm$nmet) {
for (id in 1:prm$nsmpl) {
# id <- 1
nowpstart <- min(max(c(0, pstart + rtshift2[id,im] + tr[1])), (prm$nscan-10))
nowpend <- min(c(prm$nscan, pend + rtshift2[id,im] + tr[1]))
nowpend <- max(c(3, min(c(prm$nscan, pend + rtshift2[id,im] + tr[1])) ))
# micro-adjust start and end
nowmicrorangestart <- nowpstart+prm$microsearch
......
get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetrace, im, metab, prm, rtshift = 0, rangestart = 0) {
# peakstart <- peakstart[ip]
# peaktop <- peaktops[ip]
# peakend <- peakend[ip]
# ideas for future work:
# add QS: the fit quality (residual of the fit, same for all samples)
......@@ -6,7 +9,6 @@ get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetr
# add QS: average score of next-best peak candidate: the idea is that
# if there are multiple good candidates, more weight should be given to dRT,
# however if there is only 1 good candidate we can be more relaxed concerning dRT
# add QS: Measure how well the ratio of quantifier and 12c qualifier matches the ratio from metdb
# names must start with "QS" in order for the training script to work properly
qs_list <- list( 'QS_cor123' = 0,
......@@ -26,145 +28,148 @@ get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetr
'QS_1vs2' = 0 ,
'QS_dRTs' = 0)
# QS1: average correlation (quant, qual12c, qual13c)
nowcor <- suppressWarnings(cor(cbind(x1[peakstart:peakend],x2[peakstart:peakend],x3[peakstart:peakend]),use="everything") ) #use="complete.obs"
cor_value <- 0.5*(mean(c(nowcor[1, 2:3], nowcor[2,3]), na.rm = TRUE)+1)
qs_list$QS_cor123 <- check_qs_value(cor_value)
if ((peakstart < peaktop) && (peakend > peaktop) ) {
# QS1: average correlation (quant, qual12c, qual13c)
nowcor <- suppressWarnings(cor(cbind(x1[peakstart:peakend],x2[peakstart:peakend],x3[peakstart:peakend]),use="everything") ) #use="complete.obs"
cor_value <- 0.5*(mean(c(nowcor[1, 2:3], nowcor[2,3]), na.rm = TRUE)+1)
qs_list$QS_cor123 <- check_qs_value(cor_value)
# QS2 ratio peak top to highest point outside of series
t_o_value <- max(c(x1[peaktop] / (x1[peaktop] + max(x1[c(1:(max(c(peakstart-1,1))),(min(c(peakend+1,length(x1)))):length(x1))], na.rm = TRUE)),0))
qs_list$QS_T.o <- check_qs_value(t_o_value)
# QS2 ratio peak top to highest point outside of series
t_o_value <- max(c(x1[peaktop] / (x1[peaktop] + max(x1[c(1:(max(c(peakstart-1,1))),(min(c(peakend+1,length(x1)))):length(x1))], na.rm = TRUE)),0))
qs_list$QS_T.o <- check_qs_value(t_o_value)
# QS3 ratio peak top/max(peakend,peakstart)
t_b_h_value <- ((2 * x1[peaktop])/ (x1[peaktop] + max(x1[peakstart],x1[peakend])) ) -1
qs_list$QS_T.h <- check_qs_value(t_b_h_value)
# QS3 ratio peak top/max(peakend,peakstart)
t_b_h_value <- ((2 * x1[peaktop])/ (x1[peaktop] + max(x1[peakstart],x1[peakend])) ) -1
qs_list$QS_T.h <- check_qs_value(t_b_h_value)
# QS4 ratio peak top/min(peakend,peakstart)
t_b_l_value <- ((2 * x1[peaktop])/ (x1[peaktop] + min(x1[peakstart],x1[peakend])) ) -1
qs_list$QS_T.l <- check_qs_value(t_b_l_value)
# QS4 ratio peak top/min(peakend,peakstart)
t_b_l_value <- ((2 * x1[peaktop])/ (x1[peaktop] + min(x1[peakstart],x1[peakend])) ) -1
qs_list$QS_T.l <- check_qs_value(t_b_l_value)
# QS5 deviation from expected RT
# algo in mrm_integrate: max(c(1/(1 + (0.02*abs(peaktops[i] - metparam$peaktopa) / (1+metparam$peaktopadelta) ) )^2,0))
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency)
dRT_value <- (1/(1+abs(prm$unirt[peaktop + rangestart] - prm$unirt[peaktopa]) ))^2
qs_list$QS_dRT <- check_qs_value(dRT_value)
# deviation from predicted RT
if (prm$ml_type != 'mltrain_initial') {
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency) + rtshift
peaktopa <- min(max(c(1, peaktopa + rangestart)) , prm$nscan)
dRT_value <- (1/(1+abs(prm$unirt[peaktop] - prm$unirt[peaktopa]) ))^2
qs_list$QS_dRTs <- check_qs_value(dRT_value)
}
# QS5 deviation from expected RT
# algo in mrm_integrate: max(c(1/(1 + (0.02*abs(peaktops[i] - metparam$peaktopa) / (1+metparam$peaktopadelta) ) )^2,0))
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency)
dRT_value <- (1/(1+abs(prm$unirt[peaktop + rangestart] - prm$unirt[peaktopa]) ))^2
qs_list$QS_dRT <- check_qs_value(dRT_value)
# QS6 widthscore
if (peakstart != peakend){
widthscore <- 1/ (1+(prm$unirt[peakend] - prm$unirt[peakstart]))
}else{
widthscore <- 0
}
qs_list$QS_w <- check_qs_value(widthscore)
# deviation from predicted RT
if (prm$ml_type != 'mltrain_initial') {
peaktopa <- round(metab[[im]]$RT * 60 * prm$samplingfrequency) + rtshift
peaktopa <- min(max(c(1, peaktopa + rangestart)) , prm$nscan)
dRT_value <- (1/(1+abs(prm$unirt[peaktop] - prm$unirt[peaktopa]) ))^2
qs_list$QS_dRTs <- check_qs_value(dRT_value)
}
# QS6 widthscore
if (peakstart != peakend){
widthscore <- 1/ (1+(prm$unirt[peakend] - prm$unirt[peakstart]))
}else{
widthscore <- 0
}
qs_list$QS_w <- check_qs_value(widthscore)
# QS7 | QS8 | QS9 average correlation (quant, qual12c, qual13c)
#Check if nowusetrace is 0
if (nowusetrace[1] == FALSE | nowusetrace[2] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
# QS7 | QS8 | QS9 average correlation (quant, qual12c, qual13c)
#Check if nowusetrace is 0
if (nowusetrace[1] == FALSE | nowusetrace[2] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
}else{
cor_value <- prm$median_values$QS_cor12
}
}else{
cor_value <- prm$median_values$QS_cor12
cor_value <- cor(x1[peakstart:peakend],x2[peakstart:peakend],use="everything") * 0.5 + 0.5
}
}else{
cor_value <- cor(x1[peakstart:peakend],x2[peakstart:peakend],use="everything") * 0.5 + 0.5
}
qs_list$QS_cor12 <- check_qs_value(cor_value)
qs_list$QS_cor12 <- check_qs_value(cor_value)
#Check if nowusetrace is 0
if (nowusetrace[1] == FALSE || nowusetrace[3] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
#Check if nowusetrace is 0
if (nowusetrace[1] == FALSE || nowusetrace[3] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
}else{
cor_value <- prm$
median_values$QS_cor13
}
}else{
cor_value <- prm$
median_values$QS_cor13
cor_value <- cor(x1[peakstart:peakend],x3[peakstart:peakend],use="everything") * 0.5 + 0.5
}
}else{
cor_value <- cor(x1[peakstart:peakend],x3[peakstart:peakend],use="everything") * 0.5 + 0.5
}
qs_list$QS_cor13 <- check_qs_value(cor_value)
#Check if nowusetrace is 0
if (nowusetrace[2] == FALSE || nowusetrace[3] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
qs_list$QS_cor13 <- check_qs_value(cor_value)
#Check if nowusetrace is 0
if (nowusetrace[2] == FALSE || nowusetrace[3] == FALSE) {
if(prm$ml_type == "mltrain_initial"){
cor_value <- 0
}else{
cor_value <- prm$median_values$QS_cor23
}
}else{
cor_value <- prm$median_values$QS_cor23
cor_value <- cor(x2[peakstart:peakend],x3[peakstart:peakend],use="everything") * 0.5 + 0.5
}
}else{
cor_value <- cor(x2[peakstart:peakend],x3[peakstart:peakend],use="everything") * 0.5 + 0.5
}
qs_list$QS_cor23 <- check_qs_value(cor_value)
qs_list$QS_cor23 <- check_qs_value(cor_value)
# QS10 Peak intensity
peak_int_value <- log10(x1[peaktop])
qs_list$QS_height <- check_qs_value(peak_int_value)
# QS10 Peak intensity
peak_int_value <- log10(x1[peaktop])
qs_list$QS_height <- check_qs_value(peak_int_value)
# QS11 #retention time relative to method length
RT_value <- prm$unirt[peaktop] / max(prm$unirt, na.rm = TRUE)
qs_list$QS_relRT <- check_qs_value(RT_value)
# QS11 #retention time relative to method length
RT_value <- prm$unirt[peaktop] / max(prm$unirt, na.rm = TRUE)
qs_list$QS_relRT <- check_qs_value(RT_value)
# QS12 | QS13 (12c & 13c qualifier): ratio peak top to highest point outside
if (is.null(x2) || nowusetrace[2] == FALSE || (peakstart == peakend)){
if(prm$ml_type == "mltrain_initial"){
qual_t_o_value <- 0
# QS12 | QS13 (12c & 13c qualifier): ratio peak top to highest point outside
if (is.null(x2) || nowusetrace[2] == FALSE || (peakstart == peakend)){
if(prm$ml_type == "mltrain_initial"){
qual_t_o_value <- 0
}else{
qual_t_o_value <- prm$median_values$QS_T.o2
}
}else{
qual_t_o_value <- prm$median_values$QS_T.o2
qual_t_o_value <- max((max(x2[peakstart:peakend],na.rm=TRUE) / ( (max(x2[peakstart:peakend],na.rm=TRUE) + max(c(x2[1:max(c(peakstart-1),1)],x2[min(c(peakend+1,length(x2))):length(x2)]),na.rm=TRUE)) )),0)
}
}else{
qual_t_o_value <- max((max(x2[peakstart:peakend],na.rm=TRUE) / ( (max(x2[peakstart:peakend],na.rm=TRUE) + max(c(x2[1:max(c(peakstart-1),1)],x2[min(c(peakend+1,length(x2))):length(x2)]),na.rm=TRUE)) )),0)
}
qs_list$QS_T.o2 <- check_qs_value(qual_t_o_value)
# gaussean shape of quantifier
nowoutsidepoints <- which(x1 < 0.5*x1[peaktop]) - peaktop
nowfwhm <- min(nowoutsidepoints[nowoutsidepoints > 0]) - max(nowoutsidepoints[nowoutsidepoints < 0])
qs_list$QS_gauss <- check_qs_value(max(c(0,cor(x1[peakstart:peakend], dnorm(peakstart:peakend,mean=(peaktop+1),sd=(0.5*nowfwhm)) )),na.rm=TRUE) )# correlation to gaussian peak
# ratio quant/12C_qual
# Check if nowusetrace is 0
if (nowusetrace[1] == FALSE || nowusetrace[2] == FALSE ||
is.na(metab[[im]]$quant$intens) || is.na(metab[[im]]$qual12c$intens) ) {
if(prm$ml_type == "mltrain_initial"){
ratio_value <- 0
}else{
ratio_value <- prm$median_values$QS_1vs2
qs_list$QS_T.o2 <- check_qs_value(qual_t_o_value)
# gaussean shape of quantifier
nowoutsidepoints <- which(x1 < 0.5*x1[peaktop]) - peaktop
nowfwhm <- min(nowoutsidepoints[nowoutsidepoints > 0]) - max(nowoutsidepoints[nowoutsidepoints < 0])
qs_list$QS_gauss <- check_qs_value(max(c(0,cor(x1[peakstart:peakend], dnorm(peakstart:peakend,mean=(peaktop+1),sd=(0.5*nowfwhm)) )),na.rm=TRUE) )# correlation to gaussian peak
# ratio quant/12C_qual
# Check if nowusetrace is 0
if (nowusetrace[1] == FALSE || nowusetrace[2] == FALSE ||
is.na(metab[[im]]$quant$intens) || is.na(metab[[im]]$qual12c$intens) ) {
if(prm$ml_type == "mltrain_initial"){
ratio_value <- 0
}else{
ratio_value <- prm$median_values$QS_1vs2
}
} else {
ratio_value <- (1 / (1 + abs(log(x1[peaktop] / x2[peaktop], 2) - log(metab[[im]]$quant$intens / metab[[im]]$qual12c$intens,2))^2 ) )^2
}
} else {
ratio_value <- (1 / (1 + abs(log(x1[peaktop] / x2[peaktop], 2) - log(metab[[im]]$quant$intens / metab[[im]]$qual12c$intens,2))^2 ) )^2
}
qs_list$QS_1vs2 <- check_qs_value(ratio_value)
qs_list$QS_1vs2 <- check_qs_value(ratio_value)
if (is.null(x3) || nowusetrace[3] == FALSE || (peakstart == peakend)){
if(prm$ml_type == "mltrain_initial"){
qual_t_o_value <- 0
if (is.null(x3) || nowusetrace[3] == FALSE || (peakstart == peakend)){
if(prm$ml_type == "mltrain_initial"){
qual_t_o_value <- 0
}else{
qual_t_o_value <- prm$median_values$QS_T.o3
}
}else{
qual_t_o_value <- prm$median_values$QS_T.o3
qual_t_o_value <- max((max(x3[peakstart:peakend],na.rm=TRUE) / ( (max(x3[peakstart:peakend],na.rm=TRUE) + max(c(x3[1:max(c(peakstart-1),1)],x3[min(c(peakend+1,length(x3))):length(x3)]),na.rm=TRUE)) )),0)
}
}else{
qual_t_o_value <- max((max(x3[peakstart:peakend],na.rm=TRUE) / ( (max(x3[peakstart:peakend],na.rm=TRUE) + max(c(x3[1:max(c(peakstart-1),1)],x3[min(c(peakend+1,length(x3))):length(x3)]),na.rm=TRUE)) )),0)
}
qs_list$QS_T.o3 <- check_qs_value(qual_t_o_value)
qs_list$QS_T.o3 <- check_qs_value(qual_t_o_value)
} # endif peaktop is between peakstart and peakend
# return qs_list
qs_list
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment