Commit a0e697df authored by joerg.buescher's avatar joerg.buescher
Browse files

Merge branch 'dev2' into 'master'

minor updates

See merge request !19
parents b0780c82 d3106dfa
*.log
*.DS_Store
......@@ -20,6 +20,23 @@ for (id in 1:prm$nsmpl) {
}
refsmpl <- which.max(bestqs)[1]
# find typical fwhm to determine smoothing factor
if ('fwhm' %in% names(msd[[1]][[1]]) ) {
allfwhm <- numeric()
allqs <- numeric()
for (id in 1:prm$nsmpl) {
allfwhm <- c(allfwhm, unlist(sapply(msd, '[[', id)['fwhm', ]) )
allqs <- c(allqs, unlist(sapply(msd, '[[', id)['qs', ]) )
}
typicalfwhm <- median(allfwhm[allqs > 0.9], na.rm = TRUE)
if (prm$verbose > 1) {
print(paste('found typical fwhm to be', typicalfwhm, 'scans'))
}
} else {
typicalfwhm <- 15
}
# find shift relative to refsmpl
corweight <- list()
for (id in 1:prm$nsmpl) {
......@@ -191,9 +208,10 @@ for (im in 1:prm$nmet) {
p2[is.na(p2)] <- 0
p3[is.na(p3)] <- 0
# peak detection
# peak detection on weighted sum of single traces
p00 <- rowSums(cbind(p1/max(p1,na.rm = TRUE), p2/max(p2,na.rm = TRUE), p3/max(p3,na.rm = TRUE)), na.rm = TRUE)
# determine candidates and their qs scores
p0 <- zoo::rollmean(rowSums(cbind(p1/max(p1,na.rm = TRUE), p2/max(p2,na.rm = TRUE), p3/max(p3,na.rm = TRUE)), na.rm = TRUE), 3, fill=0, align='center')
p0 <- zoo::rollmean(p00, round(typicalfwhm/3) , fill=0, align='center') #
sp0 <- diff(p0)
maxsp0 <- max(abs(sp0))
local_max <- which(diff(sign(sp0))==-2)
......@@ -207,7 +225,7 @@ for (im in 1:prm$nmet) {
}
}
# second pass with more smoothing
p0 <- zoo::rollmean(rowSums(cbind(p1/max(p1,na.rm = TRUE), p2/max(p2,na.rm = TRUE), p3/max(p3,na.rm = TRUE)), na.rm = TRUE), 15, fill=0, align='center')
p0 <- zoo::rollmean(p00, typicalfwhm, fill=0, align='center')
sp0 <- diff(p0)
maxsp0 <- max(abs(sp0))
local_max <- which(diff(sign(sp0))==-2)
......@@ -233,7 +251,7 @@ for (im in 1:prm$nmet) {
nowpend <- prm$unirt[max(c(3, min(c(prm$nscan, peakend[ip] + tr[1])) )) ]
peakoldqs[ip] <- sum(originalqs[intersect(which((originalrt - metabshift) > nowpstart),
which((originalrt - metabshift) < nowpend))]) /
sqrt(peakend[ip] - peakstart[ip])
sqrt(peakend[ip] - peakstart[ip])
}
}
peakoldqs[is.na(peakoldqs)] <- 0
......@@ -348,6 +366,7 @@ if (prm$verbose >= 1) {
# write log of quality scores for training to tsv file
write.table(qslog, file = file.path(prm$batchdir, 'qslog.tsv'), sep = '\t', row.names = FALSE)
# write manual_peakcheck template
nowscore <- matrix(nrow = prm$nsmpl, ncol = prm$nmet, data = NA)
# use old scores if available and peaks match
......@@ -369,6 +388,16 @@ if (prm$verbose >= 1) {
}
}
}
} else { # if there is no previous solution, at least fill in the clear cases
for (id in 1:prm$nsmpl) {
for (im in 1:prm$nmet) {
if (msd[[im]][[id]]$qs < 0.2) {
nowscore[id,im] <- 0
} else if (msd[[im]][[id]]$qs > 0.95) {
nowscore[id,im] <- 2
}
}
}
}
# write template for training solution to xlsx file
......@@ -389,6 +418,7 @@ if (prm$verbose >= 1) {
openxlsx::saveWorkbook(wb, file.path(prm$batchdir, "manual_peakcheck_template.xlsx"), overwrite = TRUE)
}
msd # return msd
} # endfunction
......
......@@ -38,6 +38,7 @@ evaluate_peaks <- function(msd, prm){
final_data <- as.data.frame(c(msd[[im]][[id]]$qs_list, nowquantile))
final_data_prep <- predict(prm$train_preprocessing_final,final_data)
final_pred <- predict(prm$model_final,final_data_prep,type="prob")$H
msd[[im]][[id]]$report_p <- final_pred
#update plotdata label1
nowstartpos <- gregexpr('\n', msd[[im]][[id]]$label1)[[1]][2]
......
......@@ -153,7 +153,8 @@ get_quality_scores <- function(x1, x2, x3, peakstart, peaktop, peakend, nowusetr
ratio_value <- prm$median_values$QS_1vs2
}
} else {
ratio_value <- (1 / (1 + abs(log(x1[peaktop] / x2[peaktop], 2) - log(metab[[im]]$quant$intens / metab[[im]]$qual12c$intens,2))^2 ) )^2
ratio_value <- (1 / (1 + abs( log(x1[peaktop] / x2[peaktop], 2) -
log(metab[[im]]$quant$intens / metab[[im]]$qual12c$intens[1],2))^2 ) )^2
}
qs_list$QS_1vs2 <- check_qs_value(ratio_value)
......
......@@ -32,7 +32,9 @@ peak_detection <- function(metab, smpl, prm) {
'peakend' = NA, # end of peak (scan number)
'ylim' = c(0, 100), # y axis limits for plotting
'label1' = character(), # simple label for plot
'label2' = character() # debugging label for plot
'label2' = character(), # debugging label for plot
'report' = TRUE, # report in excel output only if quality is sufficient
'fwhm' = NA # full width half maximum
)
msd2 <- list()
......@@ -145,10 +147,10 @@ peak_detection <- function(metab, smpl, prm) {
msd[[im]][[id]]$combidata <- nowdata
if(TRUE){
x1 <- msd[[im]][[id]]$x1
x2 <- msd[[im]][[id]]$x2
x3 <- msd[[im]][[id]]$x3
nowdata <- msd[[im]][[id]]$coimidata
# x1 <- msd[[im]][[id]]$x1
# x2 <- msd[[im]][[id]]$x2
# x3 <- msd[[im]][[id]]$x3
# nowdata <- msd[[im]][[id]]$coimidata
nowusetrace <- as.logical( c( 1, c(metab[[im]]$qual12c$use, 0)[1] , c(metab[[im]]$qual13c$use,0)[1] ) )
nowusetrace[is.na(nowusetrace)] <- FALSE
......@@ -215,7 +217,7 @@ peak_detection <- function(metab, smpl, prm) {
qs_list <- list()
peak_list <- list()
peakstarts <- peakends <- peak_min_start <- peak_max_end <- rep(NA,prm$initial_candidates)
qs <- area1 <- area2 <- area13c <- height1 <- height2 <- height13c <- RT <- rep(NA,prm$initial_candidates)
fwhm <- qs <- area1 <- area2 <- area13c <- height1 <- height2 <- height13c <- RT <- rep(NA,prm$initial_candidates)
# peakparameter adjustment
for (i in 1:length(peaktops)){
......@@ -292,6 +294,11 @@ peak_detection <- function(metab, smpl, prm) {
area2[i] <- if(!is.null(x2)) sum(x2[peakstarts[i]:peakends[i]])/prm$samplingfrequency else NA
area13c[i] <- if(!is.null(x3)) sum(x3[peakstarts[i]:peakends[i]])/prm$samplingfrequency else NA
height1[i] <- x1[peaktops[i]]
if (!is.na(height1[i])) {
fwhm[i] <- min(c(peakends[i], intersect(which(x1 < 0.5*height1[i]), peaktops[i]:prm$nscan)), na.rm = TRUE) -
max(c(peakstarts[i], intersect(which(x1 < 0.5*height1[i]), 1:peaktops[i])), na.rm = TRUE)
}
# save peak-heights for met
if (length(x2) == 0){
......@@ -352,6 +359,7 @@ peak_detection <- function(metab, smpl, prm) {
peak$peakstart <- peakstarts[i]
peak$peaktop <- peaktops[i]
peak$peakend <- peakends[i]
peak$fwhm <- fwhm[i]
peak$ylim <- c(0,1.1*max(c(100,height1[i],nowusetrace[2]*height2[i],nowusetrace[3]*height13c[i]),na.rm = TRUE))
peak$label1 <- paste0(metab[[im]]$name, '\n',
smpl$samplenames[[id]], '\n',
......@@ -440,6 +448,7 @@ peak_detection <- function(metab, smpl, prm) {
msd[[im]][[id]]$peakstart <- peak_list[[bestpeak]]$peakstart
msd[[im]][[id]]$peaktop <- peak_list[[bestpeak]]$peaktop
msd[[im]][[id]]$peakend <- peak_list[[bestpeak]]$peakend
msd[[im]][[id]]$fwhm <- peak_list[[bestpeak]]$fwhm
msd[[im]][[id]]$ylim <- peak_list[[bestpeak]]$ylim
msd[[im]][[id]]$label1 <- peak_list[[bestpeak]]$label1
msd[[im]][[id]]$label2 <- peak_list[[bestpeak]]$label2
......@@ -447,7 +456,7 @@ peak_detection <- function(metab, smpl, prm) {
qsnames <- names( peak_list[[bestpeak]]$qs_list )
# append qslog info for training (last 2 cols are x12_13c_flags for qs median values)
for(p in 1:length(peak_list)){
for (p in 1:length(peak_list)) {
metab_name <- metab[[im]]$name
sample_name <- smpl$samplenames[id]
peak_id <- p
......
......@@ -6,14 +6,14 @@ plot_peaks <- function(metab, smpl, msd, prm) {
cat('Creating peakoverview.pdf ', file=prm$log_con)
print(' ')
pdf_file <- "peakoverview.pdf"
pdf_file <- file.path(prm$batchdir,"peakoverview.pdf")
pdf(file = pdf_file, height = 2*prm$nsmpl, width = 5* prm$nmet, family = "Helvetica")
par(mai = c(0.5, 0.5, 0.8, 0.5))
layout(matrix(c(1:(prm$nmet*prm$nsmpl)), prm$nsmpl, prm$nmet, byrow = TRUE)) # initiate subplots
# Reverse looping and indexing (samples first, then metabolites)
for (id in 1:prm$nsmpl){
for (id in prm$typeorder){
for (im in 1:prm$nmet){
if (prm$verbose >=2) {
cat('\r', paste('plot_peaks: metab', im, 'sample', id, ' '))
......
......@@ -36,7 +36,7 @@ process_batch <- function(nowfolder = "", parameters = list(), return_msd=FALSE)
prm <- initialize_prm()
# create new log file
prm$log_con <- file(file.path(nowfolder, "R_messages.log"),open="a")
prm$log_con <- file(file.path(nowfolder, 'R_messages.log'),open="a")
# Update prm from tsv config file and arguments
prm <- update_prm(nowfolder, parameters, prm)
......@@ -88,7 +88,7 @@ process_batch <- function(nowfolder = "", parameters = list(), return_msd=FALSE)
prm$model_final <- model_finalp
prm$train_preprocessing_final <- train_preprocessing_finalp
# only keep what we need
rm('model_finalp', 'train_preprocessing_finalp', 'train_dummy_vars_finalp')
rm('model_finalp', 'train_preprocessing_finalp')
if (prm$verbose >= 2) {
print(paste0('Successfully loaded 2. model from ', prm$model_final_path))
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment