Commit be864b8a authored by daniel.eilertz's avatar daniel.eilertz
Browse files

Integrated logic of qqq_mrm_integrate.R into peak_detection.R

parent 688437c3
......@@ -36,17 +36,17 @@ peak_detection <- function(metab, smpl, prm) {
'label2' = character() # debugging label for plot
)
msd2 <- list()
for (id in 1:prm$nsmpl) {
for(id in 1:prm$nsmpl) {
msd2[[id]] <- msdelement
}
msd <- list()
for (im in 1:prm$nmet) {
for(im in 1:prm$nmet) {
msd[[im]] <- msd2
}
rm('msdelement', 'msd2')
# loop over samples
for (id in 1:prm$nsmpl){
for(id in 1:prm$nsmpl){
# id <- 1
if (prm$verbose >= 2 ){
cat(paste(as.character(id),'-'), file=prm$log_con)
......@@ -149,13 +149,252 @@ peak_detection <- function(metab, smpl, prm) {
# !-------
# GET PEAK CANDIDATES
peak_list <- qqq_mrm_integrate(metab, smpl, prm, msd, id, im)
#peak_list <- qqq_mrm_integrate(metab, smpl, prm, msd, id, im)
# !-------
# ToDo: move this for loop to end of qqq_mrm_integrate
# alternatively: get logic from qqq_mrm_integrate and place here
#
#
# START mrm_integrate
#
if(TRUE){
x1 <- msd[[im]][[id]]$x1
x2 <- msd[[im]][[id]]$x2
x3 <- msd[[im]][[id]]$x3
nowdata <- msd[[im]][[id]]$coimidata
nowusetrace <- as.logical( c( 1, c(metab[[im]]$qual12c$use, 0)[1] , c(metab[[im]]$qual13c$use,0)[1] ) )
nowusetrace[is.na(nowusetrace)] <- FALSE
# Calculate moving window correlation (over all available time series)
if(TRUE){
# can only calculate correlation and quant/qual ratio if there is more than 1 trace
if (prm$polarmets == FALSE) {
nowcorwindow <- 3*prm$corwindow # larger correlation window for lipids
} else {
nowcorwindow <- prm$corwindow
}
# moving window correlation over available x1,x2,x3
meancor <- rep(0,prm$nscan)
if (length(nowdata) > prm$nscan){
for (ic in (1:(prm$nscan-nowcorwindow) ) ) {
nowcor <- cor(nowdata[ic:(ic-1+nowcorwindow), ])
meancor[ic+ceiling(0.5*nowcorwindow)] <- mean(nowcor[lower.tri(nowcor)], na.rm = TRUE)
}
meancor[is.na(meancor)] <- 0
# more strict on correlation for lipids
if (prm$polarmets == FALSE) {
meancor[meancor < 0.75] <- 0
}
}
}
# calculate slope, maxima, minima ... of series
if(TRUE){
# normalize time series x1,x1 for plotting
xtest <- (x1 - min(x1,na.rm=TRUE))/ (max(x1,na.rm=TRUE)-min(x1,na.rm=TRUE))
# slope
x1slope <- diff(x1)
#local maxima
local_max <- which(diff(sign(diff(x1)))==-2)
#local minima
local_min <- which(diff(sign(diff(x1)))==2)
# top5 local maxima
peaktops <- local_max[order(x1[local_max], decreasing=T)][1:prm$initial_candidates]
# slope < 0
ls <- which(x1slope < 0) #blue
# slope > 0
us <- which(x1slope > 0) #green
}
# Test plot x1 with maxima/minima and slope
if ( prm$verbose >= 3 ){
par(mar=c(2,2,0.5,0.5))
layout(matrix(c(1,1,2,2), 2, 2, byrow = TRUE),
widths=c(3,1), heights=c(2,1))
plot(xtest)
for(i in 1:length(local_max)){
abline(v=local_max,col="red",lty="dotted")
}
for(i in 1:length(local_min)){
abline(v=local_min,col="blue",lty="dotted")
}
plot(x1slope)
points(ls,x1slope[ls],col="blue")
points(us,x1slope[us],col="red")
}
qs_list <- list()
peak_list <- list()
peakstarts <- peakends <- peak_min_start <- peak_max_end <- rep(NA,prm$initial_candidates)
qs <- area1 <- area2 <- area13c <- height1 <- height2 <- height13c <- RT <- rep(NA,prm$initial_candidates)
# peakparameter adjustment
for (i in 1:length(peaktops)){
#i <- 1
if (prm$verbose >=1 ) {
cat('\r', i)
}
# initialize peaklists
peak <- list()
napeak <- FALSE
# Calculate peakstart and peakend (if peaktop is NA set dummy value length(x1))
if (is.na(peaktops[i])) {
peakstarts[i] <- peakends[i] <- length(x1)
napeak <- TRUE
}else{
#PEAKSTART
# nearest local minimum left from peaktop
peak_min_start[i] <- max(c(local_min[sign(local_min - peaktops[i]) < 0],0))
# maximum range of bad slope cutoff left from peaktop
peak_start_width_thresh <- round(peaktops[i] - (peaktops[i] - peak_min_start[i])/3)
# bad slope indices between potential peakstart and peaktop
bad_slopes <- which(abs(x1slope[peak_min_start[i]:peak_start_width_thresh]) < max(abs(x1slope[peak_min_start[i]:peak_start_width_thresh]),na.rm=T)*0.1)
# x-shift of peakstart (0 if bad_slopes is empty)
shift <- max(c(0,bad_slopes))
peakstarts[i] <- max(c(1,peak_min_start[i] + shift))
#PEAKEND
# nearest local minimum right from peaktop
peak_max_end[i] <- min(c(local_min[sign(local_min - peaktops[i]) > 0],length(x1)))
# maximum range of bad slope cutoff right from peaktop
peak_end_width_thresh <- round(peak_max_end[i] - (peak_max_end[i] - peaktops[i])/3)
# bad slope indices between potential peakstart and peaktop
bad_slopes <- which(abs(x1slope[peak_max_end[i]:peak_end_width_thresh]) < max(abs(x1slope[peak_max_end[i]:peak_end_width_thresh]),na.rm=T)*0.1)
# x-shift of peakend (0 if bad_slopes is empty)
shift <- max(c(0,bad_slopes))
peakends[i] <- min(c(peak_max_end[i] - shift),length(x1))
}
# Plotting (gurumode)
if (!napeak & (prm$verbose >= 3 )){
# graphichal parameters
par(mar=c(2,2,0.5,0.5))
layout(matrix(c(1,1,2,2), 2, 2, byrow = TRUE),
widths=c(3,1), heights=c(2,1))
# x range around peak for plotting
peak_x_range <- (max(c(1,peak_min_start[i]-100))):min(c(prm$nscan,peak_max_end[i]+100))
# Plot peak MRM, start, top end
plot(peak_x_range,xtest[peak_x_range])#,xaxt='n')
header <- paste(i, " - " , metab[[im]]$name, "(",im,")")
text(peak_x_range[1]+20,max(xtest[peak_x_range],na.rm=T),labels=header)
lines(peak_x_range,xtest[peak_x_range])
points(us,xtest[us],col="green",pch=16)
points(ls,xtest[ls],col="blue",pch=16)
abline(v=peak_min_start[i],col=rainbow(length(peaktops))[i],lty=2)
abline(v=peakstarts[i],col="black",lty=3)
abline(v=peaktops[i],col=rainbow(length(peaktops))[i])
abline(v=peak_max_end[i],col=rainbow(length(peaktops))[i],lty=2)
abline(v=peakends[i],col="black",lty=3)
# plot peak slope
plot(peak_x_range,x1slope[peak_x_range], type="l")
abline(h=0)
points(ls[!is.na(match(ls,peak_x_range))],x1slope[ls[!is.na(match(ls,peak_x_range))]], type="p",col="blue")
points(us[!is.na(match(us,peak_x_range))],x1slope[us[!is.na(match(us,peak_x_range))]], type="p",col="green")
}
# Get area, heigth and RT
if(TRUE){
# safe peak-areas for met
area1[i] <- if(!napeak) sum(x1[peakstarts[i]:peakends[i]])/prm$samplingfrequency else NA
area2[i] <- if(!is.null(x2)) sum(x2[peakstarts[i]:peakends[i]])/prm$samplingfrequency else NA
area13c[i] <- if(!is.null(x3)) sum(x3[peakstarts[i]:peakends[i]])/prm$samplingfrequency else NA
height1[i] <- x1[peaktops[i]]
# save peak-heights for met
if (length(x2) == 0){
height2[i] <- 0
} else {
height2[i] <- x2[peaktops[i]]
}
if (length(x3) == 0){
height13c[i] <- 0
} else {
height13c[i] <- x3[peaktops[i]]
}
# Save peak retention time
RT[i] <- prm$unirt[peaktops[i]]
}
nowqs <- get_quality_scores(x1, x2, x3, peakstarts[i], peaktops[i], peakends[i], nowusetrace, im, metab, prm)
# Calculate peak quality scores
if (prm$ml_type == "mltrain_initial"){
# Calculate mean peak's qs (squared!)
qs[i] <- (mean(as.numeric(nowqs), na.rm = TRUE))^2
} else {
# Transform input data
trans <- predict(prm$train_preprocessing, as.data.frame(nowqs) ) # as.data.frame(t(nowqs))
# RF Model predictions
qs[i] <- predict(prm$model, trans,type="prob")$H
}
# add nowqs to peak parameters
qs_list[[i]] <- nowqs
# Check peaks for killcriteria
killcriteria <- c( napeak,
(abs(peakstarts[i]-peaktops[i]) <=prm$minDistTopBorder) ,
(abs(peakends[i]-peaktops[i]) <=prm$minDistTopBorder) ,
(x1[peakstarts[i]]>=x1[peaktops[i]]) ,
(x1[peakends[i]]>=x1[peaktops[i]]) ,
((peakends[i]-peakstarts[i]) < (prm$minwidth*prm$samplingfrequency)) )
killcriteria[ is.na(killcriteria)] <- FALSE
# discard peaks
if (any(killcriteria)) {
qs[i] <- 0
}
# Set up peak data structure for further procssing
peak$area1 <- area1[i]
peak$area2 <- area2[i]
peak$area13c <- area13c[i]
peak$height1 <- height1[i]
peak$height2 <- height2[i]
peak$height13c <- height13c[i]
peak$RT <- RT[i]
peak$qs <- peak$final_peak_class <- qs[i]
peak$qs_list <- qs_list[[i]]
peak$peakstart <- peakstarts[i]
peak$peaktop <- peaktops[i]
peak$peakend <- peakends[i]
peak$ylim <- c(0,1.1*max(c(100,height1[i],nowusetrace[2]*height2[i],nowusetrace[3]*height13c[i]),na.rm = TRUE))
peak$label1 <- paste0(metab[[im]]$name, '\n',
smpl$samplenames[[id]], '\n',
'QS = ', round(qs[i]*100) )
label2 <- paste0(metab[[im]]$name, '\n',
smpl$samplenames[[id]], '\n')
qsnames <- names(nowqs)
nowcount <- 0
for (iq in 1:length(nowqs) ) {
nowcount <- nowcount + 1
label2 <- paste0( label2, substr(qsnames[iq], 4, nchar(qsnames[iq]) ), ': ', as.character(round(nowqs[[qsnames[iq]]],2) ), ' ')
if ((nowcount > 7) && (iq < length(nowqs))) {
label2 <- paste0(label2, '\n')
nowcount <- 0
}
}
peak$label2 <- label2
# fill peak dummy
peak_list[[i]] <- peak
} # endfor all peaktops
#return peaks
#peak_list
}
#
# END mrm_integrate
#
for (p in 1:length(peak_list)){
if (!is.list(peak_list[[p]]) ) {
candqs[p] <- -1
......
......@@ -77,11 +77,11 @@ qqq_mrm_integrate <- function(metab, smpl, prm, msd, id, im) {
}
qs_list <- list()
peaks <- list()
peak_list <- list()
peakstarts <- peakends <- peak_min_start <- peak_max_end <- rep(NA,prm$initial_candidates)
qs <- area1 <- area2 <- area13c <- height1 <- height2 <- height13c <- RT <- rep(NA,prm$initial_candidates)
# peakparameter adjustment------------------------------------------------------
# peakparameter adjustment
for (i in 1:length(peaktops)){
#i <- 1
if (prm$verbose >=1 ) {
......@@ -236,9 +236,9 @@ qqq_mrm_integrate <- function(metab, smpl, prm, msd, id, im) {
peak$label2 <- label2
# fill peak dummy
peaks[[i]] <- peak
peak_list[[i]] <- peak
} # endfor all peaktops
#return peaks
peaks
peak_list
} # end function
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment