Dear GitLab users, due to maintenance, GitLab will be unavailable on Thursday, 30.09.2021, from 5:00 pm to approximately 5:30 pm.

Commit 4d59d3fc authored by Joerg Buescher
Browse files

generate_ml_training_data: take highest peak within range

parent a10bb665
......@@ -81,10 +81,20 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
next
}
# get column by column name rather than assuming column order
rtcol <- which(tolower(td[2, ]) == 'rt')
rtcol <- min(rtcol[rtcol > met_index], na.rm = TRUE)
startcol <- which(tolower(td[2, ]) == 'int. start')
startcol <- min(startcol[startcol > met_index], na.rm = TRUE)
endcol <- which(tolower(td[2, ]) == 'int. end')
endcol <- min(endcol[endcol > met_index], na.rm = TRUE)
# Get rt, peakstart, peakend from training data for peak evaluation
rt <- as.numeric(td[3:dim(td)[1],met_index + 1 ])
start <- as.numeric(td[3:dim(td)[1],met_index + 2])
end <- as.numeric(td[3:dim(td)[1],met_index + 3])
rt <- as.numeric(td[3:dim(td)[1], rtcol])
start <- as.numeric(td[3:dim(td)[1], startcol])
end <- as.numeric(td[3:dim(td)[1], endcol])
goodpos <- which((as.numeric(!is.na(rt)) * as.numeric(!is.na(start)) * as.numeric(!is.na(end)) ) == 1)
......@@ -93,14 +103,11 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
sample_detected <- end > start
sample_detected[is.na(sample_detected)] <- FALSE
# Loop over samples in peaks_by_metab
# Loop over samples
for (sample in samples){
# index of sample row in training data (different tables around)
# if (!is.na(td[length(td[,2]),2])) {
smp_index_train <- which(train$id==sample) # - 2 # minus two header rows
# }else{
# smp_index_train <- which(train$id==sample) # - 2 # minus two header rows
# }
smp_index_train <- which(train$id==sample)
if ( !(smp_index_train %in% goodpos) ) {
print(paste('skipping', td$id[smp_index_train]))
next
......@@ -124,12 +131,16 @@ generate_ml_training_data <- function(tsv_path, xlsx_path){
#print("outsch!")
} else{
#check if rt is within training peak range and choose nearest measured peak candicated
# check if rt is within training peak range and choose nearest measured peak candicated
peak_in_range <- which((start[smp_index_train] < mes_rt) & (mes_rt < end[smp_index_train]))
clostest_peak <- which(abs(rt[smp_index_train] - mes_rt) == min(abs(rt[smp_index_train] - mes_rt)))
if (length(peak_in_range) > 1) {
peak_in_range <- peak_in_range[ which.max(as.numeric(peaks_candidate_Xy[smp_index_mes[peak_in_range], 'QS_height']))[1] ]
}
hit_list[peak_in_range] <- 1
# clostest_peak <- which(abs(rt[smp_index_train] - mes_rt) == min(abs(rt[smp_index_train] - mes_rt)))
# Set hitlist to 1 (peak detected)
hit_list[intersect(peak_in_range,clostest_peak)] <- 1
# hit_list[intersect(peak_in_range,clostest_peak)] <- 1
}
# Add hitlist to yall
......
......@@ -34,9 +34,9 @@ read_mzmlfiles <- function(filelist,prm){
for (filenum in 1:length(filelist)){
if (prm$verbose >=2) {
cat('\r', paste('read_mzmlfiles:', filenum ,'of', nsmpl, ':', filelist[filenum]))
cat('\r', paste('read_mzmlfiles:', filenum ,'of', nsmpl, ':', filelist[filenum], ' '))
}
cat( paste('read_mzmlfiles:', filenum ,'of', nsmpl, ':', filelist[filenum]), file=prm$log_con)
cat( paste('read_mzmlfiles:', filenum ,'of', nsmpl, ':', filelist[filenum], ' '), file=prm$log_con)
chroms[[filenum]] <- list()
options(warn=-1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment