Commit 8108f5e0 authored by gross47
Browse files

update dataPrep

parent 57fef798
......@@ -2,42 +2,54 @@
library(readxl)
library(tidyverse)
library(janitor)
# NOTE(review): this span is the body of a rendered commit diff (hunk
# "@@ -2,42 +2,54 @@") whose +/- markers were lost. Removed and added lines
# are interleaved: two `dataPreparation` signatures appear below but only one
# closing brace, and several statements exist in two variants. The text is
# therefore not a single runnable function as it stands; the comments below
# describe what each statement does and flag the apparent duplicates.
#
# dataPreparation: turns the raw input table `dat` into a long-format table
# with one row per (identifier columns, landUse) combination, carrying the
# columns `indicatorValue` and `indicatorUncertainty`.
#
# Arguments:
#   dat         - raw input table; presumably a sheet read with readxl whose
#                 first row holds the landUse names and whose next row holds
#                 the column headers -- TODO confirm against the caller.
#   uncertainty - prefix of the columns that hold the uncertainty measure
#                 (default "SE").
#   expVAL      - prefix of the columns that hold the expected value
#                 (default "mean"); only present in the second signature.
# Returns: `dataSource`, the left join of the gathered value table with the
# gathered uncertainty table.
dataPreparation <- function(dat, uncertainty = "SE"){
## Filter all Rows with only NA ##
# Keeps every row of `dat` that has at least one non-NA cell.
dat.final <- dat[rowSums(is.na(dat)) != ncol(dat), ]
# Drops rows whose column named `...4` is NA (presumably the auto-generated
# name of an unnamed fourth column -- verify against the input file).
dat.final <- dat.final %>% drop_na(...4)
# NOTE(review): second signature; adds `expVAL` with a default, so calls
# written for the two-argument form above remain valid.
dataPreparation <- function(dat, uncertainty = "SE", expVAL = "mean"){ #added expected value
## select landUse names ##
# Takes the first row of `dat`, keeps only the columns that are not NA in that
# row, promotes the row to column names (janitor::row_to_names) and stores the
# resulting names as the character vector `landUse`.
landUse <- dat[1, ]
landUse <- landUse[, colSums(is.na(landUse)) != nrow(landUse)]
landUse <- landUse %>% row_to_names(1)
landUse <- names(landUse)
## Convert input Data to dat.final ##
## Filter all Rows with only NA ##
# Same all-NA row filter as in the lines following the first signature.
dat.final <- dat[rowSums(is.na(dat)) != ncol(dat), ]
# If the first column contains any NA, the first remaining row is removed
# (presumably the landUse header row, whose first cell is empty -- TODO confirm).
if(any(is.na(dat.final[, 1]))){dat.final <- dat.final[-1, ]}
## Create column names ##
# NOTE(review): the next statement and the two after it both promote the first
# row to column names and drop it; they look like the old and new variant of
# the same step, not two steps meant to run in sequence.
dat.final <- dat.final %>% row_to_names(1)
colnames(dat.final) <- dat.final[1, ]
dat.final <- dat.final[-1, ]
## rename duplicated Columnnames ##
# make.unique() appends numeric suffixes to repeated names so every column
# name is distinct.
names(dat.final) <- make.unique(colnames(dat.final))
## detect and set classes of a dat.final
# Every column is passed through type.convert() and the converted columns are
# bound back together into one table.
dat.final <- lapply(dat.final, type.convert) %>% bind_cols()
## rename first columns for initScenario function and define data structure ##
# Fixed-layout variant: the first four columns are renamed, NA in columns
# 5..ncol becomes 0, and those columns are coerced to numeric.
names(dat.final)[1:4] <- c("branch", "indicatorGroup", "indicator", "direction")
dat.final[, 5:ncol(dat.final)][is.na(dat.final[, 5:ncol(dat.final)])] <- 0
dat.final[5:ncol(dat.final)] <- lapply(dat.final[5:ncol(dat.final)], as.numeric)
# Dynamic-layout variant: `chtr.cols` is the number of columns whose value in
# the first row is not numeric. The code below uses 1:chtr.cols as the
# identifier columns, which assumes all non-numeric columns come first.
chtr.cols <- unlist(lapply(dat.final[1,],is.numeric))
chtr.cols <- length(chtr.cols[chtr.cols == FALSE])
# NA in the remaining (value) columns becomes 0.
dat.final[, (chtr.cols+1):ncol(dat.final)][is.na(dat.final[, (chtr.cols+1):ncol(dat.final)])] <- 0
## warn and delete factor rows with NA ##
# Warns when any identifier column still contains NA; na.omit() then removes
# every row that has an NA in any column.
if(any(is.na(dat.final[, 1:chtr.cols]))){warning("Some Indicators have missing value, rows got deleted")}
# which(is.na(dat.final[, 1:chtr.cols]), arr.ind = TRUE)
dat.final <- na.omit(dat.final)
## select landUse names ##
# Same extraction of landUse names as near the top of the body, but the first
# row is promoted to column names by hand instead of via row_to_names().
landUse <- dat[1, ]
landUse <- landUse[, colSums(is.na(landUse)) != nrow(landUse)]
colnames(landUse) <- landUse[1, ]
landUse <- landUse[-1, ]
landUse <- names(landUse)
## select mean values, rename columns and gather ##
# Two variants of the same step: hard-coded identifier names with prefix
# "mean", and positional identifier columns with prefix `expVAL`.
# The grepl() renames match the pattern anywhere in a column name, not only as
# a prefix, and the assignment requires exactly length(landUse) matches.
importValues <- dat.final %>% select(branch, indicatorGroup, indicator, direction, starts_with("mean"))
colnames(importValues)[grepl("mean", colnames(importValues))] <- landUse
importValues <- dat.final %>% select((1:chtr.cols), starts_with(expVAL))
colnames(importValues)[grepl(expVAL, colnames(importValues))] <- landUse
# Reshapes wide to long: the columns spanning the first to the last landUse
# name become the key column "landUse" and the value column "indicatorValue".
importValues <- importValues %>% gather(key = "landUse", value = "indicatorValue", landUse[1]:landUse[length(landUse)])
## select uncertainty, rename columns and gather ##
# Same procedure for the uncertainty columns, again in two variants.
importUnc <- dat.final %>%select(branch, indicatorGroup, indicator, direction, starts_with(uncertainty))
importUnc <- dat.final %>% select((1:chtr.cols), starts_with(uncertainty))
colnames(importUnc)[grepl(uncertainty, colnames(importUnc))] <- landUse
importUnc <- importUnc %>% gather(key = "landUse", value = "indicatorUncertainty", landUse[1]:landUse[length(landUse)])
## combine mean and uncertainty ##
# Left-joins uncertainty onto values; the first join names the key columns
# explicitly, the second builds them from the first `chtr.cols` column names
# plus "landUse".
dataSource <- left_join(importValues, importUnc, by = c("branch", "indicatorGroup", "indicator", "direction", "landUse"))
dataSource <- left_join(importValues, importUnc, by = c(names(dat.final)[1:chtr.cols], "landUse"))
return(dataSource)
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment