### destring.variable.update.data.R
###------------------------------------------------------------------------------------------
### What: script to convert string variables to numeric and update the csv data file
### Time-stamp: <2017-11-13 11:21:27 assyst>
###-------------------------------------------------------------------------------------------


## Input Parameters
# `input` is supplied by the caller; its elements are read by position.
# data file path (csv)
csvpath <- input[[1]]
# json file path - variable details
jsonpath <- input[[2]]
# Updated data file path
updateddatapath <- input[[3]]
# library path (R library path embedded in MDE)
libPath <- input[[5]]
# directory to load functions
workingDirectory <- input[[6]]

# Set working directory so the helper scripts below are found by relative path
setwd(workingDirectory)

# Load functions
source("fn.common.utilities.R")
source("fn.calculate.varStats.R")

# R is not embedded in current MAC version, hence use the default path to load
# the libraries. `%in%` never returns NA or a zero-length result for a scalar,
# and isTRUE() absorbs a NULL/empty libPath, so this test cannot abort the way
# `libPath == "MAC"` does when libPath is NULL.
if (isTRUE(libPath %in% "MAC")) {
  libPath <- NULL
}
if (is.null(libPath) || isTRUE(libPath %in% "")) {
  # No dedicated library location: attach from the default library paths
  library(haven)
  library(jsonlite)
  library(plyr)
  library(readr)
} else {
  # Dedicated library location: make it the library path and attach from it
  .libPaths(libPath)
  library(haven, lib.loc = libPath)
  library(jsonlite, lib.loc = libPath)
  library(plyr, lib.loc = libPath)
  library(readr, lib.loc = libPath)
}

# list of variables to perform destring operation (passed as JSON text).
# Parsed only after jsonlite has been attached above, so the call does not
# depend on the calling session already exposing a fromJSON().
listOfVariables <- fromJSON(input[[4]])

# Read variable details from the json path
jsonData <- fromJSON(jsonpath)
# Flatten the jsonData (per the original note, valRange and valFormat objects
# are the only nested ones that get flattened)
flattenData <- flatten(jsonData, recursive = TRUE)

# Read only the first csv row, as text and without treating it as a header,
# to pick the column names
headerRow <- read_csv(
  file = csvpath,
  col_names = FALSE,
  col_types = cols(.default = "c"),
  n_max = 1
)
csvHeader <- as.character(headerRow)
# Requested variables that are actually present in the csv header
matchingVariables <- intersect(listOfVariables, csvHeader)

# Read the csv file through the project helper and hold it as a data.frame
rawData <- read.datafile(flattenData, file = csvpath)
DF_DATA <- data.frame(rawData)
# NOTE: with a single matching variable this collapses to a plain vector
matchingDF <- DF_DATA[, matchingVariables]
# Check, for every requested variable, whether all of its values are numeric.
# The result is one list per variable with:
#   name     - the variable name
#   status   - TRUE when the variable can be destringed, FALSE otherwise
#   msg      - reason for a failed destring ('' when none was identified)
#   varStats - result of calculate.varStats() when status is TRUE, else NA
canConvertVars <- lapply(matchingVariables, function(col) {
  # Read the column straight from DF_DATA; this is the same vector whether one
  # or several variables were requested.
  values <- DF_DATA[[col]]
  # type.convert - Convert a character vector to logical, integer, numeric, complex or factor as appropriate.
  # numerals = "no.loss" leaves values as character when converting them to
  # double would lose accuracy.
  new_vector <- type.convert(values, na.strings = "NA", as.is = TRUE,
                             numerals = "no.loss")
  msg <- ''
  status <- FALSE
  varStats <- NA
  # if converted vector is numeric. mode is numeric for both integer and numeric
  if (mode(new_vector) == 'numeric') {
    status <- TRUE
    # convert string values to numeric; this assignment only changes the copy
    # of DF_DATA that is local to this function
    DF_DATA[col] <- lapply(DF_DATA[col], as.numeric)
    # calculate statistics for numeric variables
    varStats <- calculate.varStats(DF_DATA, col)
  } else {
    #. find the reason, why destring is failed & set the message
    #. exclude NA from the variable first: nchar() of a missing string is NA,
    #  which would make the width comparison below abort
    presentVals <- values[!is.na(values)]
    widths <- nchar(presentVals)
    if (length(widths) > 0 && max(widths) > 15) {
      msg <- "Variable with width > 15 can't be destringed."
    } else {
      #. convert the remaining values to numeric, all non - numeric values
      #  will be converted to NA (the coercion warning is expected here)
      nonNumericPredicate <- suppressWarnings(is.na(as.numeric(presentVals)))
      #. if multiple values failed, only the first one is shown in the message
      if (any(nonNumericPredicate)) {
        nonNumericVals <- presentVals[nonNumericPredicate]
        msg <- paste("Value", shQuote(nonNumericVals[1]), "can't be destringed.", sep = " ")
      }
    }
  }
  list(name = col, status = status, msg = msg, varStats = varStats)
})

# Convert the data to numeric for every variable whose check succeeded.
# The conversion inside the check function only touched a function-local copy
# of DF_DATA, so it is applied to the real data frame here.
# seq_along() yields an empty sequence when no variable was checked, whereas
# 1:length(x) would iterate over c(1, 0) and fail on the missing element.
for (i in seq_along(canConvertVars)) {
  if (canConvertVars[[i]]$status) {
    DF_DATA[canConvertVars[[i]]$name] <- lapply(DF_DATA[canConvertVars[[i]]$name], as.numeric)
  }
}

# Assign NA to every cell that is.na() flags. is.na() is also TRUE for NaN, so
# presumably this is what turns NaN in numeric columns into plain NA before the
# file is written - TODO confirm that is the intent.
DF_DATA[ is.na(DF_DATA) ] <- NA    #missing values replaced with NA
# Previous base-R writer, kept for reference:
# write.csv(DF_DATA, file=file(updateddatapath, encoding="UTF-8"), row.names = FALSE, quote = TRUE, na= "*")  #write to CSV
# Write the updated data to updateddatapath, replacing any existing file
# (append = FALSE); missing values are written as "*".
write_csv(DF_DATA, updateddatapath, na = "*", append = FALSE)

# Return the per-variable destring results (name, status, msg, varStats) as
# pretty-printed JSON text.
cv <- toJSON(canConvertVars,pretty=TRUE,force=TRUE)
# NOTE(review): a top-level return() only works if this script is evaluated
# inside a function-like wrapper by the caller - confirm against the host.
return (cv)