### resequence.data.R
###------------------------------------------------------------------------------------------
### What: script to calculate variable width, startpos endpos and update the variable info
### Time-stamp: <2017-06-26 11:21:27 assyst>
###-------------------------------------------------------------------------------------------

## Input Parameters
# All parameters are read positionally from `input`, which this script does
# not create -- presumably it is injected by the calling host; TODO confirm.
# Path to the data file (CSV); passed to read.datafile() further down.
csvpath <- input[[1]]
# Path to the variable-info JSON; read with fromJSON() and written back to
# at the end of the script.
jsonpath <- input[[2]]
# Library path (R library path embedded in MDE); passed to load.packages().
libPath <- input[[3]]
# Directory holding the helper scripts that are source()d below; it becomes
# the working directory.
workingDirectory <- input[[4]]
# Locale; passed to set.configurations() and later to reset.locale().
lcl <- input[[5]]

# Set the working directory first: the source() calls below use relative
# file names, so they are resolved against workingDirectory.
setwd(workingDirectory);

# Load helper functions. The helpers called in this script (load.packages,
# set.configurations, read.datafile, format.num, varibale.width,
# reset.locale, encode.UTF) are not defined here -- presumably they come from
# these two files; verify.
source("fn.variable.width.R")
source("fn.common.utilities.R")

# Load the required packages through the project helper, using the library
# path supplied as input.
load.packages(c('haven', 'jsonlite', 'readr'), libPath)

# Set initial configurations like memory limit and locale (as described by
# the original author; the helper's implementation is not visible here).
set.configurations(lcl=lcl)

# Read JSON (variable information)
jsonData <- fromJSON(jsonpath)
# Flatten jsonData (valRange and valFormat objects are the only ones
# flattened). The loop further down indexes the result by row, one row per
# variable.
# NOTE(review): flatten() is presumably jsonlite::flatten -- confirm that no
# other attached package masks it.
flattenData <- flatten(jsonData, recursive = TRUE)

# Read the data file. The flattened variable info is passed along with the
# CSV path; how read.datafile() uses it is not visible here.
DF_DATA <- read.datafile(flattenData, file=csvpath)


# Calculate width, start column and end column for every variable.
# Fields are laid out back to back: each variable starts one position after
# the previous variable's end, so the first variable starts at position 1.
startCol <- 0
endCol <- 0
# seq_len() instead of 1:nrow(): with zero rows, 1:0 would iterate over
# c(1, 0) and fail on the first lookup instead of simply skipping the loop.
for (i in seq_len(nrow(flattenData))) {

  variable.name <- flattenData$internalName[[i]]
  variable.type <- flattenData$type[[i]]

  # Get category values
  category.values <- flattenData$val[[i]]

  # no. of decimal positions
  dcml <- flattenData$dcml[[i]]

  if (variable.type == "numeric") {
    # Handles the scenario where dcml is greater than the length of the
    # variable, e.g. value = 1, dcml = 2 -> the value is converted to 1.00.
    # The formatted column replaces the original one in DF_DATA, so the
    # width below is measured on the formatted values.
    DF_DATA[[variable.name]] <- format.num(DF_DATA[[variable.name]], dcml = dcml)
  }

  # Calculate the variable width. The helper name is spelled "varibale"
  # where it is defined (outside this file), so the spelling is kept here.
  # An earlier inline max(nchar(...)) implementation was replaced by this
  # helper call.
  # Named field.width rather than length to avoid shadowing base::length.
  field.width <- varibale.width(DF_DATA[[variable.name]], variable.type, category.values)

  startCol <- endCol + 1
  endCol <- endCol + field.width

  # Update variable information JSON
  flattenData$width[[i]] <- field.width
  flattenData$StartPos[[i]] <- startCol
  flattenData$EndPos[[i]] <- endCol

}

# Serialise the updated variable information (including the width, StartPos
# and EndPos values set in the loop above) back to JSON text.
varjson <- toJSON(flattenData,pretty=TRUE,force=TRUE)

# Reset the locale to English to resolve issues when writing the JSON
# (per the original author; reset.locale() is a project helper whose
# implementation is not visible here).
reset.locale(lcl)

# Write the encoded JSON text to jsonpath, the same path the variable
# information was read from, so the input file is replaced.
write(encode.UTF(varjson), jsonpath)

