### create.categories.R
###------------------------------------------------------------------------------------------
### What: script to create categories from variable statistics
### Time-stamp: <2018-10-01 11:21:27 assyst>
###-------------------------------------------------------------------------------------------

## Input Parameters
## `input` is not defined in this script; it must already exist when the
## script runs. Its elements are read purely by position.
# path of the CSV data file to read
csvpath <- input[[1]]
# variables to process; parsed with fromJSON() further down, so this is
# JSON input at this point
listOfVariables <- input[[2]]
# path of the JSON file this script writes its result to
outpath <- input[[3]]
# library path (R library path embedded in MDE), handed to load.packages()
libPath <- input[[4]]
# directory containing fn.common.utilities.R; becomes the working directory
workingDirectory <- input[[5]]
# upper bound on distinct values: a variable gets categories only when its
# number of distinct non-missing values is strictly below this limit
# NOTE(review): compared against nrow() with `<` later on -- confirm the
# caller passes a number and not a character string
catgryMaxLimit <- input[[6]]


# Move into the directory that holds the helper script so the relative
# source() call below can find it. The previous working directory is not
# restored afterwards.
setwd(workingDirectory)

# Bring in the shared helper functions
# (load.packages and the other project helpers used by this script are
# presumably defined there -- they are not defined in this file)
source("fn.common.utilities.R")

# Make the required packages available through the project helper,
# using the supplied library path
load.packages(c("haven", "jsonlite", "readr", "plyr"), libPath)

# Parse the JSON description of the variables into an R object
listOfVariables <- fromJSON(listOfVariables)

# Read the variables of interest from the CSV file
# NOTE(review): the exact shape of the returned object depends on
# read.matchingVariables(); the code below treats it as a data frame
matchingDF <- read.matchingVariables(listOfVariables, csvpath)

# Build the updated variable list: for every requested variable, a list with
# its name and its categories. Each category describes one distinct
# non-missing value together with its frequency. Variables whose number of
# distinct values reaches catgryMaxLimit get catgry = NA instead.
updatedVariableList <- lapply(listOfVariables, function(varName) {

  # Frequency of each distinct value of this column, missing values dropped.
  # The column is selected by name with `[` before counting instead of being
  # passed as count()'s `vars` argument: plyr parses character `vars` as R
  # expressions, so a non-syntactic column name (e.g. "age-group") would be
  # evaluated as code rather than looked up as a column.
  # NOTE(review): assumes matchingDF is a data.frame / tibble -- confirm
  # against read.matchingVariables()
  freqTable <- na.omit(count(matchingDF[varName]))
  colnames(freqTable) <- c("Value", "freq")

  # Only create categories when the number of distinct values is strictly
  # below the configured limit
  if (nrow(freqTable) < catgryMaxLimit) {
    # Walk the rows by index so every value is paired with its own count
    # directly, rather than re-scanning the whole table for each value
    catgry <- lapply(seq_len(nrow(freqTable)), function(i) {
      value <- freqTable$Value[i]
      freq <- freqTable$freq[i]
      list(
        catValu = value,
        labl = value,
        labelled = TRUE,
        catStat = list(type = "freq", text = freq)
      )
    })
  } else {
    catgry <- NA
  }

  list(name = varName, catgry = catgry)
})


# Serialise the per-variable category list as pretty-printed JSON
varJson <- toJSON(
  updatedVariableList,
  pretty = TRUE,
  force = TRUE,
  digits = 22
)

# The result is written to a file instead of being returned: returning may not
# work with a large number of categories.
# encode.UTF() is a project helper (not defined in this file); it is meant to
# convert the JSON to "UTF-8" to avoid unicode issues.
write(encode.UTF(varJson), file = outpath)
