### variable.repoStats.R
###------------------------------------------------------------------------------------------
### What: script to calculate the frequencies of variable categories when applying categories from the variable repo
### Time-stamp: <2018-11-15 11:21:27 assyst>
###-------------------------------------------------------------------------------------------
# Positional arguments supplied by the caller through the `input` list.
csvpath <- input[[1]]            # handed to read.matchingVariables()
varRepoInput <- input[[2]]       # read by fromJSON(); also the output file
libPath <- input[[3]]            # handed to load.packages()
workingDirectory <- input[[4]]   # directory the helper scripts are read from

# The helper scripts are sourced by relative name, so the working directory
# has to be switched first.
setwd(workingDirectory)
source("fn.calculate.varStats.R")
source("fn.common.utilities.R")

# Load the packages this script needs. load.packages() is not an R built-in;
# presumably it comes from the helpers sourced above and uses libPath as the
# library location -- TODO confirm.
load.packages(
  c("readr", "plyr", "jsonlite"),
  libPath
)

# Project-specific session setup (defined outside this file).
set.configurations()

# Parse the JSON request: the variables to process and the category
# definitions, the latter converted to a data frame.
inputJson <- fromJSON(varRepoInput)
listOfVariables <- inputJson$variables
catList <- inputJson$catgry
catgryDF <- as.data.frame(catList)

# Load the data for the requested variables from the CSV.
matchingDF <- read.matchingVariables(listOfVariables, csvpath)

# Build one entry per requested variable: its name together with whatever
# calculate.catFrequency() returns for it.
repoStats <- lapply(
  listOfVariables,
  function(variable) {
    list(
      name = variable,
      catgry = calculate.catFrequency(matchingDF, catgryDF, variable)
    )
  }
)

# Serialize the per-variable statistics as pretty-printed JSON.
statsJson <- toJSON(
  repoStats,
  pretty = TRUE,
  force = TRUE,
  digits = 22
)

# The result is written to a file rather than returned, because returning may
# not work with a large number of categories. The output file is varRepoInput,
# the same value the request was read from, so the request is replaced by the
# result. encode.UTF() is expected to convert the JSON to UTF-8 to avoid
# unicode issues (defined outside this file -- TODO confirm).
write(x = encode.UTF(statsJson), file = varRepoInput)






