### export.data.R
###------------------------------------------------------------------------------------------
### What: script to export dataset to STATA/SPSS/CSV format
### Time-stamp: <2017-06-26 11:21:27 assyst>
###-------------------------------------------------------------------------------------------

## Input Parameters
# data file path (csv)
csvpath <- input[[1]]
# variable details (json)
jsonpath <- input[[2]]
# filepath of output file
outpath <- input[[3]]
# type dta/sav/csv
type <- input[[4]]
# STATA version number
versionNo <- input[[5]]
# libary path (R library path embedded in MDE)
libPath <- input[[6]]
# expand memory limit to handle large files
memoryLimit <- input[[7]]
# directory to load functions
workingDirectory <- input[[8]]

# csvpath <- 'D:/MetadataEditorSource/MetadataEditor/MetadataEditor.Import/test_data/input/csv/F1.csv'
# jsonpath <- 'D:/MetadataEditorSource/MetadataEditor/MetadataEditor.Import/test_data/input/final-json/F1.json'
# json format
# [
#   {{"name":["uqnr"],"internalName":["uqnr"],"labl":["Unique number"],"val":[null],"dcml":[0],"width":["18"],"type":["character"],"intrvl":["contin"]}}
#   {"name":["personnr"],"internalName":["personnr"],"labl":["Person number"],"val":[{"catValu":["01"],"labl":[""]},{"catValu":["02"],"labl":[""]},{"catValu":["03"],"labl":[""]},{"catValu":["07"],"labl":[""]}],"dcml":[0],"width":["2"],"type":["character"],"intrvl":["discrete"]}
# ]

# outpath <- D:/MetadataEditorSource/MetadataEditor/MetadataEditor.Import/test_data/output/dataset/Arabic.dta
# type <- 'DTA'
# versionNo <- 8
# libPath <- NULL
# memoryLimit <- NULL

# load packages, if lib.location is given then load from the given location else from the default loc.
# R is not embedded in the MAC version of MDE, load packages from the default location in MAC. 
if(libPath == "MAC") {
  libPath <- NULL
  Sys.setlocale(category = "LC_ALL", locale = "UTF-8")
}

if (is.null(libPath) || libPath == '') {
  library(haven)
  library(jsonlite)
  library(labelled)
  library(stringi)
  library(readr)
} else {
  .libPaths(libPath)
  library(haven, lib.loc=libPath)
  library(jsonlite, lib.loc=libPath)
  library(labelled, lib.loc=libPath)
  library(stringi, lib.loc=libPath)  
  library(readr, lib.loc=libPath)
}

if(!is.null(memoryLimit)) {
  gc()
  memory.limit(size=as.numeric(memoryLimit))
}

# Set working directory
setwd(workingDirectory);
# Load functions
source("fn.common.utilities.R")

# read variable details from the json file
jsonData <- fromJSON(jsonpath)
# flatten the jsonData (valRange and valFormat objects are the only ones flattend)
flattenData <- flatten(jsonData, recursive = TRUE)

# read data file
DF_DATA <- read.datafile(flattenData, file=csvpath)

# loop through the list of variables and assign label & value label to the dataframe
for(j in 1:nrow(flattenData))
{
  # variable name
  variable.name <- flattenData$name[[j]]

  # if variable name is changed, then update the column name
  if(variable.name != flattenData$internalName[[j]]){
    column.index <- which( colnames(DF_DATA)==flattenData$internalName[[j]])
    colnames(DF_DATA)[column.index] <- variable.name
  }

  # variable type
  variable.type <- flattenData$type[[j]]

  # If new variable add new variable to data as NA
  if(is.null(DF_DATA[[variable.name]])){
    DF_DATA[[variable.name]] <- NA
  }
  # for csv format
  if (toupper(type) == 'CSV') {
     # assign value label
    cat_values <- flattenData$val[[j]]
    if(!is.na(cat_values) && length(cat_values) > 0){
      # category label
      cat_labl <- cat_values[["labl"]]

      # category value
      cat_val <- cat_values[["catValu"]]

       for(i in 1:length(cat_val)){
          # set value as label if label is not empty
          if(is.null(cat_labl[[i]]) || is.na(cat_labl[[i]]) || cat_labl[[i]] == ""){
            cat_labl[[i]] <- cat_val[[i]]
          } else {
            # if value contains comma, then value should be enclosed in double quotes,
            # Otherwise it will be considered as separator
            sepr <- ","
            if(grepl(sepr, cat_labl[[i]])){
              cat_labl[[i]] <- paste("\"", cat_labl[[i]], "\"" )
            }
          }
       }
      DF_DATA[[variable.name]] <- factor(DF_DATA[[variable.name]], labels = cat_labl, levels =  cat_val)
    }
  } else {

    # assign value label
    cat_values <- flattenData$val[[j]]
    if(!is.na(cat_values) && length(cat_values) > 0){

      # category label
      cat_labl <- cat_values[["labl"]]

      # category value
      cat_val <- cat_values[["catValu"]]

      # variable levels and labels
      for(i in 1:length(cat_val)){
          # set value label if label is not empty
          if(!is.null(cat_labl[[i]]) && !is.na(cat_labl[[i]]) && cat_labl[[i]] != ""){
              # trim label string, since dta doesn't support label of length greater than 30
              str_label <- cat_labl[[i]]
              #if(toupper(type) == 'DTA'){
                  #if(nchar(str_label) > 30) {
                      #str_label <- strtrim(str_label,30)
                  #}
              #}
              # Convert category value to the variable type
              # to resolve `x` and `labels` must be same type
              value <- cat_val[[i]]
              if(!is.na(variable.type)){
                class(value) <- variable.type
              }
              # set value label using labelled packge. This is closer to orginal .DTA
              val_label(DF_DATA[[variable.name]], value) <- str_label
          }
      }
    }

    # set format for numeric type
    if (variable.type == "numeric"){

      # variable width
      variable.width <- 16

      if(!is.na(flattenData$width[[j]])){
        variable.width <- flattenData$width[[j]]
      }

      # variable decimal
      variable.dcml <- 0
      if(!is.na(flattenData$dcml[[j]])){
        variable.dcml <- flattenData$dcml[[j]]
      }

      # set format
      # (Link to stata format: http://www.stata.com/manuals13/dformat.pdf )
      # (Link to spss format: https://www.spss-tutorials.com/spss-variable-types-and-formats/)
      if(toupper(type) == 'DTA'){
          # variable format
          variable.format <- paste("%", variable.width, ".", variable.dcml,  "g", sep = "")
          attr(DF_DATA[[variable.name]],"format.stata") <- variable.format
      } else if(toupper(type) == 'SAV'){
          # variable format
          variable.format <- paste("F", variable.width, ".", variable.dcml, sep = "")
          attr(DF_DATA[[variable.name]],"format.spss") <- variable.format
      }

    } else if(variable.type == "character"){
        # variable width
        variable.width <- 9

        if(!is.na(flattenData$width[[j]])){
          variable.width <- flattenData$width[[j]]
      }

      # encode to UTF-8 to avoid unicode issues
      # Ref: https://stackoverflow.com/questions/23699271/force-character-vector-encoding-from-unknown-to-utf-8-in-r
      if(all(stri_enc_isutf8(DF_DATA[[variable.name]]), na.rm = TRUE)){
        DF_DATA[[variable.name]] <- stri_encode(DF_DATA[[variable.name]], "", "UTF-8")
      }        

        #DF_DATA[[variable.name]] <- iconv(DF_DATA[[variable.name]],  "latin1", "UTF-8")
        if(toupper(type) == 'DTA'){
          # variable format
          variable.format <- paste("%", variable.width, "s", sep = "")
          attr(DF_DATA[[variable.name]],"format.stata") <- variable.format
      } else if(toupper(type) == 'SAV'){
          # variable format
          variable.format <- paste("A", variable.width, sep = "")
          attr(DF_DATA[[variable.name]],"format.stata") <- variable.format
      }

    }

    # variable label (Must be set after Value Label is set)
    variable.label <- flattenData$labl[[j]]

    # unlist label if variable label is list
    if(typeof(variable.label) == "list" ){
      variable.label <- unlist(variable.label)
    }
    # assign var label
   if(!is.null(variable.label) && !is.na(variable.label) && variable.label != ""){
    if (is.numeric(variable.label)) {
      cVariable.Label <- as.character(variable.label)   
    }
    else {
      cVariable.Label <- variable.label
    }
    
    if (length(cVariable.Label) > 1) {
      cVariable.Label <- paste(cVariable.Label,sep=",",collapse="")
    }
    var_label(DF_DATA[[variable.name]]) <- cVariable.Label
  }

  }

}

# re sequence variables based on the order
columnNames <- as.character(unlist(flattenData$name))

# added condition to resolve the error when exports datasets with single variable.
if (typeof(DF_DATA) != "list") {
  DF_DATA <- DF_DATA[, columnNames]
}
#View(DF_DATA)

if (toupper(type) == 'DTA') {
  # Write STATA- .dta file
  write_dta(DF_DATA, outpath, version= versionNo)
} else if (toupper(type) == 'SAV') {
  # Write SPSS- .sav file
  write_sav(DF_DATA, outpath)
} else if (toupper(type) == 'CSV'){
  # Write CSV file.
  write_csv(DF_DATA, outpath, na = "", append = FALSE) 
}

 return(0)

