### import.data.R
###------------------------------------------------------------------------------------------
### What: script to import data set file and export to csv file
### Time-stamp: <2017-06-26 11:21:27 assyst>
###-------------------------------------------------------------------------------------------

## Input parameters
# All values are read positionally from `input`, which is not defined in this
# script (presumably supplied by the host application -- confirm with caller).
# [[1]] path of the dataset file to import
fileName <- input[[1]]
# [[2]] dataset type; compared case-insensitively against DTA, SAV, SAS7BDAT
#       and CSV further below
type <- input[[2]]
# [[3]] path of the output file (csv)
csvFile <- input[[3]]
# [[4]] library path (R library path embedded in MDE); the value "MAC" or an
#       empty string makes the script load packages from the default location
libPath <- input[[4]]
# [[5]] memory limit used to handle large files; it is passed to
#       memory.limit() below, and NULL skips that step
memoryLimit <- input[[5]]

# Load the packages used by this script (haven, stringr, readr).
# If a library path is given, load them from that location; otherwise load
# them from the default library location.
# R is not embedded in the MAC version of MDE: there libPath carries the
# marker value "MAC", and packages are loaded from the default location.
# The NULL guard matters: `NULL == "MAC"` is logical(0), which `if` rejects,
# so an absent libPath must be tested with is.null() before it is compared.
if (!is.null(libPath) && libPath == "MAC") {
  libPath <- NULL
  Sys.setlocale(category = "LC_ALL", locale = "UTF-8")
}
if (is.null(libPath) || libPath == "") {
  library(haven)
  library(stringr)
  library(readr)
} else {
  # Add libPath to the library search path as well, so that it is also
  # searched when the dependencies of these packages are loaded.
  .libPaths(libPath)
  library(haven, lib.loc = libPath)
  library(stringr, lib.loc = libPath)
  library(readr, lib.loc = libPath)
}

# Set the memory limit (to read large files).
# Skipped entirely when no limit was supplied.
if (!is.null(memoryLimit)) {
  # Release unused memory before raising the limit.
  gc()
  # memory.limit() only has an effect on Windows builds of R older than 4.2.0;
  # on every other platform/version it is a stub that just emits a warning,
  # so the call is made only where it can actually change the limit.
  if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
    memory.limit(size = as.numeric(memoryLimit))
  }
}

# Read the dataset file into DF_DATA.
# The type is matched case-insensitively against DTA, SAV, SAS7BDAT and CSV.
# Any other value stops the script here with a clear message, instead of
# leaving DF_DATA undefined and failing later with an unrelated error.
fileType <- toupper(type)
if (length(fileType) != 1L || is.na(fileType)) {
  stop("Dataset type must be a single non-missing string", call. = FALSE)
}

DF_DATA <- switch(
  fileType,
  DTA = read_dta(fileName),
  SAV = read_spss(fileName),
  SAS7BDAT = read_sas(fileName),
  # Text columns are read as factors so they can be turned into codes below.
  CSV = read.csv(fileName, stringsAsFactors = TRUE),
  stop("Unsupported dataset type: ", type, call. = FALSE)
)

if (fileType == "CSV") {
  # Replace every factor column by its numeric level codes (1, 2, ...);
  # otherwise the text labels would be exported to the CSV file.
  # factor() rebuilds the levels from the values present, and as.numeric()
  # then returns the position of each value within those levels.
  # Iterating over the column names is safe for a data set without columns.
  for (varName in colnames(DF_DATA)) {
    if (is.factor(DF_DATA[[varName]])) {
      DF_DATA[[varName]] <- as.numeric(factor(DF_DATA[[varName]]))
    }
  }
}

# Replace empty strings with NA in every character column.
# vapply() always returns a logical vector (also for a data set without
# columns), so the mask is a valid column subscript; it is computed once and
# reused on both sides of the assignment.
isCharCol <- vapply(DF_DATA, is.character, logical(1))
DF_DATA[isCharCol] <- lapply(DF_DATA[isCharCol], zap_empty)

# Store every missing value as plain NA. is.na() is also TRUE for NaN, so NaN
# entries are turned into NA here as well.
DF_DATA[ is.na(DF_DATA) ] <- NA

# To resolve the unicode issues, the readr package is used instead of haven to
# write the csv file. Missing values are written as "*"; an existing file is
# overwritten (append = FALSE).
write_csv(DF_DATA, csvFile, na = "*", append = FALSE)

# Report success to the caller.
# NOTE(review): return() at top level only works when the host evaluates this
# script inside a function -- confirm against the caller.
return(0)