### fn.common.utilities.R
###------------------------------------------------------------------------------------------
### What: common utility functions used across the MDE scripts
###     : 
###     : read.datafile fn: find column classes from variable metadata and read CSV/TSV file 
###     : 
###     : convert.numeric fn: convert numeric strings to numeric values
###     : 
### Time-stamp: <2018-7-20 11:08:27 panapps>
###-------------------------------------------------------------------------------------------

read.datafile <- function(variables,
                      file="",
                      type="csv",
                      digits="",
                   ...)
{
    # Read a delimited data file into a data.frame, typing each column from
    # the variable metadata instead of letting the reader guess.
    #
    # Args:
    #   variables: metadata table with an `internalName` column and, optionally,
    #              a `type` column ("character", "numeric", "date") and a
    #              `varType` column ("double", "integer", "number").
    #   file     : path of the file to read.
    #   type     : "tsv" reads with read_tsv(); any other value reads with read_csv().
    #   digits   : optional value for options(digits=) while reading;
    #              "", NULL or NA leaves the option untouched.
    #   ...      : unused.
    #
    # Returns a data.frame whose column names are exactly those produced by the reader.

    # Pick the reader once so the header pass and the data pass always agree.
    reader <- if (isTRUE(type == "tsv")) read_tsv else read_csv

    # The first line is read on its own, all as text, purely to learn the column names.
    headerRow <- reader(file = file, n_max = 1, col_types = cols(.default = "c"), col_names = FALSE)
    csvHeader <- as.character(headerRow)

    # Column classes required while reading CSV or TSV.
    # If column classes are not provided, the reader guesses the type of each
    # column, which leads to issues such as leading zeros being dropped.
    declaredTypes <- variables$type
    declaredDataTypes <- variables$varType
    columnClasses <- lapply(csvHeader, function(columnName) {
        # match() gives the first exact hit and never picks up rows whose
        # internalName is NA (a logical `==` subset would return all-NA rows for those).
        rowIndex <- if (is.na(columnName)) NA_integer_ else match(columnName, variables$internalName)
        if (is.na(rowIndex)) {
            # Column is not described in the metadata: let the reader guess.
            return("?")
        }

        # A metadata table without the column falls back to "character".
        varType <- if (is.null(declaredTypes)) "character" else as.character(declaredTypes[rowIndex])
        # For numeric variables the exact data type is needed, otherwise precision may be lost.
        dataType <- if (is.null(declaredDataTypes)) "character" else as.character(declaredDataTypes[rowIndex])

        switch(varType,
               "character" = "c",
               "numeric" = switch(dataType, "double" = "d", "integer" = "i", "number" = "n", "d"),
               "date" = "D",
               "?")
    })

    # Raise options(digits) for the duration of the read when asked to, and
    # always restore the previous value on exit.
    if (length(digits) == 1 && !is.na(digits) && digits != "") {
        previousOptions <- options("digits" = as.numeric(digits))
        on.exit(options(previousOptions), add = TRUE)
    }

    # Read the full file:
    #   col_names = TRUE          - first line is the header
    #   col_types = columnClasses - one type code per column, in file order
    #   na = c("", "*")           - empty cells and "*" are treated as missing
    DF_DATA <- reader(file, col_names = TRUE, col_types = columnClasses, na = c("", "*"), quoted_na = TRUE)

    # Convert the tibble to a data.frame, then put the reader's column names back.
    # data.frame() rewrites names it considers invalid, which mangled the
    # non-ASCII column names of Spanish datasets.
    readerNames <- colnames(DF_DATA)
    DF_DATA <- data.frame(DF_DATA)
    colnames(DF_DATA) <- readerNames

    DF_DATA
}

read.matchingVariables <- function(variables,
                            file="",
                            default="?",
                            ...)
{
    # Read a CSV file and return only the requested variables.
    #
    # Args:
    #   variables: character vector of column names wanted in the result.
    #   file     : path of the CSV file.
    #   default  : readr column type code applied to every column (default "?" = guess).
    #   ...      : unused.
    #
    # Returns a data frame holding the requested columns that exist in the
    # file, followed by one all-NA column for each requested name that does not.

    # The header line alone tells us which requested variables the file has.
    headerRow <- read_csv(file=file, n_max = 1, col_types = cols(.default = "c"), col_names = FALSE)
    presentVariables <- intersect(variables, as.character(headerRow))

    # Read the whole file (quietly) and keep only the columns that were asked for.
    allColumns <- suppressWarnings(suppressMessages(
        read_csv(file, col_types = cols(.default = default), col_names = TRUE, na = c("", "*"), quoted_na = TRUE)
    ))
    selected <- allColumns[, presentVariables]

    # A single-column subset may come back as a bare vector/list; wrap it
    # again so the caller always receives a data frame.
    if (!is.data.frame(selected)) {
        selected <- data.frame(selected, stringsAsFactors=FALSE)
        colnames(selected) <- presentVariables
    }

    # Requested variables that the file does not contain are added as NA columns.
    absentVariables <- setdiff(variables, presentVariables)
    selected[absentVariables] <- NA

    selected
}

load.packages <- function(packages,
                    libPath = NULL,
                ...)
{
    # Attach every package named in `packages` with library().
    #
    # Args:
    #   packages: character vector of package names.
    #   libPath : library directory to load from. NULL (the default), NA or ""
    #             means the default library locations are used.
    #   ...     : unused.
    #
    # Returns the list built by lapply() over the library() calls.
    #
    # R is not embedded in the MAC version of MDE, so on macOS packages are
    # always loaded from the default location and `libPath` is ignored.
    platform <- Sys.info()[["sysname"]]

    # length() covers NULL and is.na() covers NA; without these guards either
    # value would make the `if` condition below fail instead of falling back.
    noLibPath <- length(libPath) == 0 || all(is.na(libPath) | libPath == "")

    if (platform == "Darwin" || noLibPath) {
        lapply(packages, library, character.only = TRUE)
    } else {
        # Register the location for later loads as well as for this one.
        .libPaths(libPath)
        lapply(packages, library, lib.loc = libPath, character.only = TRUE)
    }
}

set.configurations <- function(memoryLimit=NULL,
                        lcl='',
                        ...)
{
    # Configure the session locale and, optionally, the memory limit.
    #
    # Args:
    #   memoryLimit: value passed (as numeric) to memory.limit(size=); NULL skips the step.
    #   lcl        : locale name used for LC_CTYPE on non-macOS platforms;
    #                "", NA or NULL leaves the locale untouched.
    #   ...        : unused.
    #
    # Returns the value of memory.limit() when `memoryLimit` is given, otherwise NULL (invisibly).

    # set locale = UTF-8 to resolve the unicode issues
    platform <- Sys.info()[["sysname"]]

    if (platform == "Darwin") {
        Sys.setlocale(category = "LC_ALL", locale = "UTF-8")
    } else if (length(lcl) == 1 && !is.na(lcl) && lcl != "") {
        # The length/NA checks keep a NULL or NA `lcl` from breaking the condition.
        Sys.setlocale("LC_CTYPE", locale = lcl)
    }

    # expand memory limit to handle large files
    # NOTE(review): memory.limit() is documented as Windows-specific and as a
    # no-op stub in recent R releases - confirm this step is still needed.
    if (!is.null(memoryLimit)) {
        gc()
        memory.limit(size = as.numeric(memoryLimit))
    }
}


reset.locale <- function(lcl = '',
                    ...)
{
    # Reset the locale categories to a fixed English locale.
    #
    # Args:
    #   lcl: the locale that was previously applied; the reset only happens
    #        when this is a non-empty, non-NA value. "", NA, NULL or an
    #        omitted argument make the call a no-op.
    #   ...: unused.
    #
    # Nothing is done on macOS. Elsewhere LC_CTYPE, LC_COLLATE, LC_MONETARY and
    # LC_TIME are each set once. Returns the result of the last Sys.setlocale()
    # call, or NULL (invisibly) when nothing was changed.
    platform <- Sys.info()[["sysname"]]

    # The length/NA checks keep a NULL or NA `lcl` from breaking the condition.
    hasLocale <- length(lcl) == 1 && !is.na(lcl) && lcl != ""

    if (platform != "Darwin" && hasLocale) {
        # NOTE(review): this looks like a Windows-style locale name - confirm
        # it is valid on every non-macOS platform this runs on.
        l <- "English_United States.1252"
        Sys.setlocale("LC_CTYPE", l)
        Sys.setlocale("LC_COLLATE", l)
        Sys.setlocale("LC_MONETARY", l)
        Sys.setlocale("LC_TIME", l)
    }
}

encode.UTF <- function(varjson,
                ...)
{
    # Re-encode JSON text as UTF-8 so unicode characters survive being
    # written to the json file.
    #
    # Args:
    #   varjson: character data to encode.
    #   ...    : unused.
    #
    # Returns `varjson` unchanged on macOS, otherwise the result of
    # iconv(varjson, to = "UTF-8").

    # UTF encoding is not required for MAC, so hand the input straight back.
    if (Sys.info()[["sysname"]] == "Darwin") {
        return(varjson)
    }

    iconv(varjson, to = "UTF-8")
}

convert.numeric <- function(variable,
                        libPath = '',
                      ...)
{
    # Convert a vector of numeric strings (or numbers) to numeric values.
    #
    # Args:
    #   variable: vector to convert.
    #   libPath : library directory to load the gmp package from when it is
    #             needed; "", NA or NULL uses the default library locations.
    #   ...     : unused.
    #
    # Returns as.numeric(variable) when the widest formatted value is shorter
    # than 15 characters, otherwise as.bigz(variable) from the gmp package.

    ## --- Setup ---
    # format.info() returns only the width for character/integer input but
    # c(width, digits, exponent flag) for doubles; keep just the width so the
    # comparison below is always a single TRUE/FALSE.
    maxlength <- format.info(variable)[1L]

    if (maxlength < 15) {
        return(as.numeric(variable))
    }

    # NOTE(review): the 15-character threshold is presumably tied to the
    # precision of a double - confirm. The width also counts signs and decimal points.
    if (!("gmp" %in% (.packages()))) {
        useDefaultLib <- length(libPath) == 0 || all(is.na(libPath) | libPath == "")
        if (useDefaultLib) {
            suppressWarnings(suppressMessages(library(gmp)))
        } else {
            suppressWarnings(suppressMessages(library(gmp, lib.loc = libPath)))
        }
    }

    as.bigz(variable)
}

