# This scanner uses the recursive descent method.
#
# The char pointers token_str and scan_str are pointers to the input string as
# in the following example.
#
#  | g | a | m | m | a |   | a | l | p | h | a |
#    ^                   ^
#    token_str           scan_str
#
# The char pointer token_buf points to a malloc buffer.
#
#  | g | a | m | m | a | \0 |
#    ^
#    token_buf
#
# In the sequence of method invocations for scanning,
# first we do the calls for scanning the operands
# of the operators of least precedence.
# So, since precedence in maths goes something like
# (form high to low) exponents, mult/div, plus/minus
# so we scan first for terms, then factors, then powers.
# That's the general idea, but of course we also have to deal
# with things like parens, non-commutative
# dot (or inner) product, assignments and tests,
# function calls etc.
# Note that a^1/2 is, correctly, a/2, not, incorrectly, sqrt(a),
# see comment in related test in power.coffee for more about this.

#  Notes:
#
#  Formerly add() and multiply() were used to construct expressions but
#  this preevaluation caused problems.
#
#  For example, suppose A has the floating point value inf.
#
#  Before, the expression A/A resulted in 1 because the scanner would
#  divide the symbols.
#
#  After removing add() and multiply(), A/A results in nan which is the
#  correct result.
#
#  The functions negate() and inverse() are used but they do not cause
#  problems with preevaluation of symbols.


T_INTEGER = 1001
T_DOUBLE = 1002
T_SYMBOL = 1003
T_FUNCTION = 1004
T_NEWLINE = 1006
T_STRING = 1007
T_GTEQ = 1008
T_LTEQ = 1009
T_EQ = 1010
T_NEQ = 1011
T_QUOTASSIGN = 1012

token = ""
newline_flag = 0
meta_mode = 0

input_str = 0
scan_str = 0
token_str = 0
token_buf = 0

lastFoundSymbol = null
symbolsRightOfAssignment = null
symbolsLeftOfAssignment = null
isSymbolLeftOfAssignment = null
scanningParameters = null
functionInvokationsScanningStack = null
skipRootVariableToBeSolved = false
assignmentFound = null


# Returns number of chars scanned and expr on stack.

# Returns zero when nothing left to scan.

# takes a string

scanned = ""
scan = (s) ->
  if DEBUG then console.log "#### scanning " + s
  #if s=="y=x"
  #  debugger
  #if s=="y"
  #  debugger
  #if s=="i=sqrt(-1)"
  #  debugger

  lastFoundSymbol = null
  symbolsRightOfAssignment = []
  symbolsLeftOfAssignment = []
  isSymbolLeftOfAssignment = true
  scanningParameters = []
  functionInvokationsScanningStack = [""]
  assignmentFound = false


  scanned = s
  meta_mode = 0
  expanding++
  input_str = 0
  scan_str = 0
  get_next_token()
  if (token == "")
    push(symbol(NIL))
    expanding--
    return 0
  scan_stmt()
  expanding--

  if !assignmentFound
    symbolsInExpressionsWithoutAssignments = symbolsInExpressionsWithoutAssignments.concat symbolsLeftOfAssignment

  return token_str - input_str

# takes a string
scan_meta = (s) ->
  scanned = s
  meta_mode = 1
  expanding++
  input_str = 0
  scan_str = 0
  get_next_token()
  if (token == "")
    push(symbol(NIL))
    expanding--
    return 0
  scan_stmt()
  expanding--
  return token_str - input_str

scan_stmt = ->
  scan_relation()

  assignmentIsOfQuotedType = false

  if token == T_QUOTASSIGN
    assignmentIsOfQuotedType = true

  if (token == T_QUOTASSIGN or token == '=')
    symbolLeftOfAssignment = lastFoundSymbol
    if DEBUG then console.log("assignment!")
    assignmentFound = true
    isSymbolLeftOfAssignment = false

    get_next_token()
    push_symbol(SETQ)
    swap()

    # if it's a := then add a quote
    if (assignmentIsOfQuotedType)
      push_symbol(QUOTE)

    scan_relation()

    # if it's a := then you have to list
    # together the quote and its argument
    if assignmentIsOfQuotedType
      list(2)

    list(3)

    isSymbolLeftOfAssignment = true

    if codeGen

      # in case of re-assignment, the symbol on the
      # left will also be in the set of the symbols
      # on the right. In that case just remove it from
      # the symbols on the right.
      indexOfSymbolLeftOfAssignment = symbolsRightOfAssignment.indexOf(symbolLeftOfAssignment)
      if indexOfSymbolLeftOfAssignment != -1
        symbolsRightOfAssignment.splice(indexOfSymbolLeftOfAssignment, 1)
        symbolsHavingReassignments.push symbolLeftOfAssignment
      
      # print out the immediate dependencies
      if DEBUG
        console.log "locally, " + symbolLeftOfAssignment + " depends on: "
        for i in symbolsRightOfAssignment
          console.log "  " + i

      # ok add the local dependencies to the existing
      # dependencies of this left-value symbol
      
      # create the exiting dependencies list if it doesn't exist
      symbolsDependencies[symbolLeftOfAssignment] ?= []
      existingDependencies = symbolsDependencies[symbolLeftOfAssignment]

      # copy over the new dependencies to the existing
      # dependencies avoiding repetitions
      for i in symbolsRightOfAssignment
        if existingDependencies.indexOf(i) == -1
          existingDependencies.push i

      symbolsRightOfAssignment = []

scan_relation = ->
  scan_expression()
  switch (token)
    when T_EQ
      push_symbol(TESTEQ)
      swap()
      get_next_token()
      scan_expression()
      list(3)
    when T_NEQ
      push_symbol(NOT)
      swap()
      push_symbol(TESTEQ)
      swap()
      get_next_token()
      scan_expression()
      list(3)
      list(2)
    when T_LTEQ
      push_symbol(TESTLE)
      swap()
      get_next_token()
      scan_expression()
      list(3)
    when T_GTEQ
      push_symbol(TESTGE)
      swap()
      get_next_token()
      scan_expression()
      list(3)
    when '<'
      push_symbol(TESTLT)
      swap()
      get_next_token()
      scan_expression()
      list(3)
    when '>'
      push_symbol(TESTGT)
      swap()
      get_next_token()
      scan_expression()
      list(3)

scan_expression = ->
  h = tos
  switch token
    when '+'
      get_next_token()
      scan_term()
    when '-'
      get_next_token()
      scan_term()
      negate()
    else
      scan_term()

  while (newline_flag == 0 && (token == '+' || token == '-'))
    if (token == '+')
      get_next_token()
      scan_term()
    else
      get_next_token()
      scan_term()
      negate()

  if (tos - h > 1)
    list(tos - h)
    push_symbol(ADD)
    swap()
    cons()

is_factor = ->

  if token.charCodeAt?(0) == dotprod_unicode
    return 1

  switch (token)
    when '*', '/'
      return 1
    when '(', T_SYMBOL, T_FUNCTION, T_INTEGER, T_DOUBLE, T_STRING
      if (newline_flag) # implicit mul can't cross line
        scan_str = token_str  # better error display
        return 0
      else
        return 1
  return 0


simplify_1_in_products = (tos,h) ->
  if (tos > h && isrational(stack[tos - 1]) && equaln(stack[tos - 1], 1))
    pop()

# calculate away consecutive constants
multiply_consecutive_constants = (tos,h)->
  if (tos > h + 1 && isNumericAtom(stack[tos - 2]) && isNumericAtom(stack[tos - 1]))
    multiply()


scan_term = ->
  h = tos

  scan_factor()

  if parse_time_simplifications
    simplify_1_in_products(tos,h)

  while (is_factor())
    if (token == '*')
      get_next_token()
      scan_factor()
    else if (token == '/')
      # in case of 1/... then
      # we scanned the 1, we get rid
      # of it because otherwise it becomes
      # an extra factor that wasn't there and
      # things like
      # 1/(2*a) become 1*(1/(2*a))
      simplify_1_in_products(tos,h)
      get_next_token()
      scan_factor()
      inverse()
    else if (token.charCodeAt?(0) == dotprod_unicode)
      get_next_token()
      push_symbol(INNER)
      swap()
      scan_factor()
      list(3)

    else
      scan_factor()

    if parse_time_simplifications
      multiply_consecutive_constants(tos,h)
      simplify_1_in_products(tos,h)

  if (h == tos)
    push_integer(1)
  else if (tos - h > 1)
    list(tos - h)
    push_symbol(MULTIPLY)
    swap()
    cons()

scan_power = ->
  if (token == '^')
    get_next_token()
    push_symbol(POWER)
    swap()
    scan_factor()
    list(3)

scan_index = (h) ->
  #console.log "[ as index"
  get_next_token()
  push_symbol(INDEX)
  swap()
  scan_expression()
  while (token == ',')
    get_next_token()
    scan_expression()
  if (token != ']')
    scan_error("] expected")
  get_next_token()
  list(tos - h)


scan_factor = ->

  h = tos

  #console.log "scan_factor token: " + token

  firstFactorIsNumber = false

  if (token == '(')
    scan_subexpr()
  else if (token == T_SYMBOL)
    scan_symbol()
  else if (token == T_FUNCTION)
    scan_function_call_with_function_name()
  else if token == '['
    #console.log "[ as tensor"
    #debugger
    scan_tensor()
  else if (token == T_INTEGER)
    firstFactorIsNumber = true
    bignum_scan_integer(token_buf)
    get_next_token()
  else if (token == T_DOUBLE)
    firstFactorIsNumber = true
    bignum_scan_float(token_buf)
    get_next_token()
  else if (token == T_STRING)
    scan_string()
  else
    scan_error("syntax error")


  # after the main initial part of the factor that
  # we just scanned above,
  # we can get an arbitrary about of appendages
  # of the form ...[...](...)...
  # If the main part is not a number, then these are all, respectively,
  #  - index references (as opposed to tensor definition) and
  #  - function calls without an explicit function name
  #    (instead of subexpressions or parameters of function
  #    definitions or function calls with an explicit function
  #    name), respectively
  while token == '[' or token == '(' and newline_flag == 0 and !firstFactorIsNumber
    if token == '['
      scan_index(h)
    else if token == '('
      #console.log "( as function call without function name "
      scan_function_call_without_function_name()


  while (token == '!')
    get_next_token()
    push_symbol(FACTORIAL)
    swap()
    list(2)

  # in theory we could already count the
  # number of transposes and simplify them
  # away, but it's not that clean to have
  # multiple places where that happens, and
  # the parser is not the place.
  while (token.charCodeAt?(0) == transpose_unicode)
    get_next_token()
    push_symbol(TRANSPOSE)
    swap()
    list(2)

  scan_power()


addSymbolRightOfAssignment = (theSymbol) ->
  if predefinedSymbolsInGlobalScope_doNotTrackInDependencies.indexOf(theSymbol) == -1 and
    symbolsRightOfAssignment.indexOf(theSymbol) == -1 and
    symbolsRightOfAssignment.indexOf("'"+theSymbol) == -1 and
    !skipRootVariableToBeSolved
      if DEBUG then console.log("... adding symbol: " + theSymbol + " to the set of the symbols right of assignment")
      prefixVar = ""
      for i in [1...functionInvokationsScanningStack.length]
        if functionInvokationsScanningStack[i] != ""
          prefixVar += functionInvokationsScanningStack[i] + "_" + i + "_"

      theSymbol = prefixVar + theSymbol
      symbolsRightOfAssignment.push theSymbol

addSymbolLeftOfAssignment = (theSymbol) ->
  if predefinedSymbolsInGlobalScope_doNotTrackInDependencies.indexOf(theSymbol) == -1 and
    symbolsLeftOfAssignment.indexOf(theSymbol) == -1 and
    symbolsLeftOfAssignment.indexOf("'"+theSymbol) == -1 and
    !skipRootVariableToBeSolved
      if DEBUG then console.log("... adding symbol: " + theSymbol + " to the set of the symbols left of assignment")
      prefixVar = ""
      for i in [1...functionInvokationsScanningStack.length]
        if functionInvokationsScanningStack[i] != ""
          prefixVar += functionInvokationsScanningStack[i] + "_" + i + "_"

      theSymbol = prefixVar + theSymbol
      symbolsLeftOfAssignment.push theSymbol

scan_symbol = ->
  if (token != T_SYMBOL)
    scan_error("symbol expected")
  if (meta_mode && token_buf.length == 1)
    switch (token_buf[0])
      when 'a'
        push(symbol(METAA))
      when 'b'
        push(symbol(METAB))
      when 'x'
        push(symbol(METAX))
      else
        push(usr_symbol(token_buf))
  else
    push(usr_symbol(token_buf))
  #console.log "found symbol: " + token_buf

  if scanningParameters.length == 0
    if DEBUG then console.log "out of scanning parameters, processing " + token_buf
    lastFoundSymbol = token_buf
    if isSymbolLeftOfAssignment
      addSymbolLeftOfAssignment token_buf
  else
    if DEBUG then console.log "still scanning parameters, skipping " + token_buf
    if isSymbolLeftOfAssignment
      addSymbolRightOfAssignment "'" + token_buf

  if DEBUG then console.log("found symbol: " + token_buf + " left of assignment: " + isSymbolLeftOfAssignment)
  
  # if we were looking at the right part of an assignment while we
  # found the symbol, then add it to the "symbolsRightOfAssignment"
  # set (we check for duplications)
  if !isSymbolLeftOfAssignment
    addSymbolRightOfAssignment token_buf
  get_next_token()

scan_string = ->
  push new_string(token_buf)
  get_next_token()

scan_function_call_with_function_name = ->
  if DEBUG then console.log "-- scan_function_call_with_function_name start"
  n = 1 # the parameter number as we scan parameters
  p = new U()
  p = usr_symbol(token_buf)

  push(p)
  functionName = token_buf
  if functionName == "roots" or functionName == "defint" or functionName == "sum" or functionName == "product" or functionName == "for"
    functionInvokationsScanningStack.push token_buf
  lastFoundSymbol = token_buf
  if !isSymbolLeftOfAssignment
    addSymbolRightOfAssignment token_buf

  get_next_token()  # open parens
  get_next_token()  # 1st parameter
  scanningParameters.push true
  if (token != ')')
    scan_stmt()
    n++
    while (token == ',')
      get_next_token()
      # roots' disappearing variable, if there, is the second one
      if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("roots") != -1
        symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("roots_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
        skipRootVariableToBeSolved = true
      # sums' disappearing variable, is alsways the second one
      if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("sum") != -1
        symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("sum_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
        skipRootVariableToBeSolved = true
      # product's disappearing variable, is alsways the second one
      if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("product") != -1
        symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("product_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
        skipRootVariableToBeSolved = true
      # for's disappearing variable, is alsways the second one
      if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("for") != -1
        symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("for_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
        skipRootVariableToBeSolved = true
      # defint's disappearing variables can be in positions 2,5,8...
      if functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("defint") != -1 and
        (n == 2 or (n>2 and ((n-2) % 3 == 0))) 
          symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("defint_" + (functionInvokationsScanningStack.length - 1) + "_" + token_buf)).test(x)
          skipRootVariableToBeSolved = true

      scan_stmt()
      skipRootVariableToBeSolved = false
      n++

    # todo refactor this, there are two copies
    # this catches the case where the "roots" variable is not specified
    if n == 2 and functionInvokationsScanningStack[functionInvokationsScanningStack.length - 1].indexOf("roots") != -1
      symbolsRightOfAssignment = symbolsRightOfAssignment.filter (x) -> !(new RegExp("roots_" + (functionInvokationsScanningStack.length - 1) + "_" + "x")).test(x)

  scanningParameters.pop()

  for i in [0..symbolsRightOfAssignment.length]
    if symbolsRightOfAssignment[i]?
      if functionName == "roots"
        symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("roots_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
      if functionName == "defint"
        symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("defint_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
      if functionName == "sum"
        symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("sum_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
      if functionName == "product"
        symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("product_" + (functionInvokationsScanningStack.length - 1) + "_"),"")
      if functionName == "for"
        symbolsRightOfAssignment[i] = symbolsRightOfAssignment[i].replace(new RegExp("for_" + (functionInvokationsScanningStack.length - 1) + "_"),"")

  if (token != ')')
    scan_error(") expected")

  get_next_token()
  list(n)
  if functionName == "roots" or functionName == "defint" or functionName == "sum" or functionName == "product" or functionName == "for"
    functionInvokationsScanningStack.pop()
  if functionName == symbol(PATTERN).printname
    patternHasBeenFound = true

  if DEBUG then console.log "-- scan_function_call_with_function_name end"

scan_function_call_without_function_name = ->
  if DEBUG then console.log "-- scan_function_call_without_function_name start"

  # the function will have to be looked up
  # at runtime (i.e. we need to evaulate something to find it
  # e.g. it might be inside a tensor, so we'd need to evaluate
  # a tensor element access in that case)
  push_symbol(EVAL)
  swap()
  list(2)

  n = 1 # the parameter number as we scan parameters
  get_next_token()  # left paren
  scanningParameters.push true
  if (token != ')')
    scan_stmt()
    n++
    while (token == ',')
      get_next_token()
      scan_stmt()
      n++

  scanningParameters.pop()


  if (token != ')')
    scan_error(") expected")

  get_next_token()
  list(n)

  if DEBUG then console.log "-- scan_function_call_without_function_name end: " + stack[tos-1]

# scan subexpression

scan_subexpr = ->
  n = 0
  if (token != '(')
    scan_error("( expected")
  get_next_token()
  scan_stmt()
  if (token != ')')
    scan_error(") expected")
  get_next_token()

scan_tensor = ->
  n = 0
  if (token != '[')
    scan_error("[ expected")

  get_next_token()

  #console.log "scanning the next statement"
  scan_stmt()

  n = 1
  while (token == ',')
    get_next_token()
    scan_stmt()
    n++

  #console.log "building tensor with elements number: " + n
  build_tensor(n)

  if (token != ']')
    scan_error("] expected")
  get_next_token()

scan_error = (errmsg) ->
  errorMessage = ""

  # try not to put question mark on orphan line

  while (input_str != scan_str)
    if ((scanned[input_str] == '\n' || scanned[input_str] == '\r') && input_str + 1 == scan_str)
      break
    errorMessage += scanned[input_str++]

  errorMessage += " ? "

  while (scanned[input_str] && (scanned[input_str] != '\n' && scanned[input_str] != '\r'))
    errorMessage +=  scanned[input_str++]

  errorMessage +=  '\n'

  stop(errmsg)

# There are n expressions on the stack, possibly tensors.
#
# This function assembles the stack expressions into a single tensor.
#
# For example, at the top level of the expression ((a,b),(c,d)), the vectors
# (a,b) and (c,d) would be on the stack.

# takes an integer
build_tensor = (n) ->
  # int i, j, k, ndim, nelem

  i = 0

  save()

  p2 = alloc_tensor(n)
  p2.tensor.ndim = 1
  p2.tensor.dim[0] = n
  for i in [0...n]
    p2.tensor.elem[i] = stack[tos-n+i]

  check_tensor_dimensions p2

  moveTos tos - n

  push(p2)

  restore()

get_next_token = ->
  newline_flag = 0
  while (1)
    get_token()
    if (token != T_NEWLINE)
      break
    newline_flag = 1
  if DEBUG then console.log "get_next_token token: " + token
  #if token == ')'
  #  debugger

get_token = ->
  # skip spaces
  while (isspace(scanned[scan_str]))
    if (scanned[scan_str] == '\n' || scanned[scan_str] == '\r')
      token = T_NEWLINE
      scan_str++
      return
    scan_str++

  token_str = scan_str

  # end of string?

  if (scan_str == scanned.length)
    token = ""
    return

  # number?

  if (isdigit(scanned[scan_str]) || scanned[scan_str] == '.')
    while (isdigit(scanned[scan_str]))
      scan_str++
    if (scanned[scan_str] == '.')
      scan_str++
      while (isdigit(scanned[scan_str]))
        scan_str++
      if (scanned[scan_str] == 'e' && (scanned[scan_str+1] == '+' || scanned[scan_str+1] == '-' || isdigit(scanned[scan_str+1])))
        scan_str += 2
        while (isdigit(scanned[scan_str]))
          scan_str++
      token = T_DOUBLE
    else
      token = T_INTEGER
    update_token_buf(token_str, scan_str)
    return

  # symbol?

  if (isalpha(scanned[scan_str]))
    while (isalnumorunderscore(scanned[scan_str]))
      scan_str++
    if (scanned[scan_str] == '(')
      token = T_FUNCTION
    else
      token = T_SYMBOL
    update_token_buf(token_str, scan_str)
    return

  # string ?

  if (scanned[scan_str] == '"')
    scan_str++
    while (scanned[scan_str] != '"')
      #if (scan_str == scanned.length || scanned[scan_str] == '\n' || scanned[scan_str] == '\r')
      if (scan_str == scanned.length - 1)
        scan_str++
        scan_error("runaway string")
        scan_str--
      scan_str++
    scan_str++
    token = T_STRING
    update_token_buf(token_str + 1, scan_str - 1)
    return

  # comment?

  if (scanned[scan_str] == '#' || scanned[scan_str] == '-' && scanned[scan_str+1] == '-')
    while (scanned[scan_str] && scanned[scan_str] != '\n' && scanned[scan_str] != '\r')
      scan_str++
    if (scanned[scan_str])
      scan_str++
    token = T_NEWLINE
    return

  # quote-assignment
  if (scanned[scan_str] == ':' && scanned[scan_str+1] == '=')
    scan_str += 2
    token = T_QUOTASSIGN
    return

  # relational operator?
  if (scanned[scan_str] == '=' && scanned[scan_str+1] == '=')
    scan_str += 2
    token = T_EQ
    return

  # != operator. It's a little odd because
  # "!" is not a "not", which would make things consistent.
  # (it's used for factorial).
  # An alternative would be to use "<>" but it's not used
  # a lot in other languages...
  if (scanned[scan_str] == '!' && scanned[scan_str+1] == '=')
    scan_str += 2
    token = T_NEQ
    return

  if (scanned[scan_str] == '<' && scanned[scan_str+1] == '=')
    scan_str += 2
    token = T_LTEQ
    return

  if (scanned[scan_str] == '>' && scanned[scan_str+1] == '=')
    scan_str += 2
    token = T_GTEQ
    return

  # single char token

  token = scanned[scan_str++]

# both strings
update_token_buf = (a,b) ->

  token_buf = scanned.substring(a,b)


$.scan = scan