// Package efp (Excel Formula Parser) tokenizes an Excel formula using an
// implementation of E. W. Bachtal's algorithm.
//
// Go language version by Ri Xu: https://xuri.me
|
||
|
package efp
|
||
|
|
||
|
import (
|
||
|
"regexp"
|
||
|
"strconv"
|
||
|
"strings"
|
||
|
)
|
||
|
|
||
|
// QuoteDouble, QuoteSingle and the other constants below define the
// characters the tokenizer reacts to and the type / subtype labels it attaches
// to the tokens it produces.
const (
	// Character constants: delimiters and punctuation recognized by the
	// character scan.
	QuoteDouble  = '"'
	QuoteSingle  = '\''
	BracketClose = ']'
	BracketOpen  = '['
	BraceOpen    = '{'
	BraceClose   = '}'
	ParenOpen    = '('
	ParenClose   = ')'
	Semicolon    = ';'
	Whitespace   = ' '
	Comma        = ','
	ErrorStart   = '#'

	// Operator character sets. OperatorsPostfix is the only one the scanner
	// compares against directly; the infix set is also hard-coded in isInfix.
	OperatorsSN      = "+-"
	OperatorsInfix   = "+-*/^&=><"
	OperatorsPostfix = '%'

	// Token types (stored in Token.TType).
	TokenTypeNoop            = "Noop"
	TokenTypeOperand         = "Operand"
	TokenTypeFunction        = "Function"
	TokenTypeSubexpression   = "Subexpression"
	TokenTypeArgument        = "Argument"
	TokenTypeOperatorPrefix  = "OperatorPrefix"
	TokenTypeOperatorInfix   = "OperatorInfix"
	TokenTypeOperatorPostfix = "OperatorPostfix"
	TokenTypeWhitespace      = "Whitespace"
	TokenTypeUnknown         = "Unknown"

	// Token subtypes (stored in Token.TSubType).
	TokenSubTypeStart         = "Start"
	TokenSubTypeStop          = "Stop"
	TokenSubTypeText          = "Text"
	TokenSubTypeNumber        = "Number"
	TokenSubTypeLogical       = "Logical"
	TokenSubTypeError         = "Error"
	TokenSubTypeRange         = "Range"
	TokenSubTypeMath          = "Math"
	TokenSubTypeConcatenation = "Concatenation"
	TokenSubTypeIntersection  = "Intersection"
	TokenSubTypeUnion         = "Union"
)
|
||
|
|
||
|
// expRegex matches a pending token made of a single digit 1-9, an optional
// fractional part and a trailing "E" (for example "1E" or "1.5E"). getTokens
// tests the accumulated characters against it when the next character is "+"
// or "-", so that the sign is kept inside a number written in scientific
// notation instead of being emitted as an operator.
var expRegex = regexp.MustCompile(`^[1-9]{1}(\.[0-9]+)?E{1}$`)
|
||
|
|
||
|
// Token encapsulates a single formula token.
//
// TValue is the token text, TType is one of the TokenType* constants and
// TSubType is one of the TokenSubType* constants or the empty string.
type Token struct {
	TValue   string
	TType    string
	TSubType string
}
|
||
|
|
||
|
// Tokens directly maps the ordered list of tokens.
|
||
|
// Attributes:
|
||
|
//
|
||
|
// items - Ordered list
|
||
|
// index - Current position in the list
|
||
|
type Tokens struct {
|
||
|
Index int
|
||
|
Items []Token
|
||
|
}
|
||
|
|
||
|
// Parser inheritable container. TokenStack directly maps a LIFO stack of
|
||
|
// tokens.
|
||
|
type Parser struct {
|
||
|
Formula string
|
||
|
fRune []rune
|
||
|
Tokens Tokens
|
||
|
TokenStack Tokens
|
||
|
Offset int
|
||
|
InString bool
|
||
|
InPath bool
|
||
|
InRange bool
|
||
|
InError bool
|
||
|
}
|
||
|
|
||
|
// isInComparisonSet reports whether the first two runes of r form one of the
// two-character comparison operators "<=", ">=" or "<>". Slices shorter than
// two runes never match; runes after the second one are ignored.
func isInComparisonSet(r []rune) bool {
	if len(r) < 2 {
		return false
	}
	switch r[0] {
	case '<':
		return r[1] == '=' || r[1] == '>'
	case '>':
		return r[1] == '='
	}
	return false
}
|
||
|
|
||
|
// isInfix reports whether r is one of the single-character infix operators
// + - * / ^ & = > <.
func isInfix(r rune) bool {
	switch r {
	case '+', '-', '*', '/', '^', '&', '=', '>', '<':
		return true
	}
	return false
}
|
||
|
|
||
|
// isAnError reports whether r spells exactly one of the recognized formula
// error literals. The comparison is case-sensitive and covers the whole
// slice, so prefixes and longer texts do not match.
func isAnError(r []rune) bool {
	// One conversion and a string switch replace ten freshly built rune
	// slices per call; this function runs once per character while the
	// scanner is inside an error literal.
	switch string(r) {
	case "#NULL!",
		"#DIV/0!",
		"#VALUE!",
		"#REF!",
		"#NAME?",
		"#NUM!",
		"#N/A",
		"#SPILL!",
		"#CALC!",
		"#GETTING_DATA":
		return true
	}
	return false
}
|
||
|
|
||
|
// runesEqual reports whether a and b have the same length and hold the same
// runes in the same order. Nil and empty slices compare equal.
func runesEqual(a, b []rune) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
|
||
|
|
||
|
// fToken provides function to encapsulate a formula token.
|
||
|
func fToken(value, tokenType, subType string) Token {
|
||
|
return Token{
|
||
|
TValue: value,
|
||
|
TType: tokenType,
|
||
|
TSubType: subType,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// fTokens provides function to handle an ordered list of tokens.
|
||
|
func fTokens(size, cap int) Tokens {
|
||
|
if size == 0 && cap == 0 {
|
||
|
return Tokens{
|
||
|
Index: -1,
|
||
|
}
|
||
|
}
|
||
|
return Tokens{
|
||
|
Index: -1,
|
||
|
Items: make([]Token, size, cap),
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// add provides function to add a token to the end of the list.
|
||
|
func (tk *Tokens) add(value, tokenType, subType string) Token {
|
||
|
token := fToken(value, tokenType, subType)
|
||
|
tk.addRef(token)
|
||
|
return token
|
||
|
}
|
||
|
|
||
|
// addRef provides function to add a token to the end of the list.
|
||
|
func (tk *Tokens) addRef(token Token) {
|
||
|
tk.Items = append(tk.Items, token)
|
||
|
}
|
||
|
|
||
|
// reset provides function to reset the index to -1.
|
||
|
func (tk *Tokens) reset() {
|
||
|
tk.Index = -1
|
||
|
}
|
||
|
|
||
|
// BOF provides function to check whether beginning of list.
|
||
|
func (tk *Tokens) BOF() bool {
|
||
|
return tk.Index <= 0
|
||
|
}
|
||
|
|
||
|
// EOF provides function to check whether end of list.
|
||
|
func (tk *Tokens) EOF() bool {
|
||
|
return tk.Index >= (len(tk.Items) - 1)
|
||
|
}
|
||
|
|
||
|
// moveNext provides function to move the index along one.
|
||
|
func (tk *Tokens) moveNext() bool {
|
||
|
if tk.EOF() {
|
||
|
return false
|
||
|
}
|
||
|
tk.Index++
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// current return the current token.
|
||
|
func (tk *Tokens) current() *Token {
|
||
|
if tk.Index == -1 {
|
||
|
return nil
|
||
|
}
|
||
|
return &tk.Items[tk.Index]
|
||
|
}
|
||
|
|
||
|
// next return the next token (leave the index unchanged).
|
||
|
func (tk *Tokens) next() *Token {
|
||
|
if tk.EOF() {
|
||
|
return nil
|
||
|
}
|
||
|
return &tk.Items[tk.Index+1]
|
||
|
}
|
||
|
|
||
|
// previous return the previous token (leave the index unchanged).
|
||
|
func (tk *Tokens) previous() *Token {
|
||
|
if tk.Index < 1 {
|
||
|
return nil
|
||
|
}
|
||
|
return &tk.Items[tk.Index-1]
|
||
|
}
|
||
|
|
||
|
// push provides function to push a token onto the stack.
|
||
|
func (tk *Tokens) push(token Token) {
|
||
|
tk.Items = append(tk.Items, token)
|
||
|
}
|
||
|
|
||
|
// pop provides function to pop a token off the stack.
|
||
|
func (tk *Tokens) pop() Token {
|
||
|
if len(tk.Items) == 0 {
|
||
|
return Token{
|
||
|
TType: TokenTypeFunction,
|
||
|
TSubType: TokenSubTypeStop,
|
||
|
}
|
||
|
}
|
||
|
t := tk.Items[len(tk.Items)-1]
|
||
|
tk.Items = tk.Items[:len(tk.Items)-1]
|
||
|
return fToken("", t.TType, TokenSubTypeStop)
|
||
|
}
|
||
|
|
||
|
// token provides function to non-destructively return the top item on the
|
||
|
// stack.
|
||
|
func (tk *Tokens) token() *Token {
|
||
|
if len(tk.Items) > 0 {
|
||
|
return &tk.Items[len(tk.Items)-1]
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// value return the top token's value.
|
||
|
func (tk *Tokens) value() string {
|
||
|
if tk.token() == nil {
|
||
|
return ""
|
||
|
}
|
||
|
return tk.token().TValue
|
||
|
}
|
||
|
|
||
|
// tp return the top token's type.
|
||
|
func (tk *Tokens) tp() string {
|
||
|
if tk.token() == nil {
|
||
|
return ""
|
||
|
}
|
||
|
return tk.token().TType
|
||
|
}
|
||
|
|
||
|
// subtype return the top token's subtype.
|
||
|
func (tk *Tokens) subtype() string {
|
||
|
if tk.token() == nil {
|
||
|
return ""
|
||
|
}
|
||
|
return tk.token().TSubType
|
||
|
}
|
||
|
|
||
|
// ExcelParser provides function to parse an Excel formula into a stream of
|
||
|
// tokens.
|
||
|
func ExcelParser() Parser {
|
||
|
return Parser{}
|
||
|
}
|
||
|
|
||
|
// getTokens return a token stream (list).
|
||
|
func (ps *Parser) getTokens() Tokens {
|
||
|
ps.Formula = strings.TrimSpace(ps.Formula)
|
||
|
ps.fRune = []rune(ps.Formula)
|
||
|
if len(ps.fRune) > 0 && ps.fRune[0] != '=' {
|
||
|
ps.Formula = "=" + ps.Formula
|
||
|
ps.fRune = []rune(ps.Formula)
|
||
|
}
|
||
|
|
||
|
var token []rune
|
||
|
|
||
|
// state-dependent character evaluation (order is important)
|
||
|
for !ps.EOF() {
|
||
|
|
||
|
// double-quoted strings
|
||
|
// embeds are doubled
|
||
|
// end marks token
|
||
|
if ps.InString {
|
||
|
if ps.currentChar() == QuoteDouble {
|
||
|
if ps.nextChar() == QuoteDouble {
|
||
|
token = append(token, QuoteDouble)
|
||
|
ps.Offset++
|
||
|
} else {
|
||
|
ps.InString = false
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, TokenSubTypeText)
|
||
|
token = token[:0]
|
||
|
}
|
||
|
} else {
|
||
|
token = append(token, ps.currentChar())
|
||
|
}
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// single-quoted strings (links)
|
||
|
// embeds are double
|
||
|
// end does not mark a token
|
||
|
if ps.InPath {
|
||
|
if ps.currentChar() == QuoteSingle {
|
||
|
if ps.nextChar() == QuoteSingle {
|
||
|
token = append(token, QuoteSingle)
|
||
|
ps.Offset++
|
||
|
} else {
|
||
|
ps.InPath = false
|
||
|
}
|
||
|
} else {
|
||
|
token = append(token, ps.currentChar())
|
||
|
}
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// bracketed strings (range offset or linked workbook name)
|
||
|
// no embeds (changed to "()" by Excel)
|
||
|
// end does not mark a token
|
||
|
if ps.InRange {
|
||
|
if ps.currentChar() == BracketClose {
|
||
|
ps.InRange = false
|
||
|
}
|
||
|
token = append(token, ps.currentChar())
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// error values
|
||
|
// end marks a token, determined from absolute list of values
|
||
|
if ps.InError {
|
||
|
token = append(token, ps.currentChar())
|
||
|
ps.Offset++
|
||
|
|
||
|
if isAnError(token) {
|
||
|
ps.InError = false
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, TokenSubTypeError)
|
||
|
token = token[:0]
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// scientific notation check
|
||
|
if len(token) > 1 && (ps.currentChar() == '+' || ps.currentChar() == '-') {
|
||
|
if expRegex.MatchString(string(token)) {
|
||
|
token = append(token, ps.currentChar())
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// independent character evaluation (order not important)
|
||
|
// establish state-dependent character evaluations
|
||
|
if ps.currentChar() == QuoteDouble {
|
||
|
if len(token) > 0 {
|
||
|
// not expected
|
||
|
ps.Tokens.add(string(token), TokenTypeUnknown, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.InString = true
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if ps.currentChar() == QuoteSingle {
|
||
|
if len(token) > 0 {
|
||
|
// not expected
|
||
|
ps.Tokens.add(string(token), TokenTypeUnknown, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.InPath = true
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if ps.currentChar() == BracketOpen {
|
||
|
ps.InRange = true
|
||
|
token = append(token, ps.currentChar())
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if ps.currentChar() == ErrorStart {
|
||
|
if len(token) > 0 {
|
||
|
// not expected
|
||
|
ps.Tokens.add(string(token), TokenTypeUnknown, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.InError = true
|
||
|
token = append(token, ps.currentChar())
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// mark start and end of arrays and array rows
|
||
|
if ps.currentChar() == BraceOpen {
|
||
|
if len(token) > 0 {
|
||
|
// not expected
|
||
|
ps.Tokens.add(string(token), TokenTypeUnknown, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.TokenStack.push(ps.Tokens.add("ARRAY", TokenTypeFunction, TokenSubTypeStart))
|
||
|
ps.TokenStack.push(ps.Tokens.add("ARRAYROW", TokenTypeFunction, TokenSubTypeStart))
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if ps.currentChar() == Semicolon {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.addRef(ps.TokenStack.pop())
|
||
|
ps.Tokens.add(string(Comma), TokenTypeArgument, "")
|
||
|
ps.TokenStack.push(ps.Tokens.add("ARRAYROW", TokenTypeFunction, TokenSubTypeStart))
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if ps.currentChar() == BraceClose {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.addRef(ps.TokenStack.pop())
|
||
|
ps.Tokens.addRef(ps.TokenStack.pop())
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// trim white-space
|
||
|
if ps.currentChar() == Whitespace {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.add("", TokenTypeWhitespace, "")
|
||
|
ps.Offset++
|
||
|
for (ps.currentChar() == Whitespace) && (!ps.EOF()) {
|
||
|
ps.Offset++
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// multi-character comparators
|
||
|
if isInComparisonSet(ps.doubleChar()) {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.add(string(ps.doubleChar()), TokenTypeOperatorInfix, TokenSubTypeLogical)
|
||
|
ps.Offset += 2
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// standard infix operators
|
||
|
if isInfix(ps.currentChar()) {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.add(string(ps.currentChar()), TokenTypeOperatorInfix, "")
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// standard postfix operators
|
||
|
if ps.currentChar() == OperatorsPostfix {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.add(string(ps.currentChar()), TokenTypeOperatorPostfix, "")
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// start subexpression or function
|
||
|
if ps.currentChar() == ParenOpen {
|
||
|
if len(token) > 0 {
|
||
|
ps.TokenStack.push(ps.Tokens.add(string(token), TokenTypeFunction, TokenSubTypeStart))
|
||
|
token = token[:0]
|
||
|
} else {
|
||
|
ps.TokenStack.push(ps.Tokens.add("", TokenTypeSubexpression, TokenSubTypeStart))
|
||
|
}
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// function, subexpression, array parameters
|
||
|
if ps.currentChar() == Comma {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
if ps.TokenStack.tp() != TokenTypeFunction {
|
||
|
ps.Tokens.add(string(ps.currentChar()), TokenTypeOperatorInfix, TokenSubTypeUnion)
|
||
|
} else {
|
||
|
ps.Tokens.add(string(ps.currentChar()), TokenTypeArgument, "")
|
||
|
}
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// stop subexpression
|
||
|
if ps.currentChar() == ParenClose {
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
token = token[:0]
|
||
|
}
|
||
|
ps.Tokens.addRef(ps.TokenStack.pop())
|
||
|
ps.Offset++
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
// token accumulation
|
||
|
token = append(token, ps.currentChar())
|
||
|
ps.Offset++
|
||
|
}
|
||
|
|
||
|
// dump remaining accumulation
|
||
|
if len(token) > 0 {
|
||
|
ps.Tokens.add(string(token), TokenTypeOperand, "")
|
||
|
}
|
||
|
|
||
|
// move all tokens to a new collection, excluding all unnecessary white-space tokens
|
||
|
tokens2 := fTokens(0, len(ps.Tokens.Items))
|
||
|
|
||
|
for ps.Tokens.moveNext() {
|
||
|
token := ps.Tokens.current()
|
||
|
|
||
|
if token.TType == TokenTypeWhitespace {
|
||
|
if ps.Tokens.BOF() || ps.Tokens.EOF() {
|
||
|
} else if !(((ps.Tokens.previous().TType == TokenTypeFunction) && (ps.Tokens.previous().TSubType == TokenSubTypeStop)) || ((ps.Tokens.previous().TType == TokenTypeSubexpression) && (ps.Tokens.previous().TSubType == TokenSubTypeStop)) || (ps.Tokens.previous().TType == TokenTypeOperand)) {
|
||
|
} else if !(((ps.Tokens.next().TType == TokenTypeFunction) && (ps.Tokens.next().TSubType == TokenSubTypeStart)) || ((ps.Tokens.next().TType == TokenTypeSubexpression) && (ps.Tokens.next().TSubType == TokenSubTypeStart)) || (ps.Tokens.next().TType == TokenTypeOperand)) {
|
||
|
} else {
|
||
|
tokens2.add(token.TValue, TokenTypeOperatorInfix, TokenSubTypeIntersection)
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
tokens2.addRef(Token{
|
||
|
TValue: token.TValue,
|
||
|
TType: token.TType,
|
||
|
TSubType: token.TSubType,
|
||
|
})
|
||
|
}
|
||
|
|
||
|
// switch infix "-" operator to prefix when appropriate, switch infix "+"
|
||
|
// operator to noop when appropriate, identify operand and infix-operator
|
||
|
// subtypes, pull "@" from in front of function names
|
||
|
for tokens2.moveNext() {
|
||
|
token := tokens2.current()
|
||
|
if (token.TType == TokenTypeOperatorInfix) && (len(token.TValue) == 1 && token.TValue[0] == '-') {
|
||
|
if tokens2.BOF() {
|
||
|
token.TType = TokenTypeOperatorPrefix
|
||
|
} else if ((tokens2.previous().TType == TokenTypeFunction) && (tokens2.previous().TSubType == TokenSubTypeStop)) || ((tokens2.previous().TType == TokenTypeSubexpression) && (tokens2.previous().TSubType == TokenSubTypeStop)) || (tokens2.previous().TType == TokenTypeOperatorPostfix) || (tokens2.previous().TType == TokenTypeOperand) {
|
||
|
token.TSubType = TokenSubTypeMath
|
||
|
} else {
|
||
|
token.TType = TokenTypeOperatorPrefix
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if (token.TType == TokenTypeOperatorInfix) && (len(token.TValue) == 1 && token.TValue[0] == '+') {
|
||
|
if tokens2.BOF() {
|
||
|
token.TType = TokenTypeNoop
|
||
|
} else if (tokens2.previous().TType == TokenTypeFunction) && (tokens2.previous().TSubType == TokenSubTypeStop) || ((tokens2.previous().TType == TokenTypeSubexpression) && (tokens2.previous().TSubType == TokenSubTypeStop) || (tokens2.previous().TType == TokenTypeOperatorPostfix) || (tokens2.previous().TType == TokenTypeOperand)) {
|
||
|
token.TSubType = TokenSubTypeMath
|
||
|
} else {
|
||
|
token.TType = TokenTypeNoop
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if (token.TType == TokenTypeOperatorInfix) && (len(token.TSubType) == 0) {
|
||
|
if token.TValue[0] == '<' || token.TValue[0] == '>' || token.TValue[0] == '=' {
|
||
|
token.TSubType = TokenSubTypeLogical
|
||
|
} else if len(token.TValue) == 1 && token.TValue[0] == '&' {
|
||
|
token.TSubType = TokenSubTypeConcatenation
|
||
|
} else {
|
||
|
token.TSubType = TokenSubTypeMath
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if (token.TType == TokenTypeOperand) && (len(token.TSubType) == 0) {
|
||
|
if _, err := strconv.ParseFloat(string(token.TValue), 64); err != nil {
|
||
|
if (string(token.TValue) == "TRUE") || (string(token.TValue) == "FALSE") {
|
||
|
token.TSubType = TokenSubTypeLogical
|
||
|
} else {
|
||
|
token.TSubType = TokenSubTypeRange
|
||
|
}
|
||
|
} else {
|
||
|
token.TSubType = TokenSubTypeNumber
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
if token.TType == TokenTypeFunction {
|
||
|
if (len(token.TValue) > 0) && token.TValue[0] == '@' {
|
||
|
token.TValue = token.TValue[1:]
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
tokens2.reset()
|
||
|
|
||
|
// move all tokens to a new collection, excluding all no-ops
|
||
|
tokens := fTokens(0, len(tokens2.Items))
|
||
|
for tokens2.moveNext() {
|
||
|
if tokens2.current().TType != TokenTypeNoop {
|
||
|
tokens.addRef(Token{
|
||
|
TValue: tokens2.current().TValue,
|
||
|
TType: tokens2.current().TType,
|
||
|
TSubType: tokens2.current().TSubType,
|
||
|
})
|
||
|
}
|
||
|
}
|
||
|
|
||
|
tokens.reset()
|
||
|
if len(tokens.Items) == 0 {
|
||
|
tokens.Items = nil
|
||
|
}
|
||
|
return tokens
|
||
|
}
|
||
|
|
||
|
// doubleChar provides function to get two characters after the current
|
||
|
// position.
|
||
|
func (ps *Parser) doubleChar() []rune {
|
||
|
if len(ps.fRune) >= ps.Offset+2 {
|
||
|
return ps.fRune[ps.Offset : ps.Offset+2]
|
||
|
}
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// currentChar provides function to get the character of the current position.
|
||
|
func (ps *Parser) currentChar() rune {
|
||
|
return ps.fRune[ps.Offset]
|
||
|
}
|
||
|
|
||
|
// nextChar provides function to get the next character of the current position.
|
||
|
func (ps *Parser) nextChar() rune {
|
||
|
if len(ps.fRune) >= ps.Offset+2 {
|
||
|
return ps.fRune[ps.Offset+1]
|
||
|
}
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
// EOF provides function to check whether end of tokens stack.
|
||
|
func (ps *Parser) EOF() bool {
|
||
|
return ps.Offset >= len(ps.fRune)
|
||
|
}
|
||
|
|
||
|
// Parse provides function to parse formula as a token stream (list).
|
||
|
func (ps *Parser) Parse(formula string) []Token {
|
||
|
ps.Formula = formula
|
||
|
ps.Tokens = ps.getTokens()
|
||
|
return ps.Tokens.Items
|
||
|
}
|
||
|
|
||
|
// PrettyPrint provides function to pretty the parsed result with the indented
|
||
|
// format.
|
||
|
func (ps *Parser) PrettyPrint() string {
|
||
|
indent := 0
|
||
|
var output strings.Builder
|
||
|
for _, t := range ps.Tokens.Items {
|
||
|
if t.TSubType == TokenSubTypeStop {
|
||
|
indent--
|
||
|
}
|
||
|
for i := 0; i < indent; i++ {
|
||
|
output.WriteRune('\t')
|
||
|
}
|
||
|
|
||
|
output.WriteString(t.TValue)
|
||
|
output.WriteString(" <")
|
||
|
output.WriteString(t.TType)
|
||
|
output.WriteString("> <")
|
||
|
output.WriteString(t.TSubType)
|
||
|
output.WriteString(">\n")
|
||
|
|
||
|
if t.TSubType == TokenSubTypeStart {
|
||
|
indent++
|
||
|
}
|
||
|
}
|
||
|
return output.String()
|
||
|
}
|
||
|
|
||
|
// Render provides function to get formatted formula after parsed.
|
||
|
func (ps *Parser) Render() string {
|
||
|
var output strings.Builder
|
||
|
for _, t := range ps.Tokens.Items {
|
||
|
if t.TType == TokenTypeFunction && t.TSubType == TokenSubTypeStart {
|
||
|
output.WriteString(t.TValue)
|
||
|
output.WriteRune(ParenOpen)
|
||
|
} else if t.TType == TokenTypeFunction && t.TSubType == TokenSubTypeStop {
|
||
|
output.WriteRune(ParenClose)
|
||
|
} else if t.TType == TokenTypeSubexpression && t.TSubType == TokenSubTypeStart {
|
||
|
output.WriteRune(ParenOpen)
|
||
|
} else if t.TType == TokenTypeSubexpression && t.TSubType == TokenSubTypeStop {
|
||
|
output.WriteRune(ParenClose)
|
||
|
} else if t.TType == TokenTypeOperand && t.TSubType == TokenSubTypeText {
|
||
|
output.WriteRune(QuoteDouble)
|
||
|
output.WriteString(t.TValue)
|
||
|
output.WriteRune(QuoteDouble)
|
||
|
} else if t.TType == TokenTypeOperatorInfix && t.TSubType == TokenSubTypeIntersection {
|
||
|
output.WriteRune(Whitespace)
|
||
|
} else {
|
||
|
output.WriteString(t.TValue)
|
||
|
}
|
||
|
}
|
||
|
return output.String()
|
||
|
}
|