618 lines
15 KiB
Go
618 lines
15 KiB
Go
|
/*
|
||
|
* Copyright 2021 ByteDance Inc.
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
package ast
|
||
|
|
||
|
import (
|
||
|
`fmt`
|
||
|
`github.com/bytedance/sonic/internal/native/types`
|
||
|
`github.com/bytedance/sonic/internal/rt`
|
||
|
)
|
||
|
|
||
|
const (
	// _DEFAULT_NODE_CAP is the initial capacity of the element slices that
	// Parse allocates for arrays ([]Node) and objects ([]Pair).
	_DEFAULT_NODE_CAP int = 16
)
|
||
|
|
||
|
const (
|
||
|
_ERR_NOT_FOUND types.ParsingError = 33
|
||
|
_ERR_UNSUPPORT_TYPE types.ParsingError = 34
|
||
|
)
|
||
|
|
||
|
var (
|
||
|
ErrNotExist error = newError(_ERR_NOT_FOUND, "value not exists")
|
||
|
ErrUnsupportType error = newError(_ERR_UNSUPPORT_TYPE, "unsupported type")
|
||
|
)
|
||
|
|
||
|
// Parser carries the state of a scan over a single JSON source string.
type Parser struct {
	/* read pointer: index into s of the next byte to examine */
	p int

	/* the JSON text being parsed */
	s string

	/*
	 * noLazy:    when set, Parse decodes arrays and objects immediately
	 *            instead of returning lazy nodes.
	 * skipValue: when set, decodeArray / decodeObject skip over each value
	 *            and store it as a raw node rather than parsing it.
	 */
	noLazy, skipValue bool
}
|
||
|
|
||
|
/** Parser Private Methods **/
|
||
|
|
||
|
func (self *Parser) delim() types.ParsingError {
|
||
|
n := len(self.s)
|
||
|
p := self.lspace(self.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if p >= n {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the delimtier */
|
||
|
if self.s[p] != ':' {
|
||
|
return types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
|
||
|
/* update the read pointer */
|
||
|
self.p = p + 1
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
func (self *Parser) object() types.ParsingError {
|
||
|
n := len(self.s)
|
||
|
p := self.lspace(self.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if p >= n {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the delimtier */
|
||
|
if self.s[p] != '{' {
|
||
|
return types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
|
||
|
/* update the read pointer */
|
||
|
self.p = p + 1
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
func (self *Parser) array() types.ParsingError {
|
||
|
n := len(self.s)
|
||
|
p := self.lspace(self.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if p >= n {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the delimtier */
|
||
|
if self.s[p] != '[' {
|
||
|
return types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
|
||
|
/* update the read pointer */
|
||
|
self.p = p + 1
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
func (self *Parser) lspace(sp int) int {
|
||
|
ns := len(self.s)
|
||
|
for ; sp<ns && isSpace(self.s[sp]); sp+=1 {}
|
||
|
|
||
|
return sp
|
||
|
}
|
||
|
|
||
|
func (self *Parser) decodeArray(ret []Node) (Node, types.ParsingError) {
|
||
|
sp := self.p
|
||
|
ns := len(self.s)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if self.p = self.lspace(sp); self.p >= ns {
|
||
|
return Node{}, types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for empty array */
|
||
|
if self.s[self.p] == ']' {
|
||
|
self.p++
|
||
|
return emptyArrayNode, 0
|
||
|
}
|
||
|
|
||
|
/* allocate array space and parse every element */
|
||
|
for {
|
||
|
var val Node
|
||
|
var err types.ParsingError
|
||
|
|
||
|
if self.skipValue {
|
||
|
/* skip the value */
|
||
|
var start int
|
||
|
if start, err = self.skipFast(); err != 0 {
|
||
|
return Node{}, err
|
||
|
}
|
||
|
if self.p > ns {
|
||
|
return Node{}, types.ERR_EOF
|
||
|
}
|
||
|
t := switchRawType(self.s[start])
|
||
|
if t == _V_NONE {
|
||
|
return Node{}, types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
val = newRawNode(self.s[start:self.p], t)
|
||
|
}else{
|
||
|
/* decode the value */
|
||
|
if val, err = self.Parse(); err != 0 {
|
||
|
return Node{}, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* add the value to result */
|
||
|
ret = append(ret, val)
|
||
|
self.p = self.lspace(self.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if self.p >= ns {
|
||
|
return Node{}, types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the next character */
|
||
|
switch self.s[self.p] {
|
||
|
case ',' : self.p++
|
||
|
case ']' : self.p++; return NewArray(ret), 0
|
||
|
default:
|
||
|
if val.isLazy() {
|
||
|
return newLazyArray(self, ret), 0
|
||
|
}
|
||
|
return Node{}, types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *Parser) decodeObject(ret []Pair) (Node, types.ParsingError) {
|
||
|
sp := self.p
|
||
|
ns := len(self.s)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if self.p = self.lspace(sp); self.p >= ns {
|
||
|
return Node{}, types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for empty object */
|
||
|
if self.s[self.p] == '}' {
|
||
|
self.p++
|
||
|
return emptyObjectNode, 0
|
||
|
}
|
||
|
|
||
|
/* decode each pair */
|
||
|
for {
|
||
|
var val Node
|
||
|
var njs types.JsonState
|
||
|
var err types.ParsingError
|
||
|
|
||
|
/* decode the key */
|
||
|
if njs = self.decodeValue(); njs.Vt != types.V_STRING {
|
||
|
return Node{}, types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
|
||
|
/* extract the key */
|
||
|
idx := self.p - 1
|
||
|
key := self.s[njs.Iv:idx]
|
||
|
|
||
|
/* check for escape sequence */
|
||
|
if njs.Ep != -1 {
|
||
|
if key, err = unquote(key); err != 0 {
|
||
|
return Node{}, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* expect a ':' delimiter */
|
||
|
if err = self.delim(); err != 0 {
|
||
|
return Node{}, err
|
||
|
}
|
||
|
|
||
|
|
||
|
if self.skipValue {
|
||
|
/* skip the value */
|
||
|
var start int
|
||
|
if start, err = self.skipFast(); err != 0 {
|
||
|
return Node{}, err
|
||
|
}
|
||
|
if self.p > ns {
|
||
|
return Node{}, types.ERR_EOF
|
||
|
}
|
||
|
t := switchRawType(self.s[start])
|
||
|
if t == _V_NONE {
|
||
|
return Node{}, types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
val = newRawNode(self.s[start:self.p], t)
|
||
|
} else {
|
||
|
/* decode the value */
|
||
|
if val, err = self.Parse(); err != 0 {
|
||
|
return Node{}, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* add the value to result */
|
||
|
ret = append(ret, Pair{Key: key, Value: val})
|
||
|
self.p = self.lspace(self.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if self.p >= ns {
|
||
|
return Node{}, types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the next character */
|
||
|
switch self.s[self.p] {
|
||
|
case ',' : self.p++
|
||
|
case '}' : self.p++; return NewObject(ret), 0
|
||
|
default:
|
||
|
if val.isLazy() {
|
||
|
return newLazyObject(self, ret), 0
|
||
|
}
|
||
|
return Node{}, types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *Parser) decodeString(iv int64, ep int) (Node, types.ParsingError) {
|
||
|
p := self.p - 1
|
||
|
s := self.s[iv:p]
|
||
|
|
||
|
/* fast path: no escape sequence */
|
||
|
if ep == -1 {
|
||
|
return NewString(s), 0
|
||
|
}
|
||
|
|
||
|
/* unquote the string */
|
||
|
out, err := unquote(s)
|
||
|
|
||
|
/* check for errors */
|
||
|
if err != 0 {
|
||
|
return Node{}, err
|
||
|
} else {
|
||
|
return newBytes(rt.Str2Mem(out)), 0
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/** Parser Interface **/
|
||
|
|
||
|
func (self *Parser) Pos() int {
|
||
|
return self.p
|
||
|
}
|
||
|
|
||
|
func (self *Parser) Parse() (Node, types.ParsingError) {
|
||
|
switch val := self.decodeValue(); val.Vt {
|
||
|
case types.V_EOF : return Node{}, types.ERR_EOF
|
||
|
case types.V_NULL : return nullNode, 0
|
||
|
case types.V_TRUE : return trueNode, 0
|
||
|
case types.V_FALSE : return falseNode, 0
|
||
|
case types.V_STRING : return self.decodeString(val.Iv, val.Ep)
|
||
|
case types.V_ARRAY:
|
||
|
if self.noLazy {
|
||
|
return self.decodeArray(make([]Node, 0, _DEFAULT_NODE_CAP))
|
||
|
}
|
||
|
return newLazyArray(self, make([]Node, 0, _DEFAULT_NODE_CAP)), 0
|
||
|
case types.V_OBJECT:
|
||
|
if self.noLazy {
|
||
|
return self.decodeObject(make([]Pair, 0, _DEFAULT_NODE_CAP))
|
||
|
}
|
||
|
return newLazyObject(self, make([]Pair, 0, _DEFAULT_NODE_CAP)), 0
|
||
|
case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0
|
||
|
case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
|
||
|
default : return Node{}, types.ParsingError(-val.Vt)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *Parser) searchKey(match string) types.ParsingError {
|
||
|
ns := len(self.s)
|
||
|
if err := self.object(); err != 0 {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
/* check for EOF */
|
||
|
if self.p = self.lspace(self.p); self.p >= ns {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for empty object */
|
||
|
if self.s[self.p] == '}' {
|
||
|
self.p++
|
||
|
return _ERR_NOT_FOUND
|
||
|
}
|
||
|
|
||
|
var njs types.JsonState
|
||
|
var err types.ParsingError
|
||
|
/* decode each pair */
|
||
|
for {
|
||
|
|
||
|
/* decode the key */
|
||
|
if njs = self.decodeValue(); njs.Vt != types.V_STRING {
|
||
|
return types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
|
||
|
/* extract the key */
|
||
|
idx := self.p - 1
|
||
|
key := self.s[njs.Iv:idx]
|
||
|
|
||
|
/* check for escape sequence */
|
||
|
if njs.Ep != -1 {
|
||
|
if key, err = unquote(key); err != 0 {
|
||
|
return err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* expect a ':' delimiter */
|
||
|
if err = self.delim(); err != 0 {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
/* skip value */
|
||
|
if key != match {
|
||
|
if _, err = self.skipFast(); err != 0 {
|
||
|
return err
|
||
|
}
|
||
|
} else {
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
/* check for EOF */
|
||
|
self.p = self.lspace(self.p)
|
||
|
if self.p >= ns {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the next character */
|
||
|
switch self.s[self.p] {
|
||
|
case ',':
|
||
|
self.p++
|
||
|
case '}':
|
||
|
self.p++
|
||
|
return _ERR_NOT_FOUND
|
||
|
default:
|
||
|
return types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *Parser) searchIndex(idx int) types.ParsingError {
|
||
|
ns := len(self.s)
|
||
|
if err := self.array(); err != 0 {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
/* check for EOF */
|
||
|
if self.p = self.lspace(self.p); self.p >= ns {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for empty array */
|
||
|
if self.s[self.p] == ']' {
|
||
|
self.p++
|
||
|
return _ERR_NOT_FOUND
|
||
|
}
|
||
|
|
||
|
var err types.ParsingError
|
||
|
/* allocate array space and parse every element */
|
||
|
for i := 0; i < idx; i++ {
|
||
|
|
||
|
/* decode the value */
|
||
|
if _, err = self.skipFast(); err != 0 {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
/* check for EOF */
|
||
|
self.p = self.lspace(self.p)
|
||
|
if self.p >= ns {
|
||
|
return types.ERR_EOF
|
||
|
}
|
||
|
|
||
|
/* check for the next character */
|
||
|
switch self.s[self.p] {
|
||
|
case ',':
|
||
|
self.p++
|
||
|
case ']':
|
||
|
self.p++
|
||
|
return _ERR_NOT_FOUND
|
||
|
default:
|
||
|
return types.ERR_INVALID_CHAR
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0
|
||
|
}
|
||
|
|
||
|
func (self *Node) skipNextNode() *Node {
|
||
|
if !self.isLazy() {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
parser, stack := self.getParserAndArrayStack()
|
||
|
ret := stack.v
|
||
|
sp := parser.p
|
||
|
ns := len(parser.s)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if parser.p = parser.lspace(sp); parser.p >= ns {
|
||
|
return newSyntaxError(parser.syntaxError(types.ERR_EOF))
|
||
|
}
|
||
|
|
||
|
/* check for empty array */
|
||
|
if parser.s[parser.p] == ']' {
|
||
|
parser.p++
|
||
|
self.setArray(ret)
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
var val Node
|
||
|
/* skip the value */
|
||
|
if start, err := parser.skipFast(); err != 0 {
|
||
|
return newSyntaxError(parser.syntaxError(err))
|
||
|
} else {
|
||
|
t := switchRawType(parser.s[start])
|
||
|
if t == _V_NONE {
|
||
|
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
|
||
|
}
|
||
|
val = newRawNode(parser.s[start:parser.p], t)
|
||
|
}
|
||
|
|
||
|
/* add the value to result */
|
||
|
ret = append(ret, val)
|
||
|
parser.p = parser.lspace(parser.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if parser.p >= ns {
|
||
|
return newSyntaxError(parser.syntaxError(types.ERR_EOF))
|
||
|
}
|
||
|
|
||
|
/* check for the next character */
|
||
|
switch parser.s[parser.p] {
|
||
|
case ',':
|
||
|
parser.p++
|
||
|
self.setLazyArray(parser, ret)
|
||
|
return &ret[len(ret)-1]
|
||
|
case ']':
|
||
|
parser.p++
|
||
|
self.setArray(ret)
|
||
|
return &ret[len(ret)-1]
|
||
|
default:
|
||
|
return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (self *Node) skipNextPair() (*Pair) {
|
||
|
if !self.isLazy() {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
parser, stack := self.getParserAndObjectStack()
|
||
|
ret := stack.v
|
||
|
sp := parser.p
|
||
|
ns := len(parser.s)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if parser.p = parser.lspace(sp); parser.p >= ns {
|
||
|
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
|
||
|
}
|
||
|
|
||
|
/* check for empty object */
|
||
|
if parser.s[parser.p] == '}' {
|
||
|
parser.p++
|
||
|
self.setObject(ret)
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
/* decode one pair */
|
||
|
var val Node
|
||
|
var njs types.JsonState
|
||
|
var err types.ParsingError
|
||
|
|
||
|
/* decode the key */
|
||
|
if njs = parser.decodeValue(); njs.Vt != types.V_STRING {
|
||
|
return &Pair{"", *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
|
||
|
}
|
||
|
|
||
|
/* extract the key */
|
||
|
idx := parser.p - 1
|
||
|
key := parser.s[njs.Iv:idx]
|
||
|
|
||
|
/* check for escape sequence */
|
||
|
if njs.Ep != -1 {
|
||
|
if key, err = unquote(key); err != 0 {
|
||
|
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* expect a ':' delimiter */
|
||
|
if err = parser.delim(); err != 0 {
|
||
|
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
|
||
|
}
|
||
|
|
||
|
/* skip the value */
|
||
|
if start, err := parser.skipFast(); err != 0 {
|
||
|
return &Pair{key, *newSyntaxError(parser.syntaxError(err))}
|
||
|
} else {
|
||
|
t := switchRawType(parser.s[start])
|
||
|
if t == _V_NONE {
|
||
|
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
|
||
|
}
|
||
|
val = newRawNode(parser.s[start:parser.p], t)
|
||
|
}
|
||
|
|
||
|
/* add the value to result */
|
||
|
ret = append(ret, Pair{Key: key, Value: val})
|
||
|
parser.p = parser.lspace(parser.p)
|
||
|
|
||
|
/* check for EOF */
|
||
|
if parser.p >= ns {
|
||
|
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_EOF))}
|
||
|
}
|
||
|
|
||
|
/* check for the next character */
|
||
|
switch parser.s[parser.p] {
|
||
|
case ',':
|
||
|
parser.p++
|
||
|
self.setLazyObject(parser, ret)
|
||
|
return &ret[len(ret)-1]
|
||
|
case '}':
|
||
|
parser.p++
|
||
|
self.setObject(ret)
|
||
|
return &ret[len(ret)-1]
|
||
|
default:
|
||
|
return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/** Parser Factory **/
|
||
|
|
||
|
// Loads parse all json into interface{}
|
||
|
func Loads(src string) (int, interface{}, error) {
|
||
|
ps := &Parser{s: src}
|
||
|
np, err := ps.Parse()
|
||
|
|
||
|
/* check for errors */
|
||
|
if err != 0 {
|
||
|
return 0, nil, ps.ExportError(err)
|
||
|
} else {
|
||
|
x, err := np.Interface()
|
||
|
if err != nil {
|
||
|
return 0, nil, err
|
||
|
}
|
||
|
return ps.Pos(), x, nil
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// LoadsUseNumber parse all json into interface{}, with numeric nodes casted to json.Number
|
||
|
func LoadsUseNumber(src string) (int, interface{}, error) {
|
||
|
ps := &Parser{s: src}
|
||
|
np, err := ps.Parse()
|
||
|
|
||
|
/* check for errors */
|
||
|
if err != 0 {
|
||
|
return 0, nil, err
|
||
|
} else {
|
||
|
x, err := np.InterfaceUseNumber()
|
||
|
if err != nil {
|
||
|
return 0, nil, err
|
||
|
}
|
||
|
return ps.Pos(), x, nil
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func NewParser(src string) *Parser {
|
||
|
return &Parser{s: src}
|
||
|
}
|
||
|
|
||
|
// ExportError converts types.ParsingError to std Error
|
||
|
func (self *Parser) ExportError(err types.ParsingError) error {
|
||
|
if err == _ERR_NOT_FOUND {
|
||
|
return ErrNotExist
|
||
|
}
|
||
|
return fmt.Errorf("%q", SyntaxError{
|
||
|
Pos : self.p,
|
||
|
Src : self.s,
|
||
|
Code: err,
|
||
|
}.Description())
|
||
|
}
|