// Copyright 2015-2019 Brett Vickers.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package etree

import (
	"io"
	"strings"
	"unicode/utf8"
)

// stack is a simple last-in-first-out stack of arbitrary values.
type stack struct {
	data []interface{}
}

// empty reports whether the stack holds no values.
func (s *stack) empty() bool {
	return len(s.data) == 0
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes and returns the value on top of the stack. There is no
// emptiness check, so calling pop on an empty stack panics with an
// out-of-range index.
func (s *stack) pop() interface{} {
	value := s.data[len(s.data)-1]
	// Clear the vacated slot so the backing array no longer references
	// the popped value.
	s.data[len(s.data)-1] = nil
	s.data = s.data[:len(s.data)-1]
	return value
}

// peek returns the value on top of the stack without removing it. Like
// pop, it panics when the stack is empty.
func (s *stack) peek() interface{} {
	return s.data[len(s.data)-1]
}

// A fifo is a simple first-in-first-out queue. Values live in a circular
// buffer: head is the index of the next value to remove and tail is the
// index of the next free slot. head == tail means the queue is empty.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, growing the buffer first
// when the addition would otherwise fill it completely. Keeping one slot
// free is what lets head == tail unambiguously mean "empty".
func (f *fifo) add(value interface{}) {
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	// Advance tail, wrapping around to the start of the buffer.
	if f.tail++; f.tail == len(f.data) {
		f.tail = 0
	}
}

// remove takes the value at the front of the queue and returns it. It
// does not check for an empty queue; callers are expected to consult
// len first.
func (f *fifo) remove() interface{} {
	value := f.data[f.head]
	// Clear the vacated slot so the buffer no longer references the value.
	f.data[f.head] = nil
	// Advance head, wrapping around to the start of the buffer.
	if f.head++; f.head == len(f.data) {
		f.head = 0
	}
	return value
}

// len returns the number of values currently queued.
func (f *fifo) len() int {
	if f.tail >= f.head {
		return f.tail - f.head
	}
	// The queued values wrap around the end of the buffer.
	return len(f.data) - f.head + f.tail
}

// grow doubles the buffer size (starting at 4 slots) and moves the
// queued values, in order, to the front of the new buffer.
func (f *fifo) grow() {
	c := len(f.data) * 2
	if c == 0 {
		c = 4
	}
	buf, count := make([]interface{}, c), f.len()
	if f.tail >= f.head {
		// Contiguous case: a single copy suffices.
		copy(buf[0:count], f.data[f.head:f.tail])
	} else {
		// Wrapped case: copy the segment from head to the end of the old
		// buffer, then the segment from the start up to tail.
		hindex := len(f.data) - f.head
		copy(buf[0:hindex], f.data[f.head:])
		copy(buf[hindex:count], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}

// xmlReader provides the interface by which an XML byte stream is
// processed and decoded.
type xmlReader interface {
	// Bytes returns the number of bytes read so far.
	Bytes() int64
	// Read has the same signature as io.Reader's Read method.
	Read(p []byte) (n int, err error)
}

// xmlSimpleReader implements a proxy reader that counts the number of
// bytes read from its encapsulated reader.
type xmlSimpleReader struct { r io.Reader bytes int64 } func newXmlSimpleReader(r io.Reader) xmlReader { return &xmlSimpleReader{r, 0} } func (xr *xmlSimpleReader) Bytes() int64 { return xr.bytes } func (xr *xmlSimpleReader) Read(p []byte) (n int, err error) { n, err = xr.r.Read(p) xr.bytes += int64(n) return n, err } // xmlPeekReader implements a proxy reader that counts the number of // bytes read from its encapsulated reader. It also allows the caller to // "peek" at the previous portions of the buffer after they have been // parsed. type xmlPeekReader struct { r io.Reader bytes int64 // total bytes read by the Read function buf []byte // internal read buffer bufSize int // total bytes used in the read buffer bufOffset int64 // total bytes read when buf was last filled window []byte // current read buffer window peekBuf []byte // buffer used to store data to be peeked at later peekOffset int64 // total read offset of the start of the peek buffer } func newXmlPeekReader(r io.Reader) *xmlPeekReader { buf := make([]byte, 4096) return &xmlPeekReader{ r: r, bytes: 0, buf: buf, bufSize: 0, bufOffset: 0, window: buf[0:0], peekBuf: make([]byte, 0), peekOffset: -1, } } func (xr *xmlPeekReader) Bytes() int64 { return xr.bytes } func (xr *xmlPeekReader) Read(p []byte) (n int, err error) { if len(xr.window) == 0 { err = xr.fill() if err != nil { return 0, err } if len(xr.window) == 0 { return 0, nil } } if len(xr.window) < len(p) { n = len(xr.window) } else { n = len(p) } copy(p, xr.window) xr.window = xr.window[n:] xr.bytes += int64(n) return n, err } func (xr *xmlPeekReader) PeekPrepare(offset int64, maxLen int) { if maxLen > cap(xr.peekBuf) { xr.peekBuf = make([]byte, 0, maxLen) } xr.peekBuf = xr.peekBuf[0:0] xr.peekOffset = offset xr.updatePeekBuf() } func (xr *xmlPeekReader) PeekFinalize() []byte { xr.updatePeekBuf() return xr.peekBuf } func (xr *xmlPeekReader) fill() error { xr.bufOffset = xr.bytes xr.bufSize = 0 n, err := xr.r.Read(xr.buf) if err != nil { xr.window, 
xr.bufSize = xr.buf[0:0], 0 return err } xr.window, xr.bufSize = xr.buf[:n], n xr.updatePeekBuf() return nil } func (xr *xmlPeekReader) updatePeekBuf() { peekRemain := cap(xr.peekBuf) - len(xr.peekBuf) if xr.peekOffset >= 0 && peekRemain > 0 { rangeMin := xr.peekOffset rangeMax := xr.peekOffset + int64(cap(xr.peekBuf)) bufMin := xr.bufOffset bufMax := xr.bufOffset + int64(xr.bufSize) if rangeMin < bufMin { rangeMin = bufMin } if rangeMax > bufMax { rangeMax = bufMax } if rangeMax > rangeMin { rangeMin -= xr.bufOffset rangeMax -= xr.bufOffset if int(rangeMax-rangeMin) > peekRemain { rangeMax = rangeMin + int64(peekRemain) } xr.peekBuf = append(xr.peekBuf, xr.buf[rangeMin:rangeMax]...) } } } // xmlWriter implements a proxy writer that counts the number of // bytes written by its encapsulated writer. type xmlWriter struct { w io.Writer bytes int64 } func newXmlWriter(w io.Writer) *xmlWriter { return &xmlWriter{w: w} } func (xw *xmlWriter) Write(p []byte) (n int, err error) { n, err = xw.w.Write(p) xw.bytes += int64(n) return n, err } // isWhitespace returns true if the byte slice contains only // whitespace characters. func isWhitespace(s string) bool { for i := 0; i < len(s); i++ { if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\r' { return false } } return true } // spaceMatch returns true if namespace a is the empty string // or if namespace a equals namespace b. func spaceMatch(a, b string) bool { switch { case a == "": return true default: return a == b } } // spaceDecompose breaks a namespace:tag identifier at the ':' // and returns the two parts. func spaceDecompose(str string) (space, key string) { colon := strings.IndexByte(str, ':') if colon == -1 { return "", str } return str[:colon], str[colon+1:] } // Strings used by indentCRLF and indentLF const ( indentSpaces = "\r\n " indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" ) // indentCRLF returns a CRLF newline followed by n copies of the first // non-CRLF character in the source string. 
// The source is expected to begin with "\r\n" followed by at least one
// indent character. A negative n yields the bare CRLF.
func indentCRLF(n int, source string) string {
	switch {
	case n < 0:
		return source[:2]
	case n < len(source)-1:
		// The requested indent fits within source: slice, don't allocate.
		return source[:n+2]
	default:
		// Extend source with as many extra indent characters as needed.
		return source + strings.Repeat(source[2:3], n-len(source)+2)
	}
}

// indentLF returns a LF newline followed by n copies of the first non-LF
// character in the source string. The source has the same layout as for
// indentCRLF (a leading "\r\n" and at least one indent character); the
// leading '\r' is skipped. A negative n yields the bare LF.
func indentLF(n int, source string) string {
	switch {
	case n < 0:
		return source[1:2]
	case n < len(source)-1:
		// The requested indent fits within source: slice, don't allocate.
		return source[1 : n+2]
	default:
		// Extend source with as many extra indent characters as needed.
		return source[1:] + strings.Repeat(source[2:3], n-len(source)+2)
	}
}

// nextIndex returns the index of the next occurrence of sep in s,
// starting from offset. It returns -1 if the sep string is not found.
// The returned index is relative to the start of s, not to offset.
func nextIndex(s, sep string, offset int) int {
	i := strings.Index(s[offset:], sep)
	if i == -1 {
		return -1
	}
	return offset + i
}

// isInteger returns true if the string s contains an integer: an
// optional leading '-' followed by one or more ASCII digits. The empty
// string and a lone "-" are not integers.
func isInteger(s string) bool {
	digits := s
	if strings.HasPrefix(digits, "-") {
		digits = digits[1:]
	}
	if digits == "" {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if digits[i] < '0' || digits[i] > '9' {
			return false
		}
	}
	return true
}

// escapeMode selects the set of characters that escapeString replaces.
type escapeMode byte

const (
	escapeNormal        escapeMode = iota // ordinary (non-canonical) output
	escapeCanonicalText                   // canonical XML text content
	escapeCanonicalAttr                   // canonical XML attribute values
)

// escapeString writes an escaped version of a string to the writer.
func escapeString(w Writer, s string, m escapeMode) { var esc []byte last := 0 for i := 0; i < len(s); { r, width := utf8.DecodeRuneInString(s[i:]) i += width switch r { case '&': esc = []byte("&") case '<': esc = []byte("<") case '>': if m == escapeCanonicalAttr { continue } esc = []byte(">") case '\'': if m != escapeNormal { continue } esc = []byte("'") case '"': if m == escapeCanonicalText { continue } esc = []byte(""") case '\t': if m != escapeCanonicalAttr { continue } esc = []byte(" ") case '\n': if m != escapeCanonicalAttr { continue } esc = []byte(" ") case '\r': if m == escapeNormal { continue } esc = []byte(" ") default: if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) { esc = []byte("\uFFFD") break } continue } w.WriteString(s[last : i-width]) w.Write(esc) last = i } w.WriteString(s[last:]) } func isInCharacterRange(r rune) bool { return r == 0x09 || r == 0x0A || r == 0x0D || r >= 0x20 && r <= 0xD7FF || r >= 0xE000 && r <= 0xFFFD || r >= 0x10000 && r <= 0x10FFFF }