You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
325 lines
5.1 KiB
325 lines
5.1 KiB
3 years ago
|
package jx
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"unicode/utf16"
|
||
|
"unicode/utf8"
|
||
|
|
||
|
"github.com/go-faster/errors"
|
||
|
)
|
||
|
|
||
|
// StrAppend reads string and appends it to byte slice.
|
||
|
func (d *Decoder) StrAppend(b []byte) ([]byte, error) {
|
||
|
v := value{
|
||
|
buf: b,
|
||
|
raw: false,
|
||
|
}
|
||
|
var err error
|
||
|
if v, err = d.str(v); err != nil {
|
||
|
return b, err
|
||
|
}
|
||
|
return v.buf, nil
|
||
|
}
|
||
|
|
||
|
type value struct {
|
||
|
buf []byte
|
||
|
raw bool // false forces buf reuse
|
||
|
}
|
||
|
|
||
|
func (v value) rune(r rune) value {
|
||
|
return value{
|
||
|
buf: appendRune(v.buf, r),
|
||
|
raw: v.raw,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// badTokenErr reports a byte that the decoder did not expect at the current
// position.
type badTokenErr struct {
	Token byte // the offending byte
}

// Error implements the error interface. The message contains the byte's
// decimal value followed by the byte itself in single quotes.
func (e badTokenErr) Error() string {
	raw := []byte{e.Token}
	return fmt.Sprintf("unexpected byte %d '%s'", e.Token, raw)
}

// badToken returns a badTokenErr for c as an error.
func badToken(c byte) error {
	return badTokenErr{c}
}
|
||
|
|
||
|
func (d *Decoder) str(v value) (value, error) {
|
||
|
if err := d.consume('"'); err != nil {
|
||
|
return value{}, errors.Wrap(err, "start")
|
||
|
}
|
||
|
var (
|
||
|
c byte
|
||
|
i int
|
||
|
)
|
||
|
for {
|
||
|
buf := d.buf[d.head:d.tail]
|
||
|
for len(buf) >= 8 {
|
||
|
c = buf[0]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[1]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[2]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[3]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[4]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[5]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[6]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[7]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
buf = buf[8:]
|
||
|
}
|
||
|
var n int
|
||
|
for n, c = range buf {
|
||
|
if safeSet[c] != 0 {
|
||
|
i += n
|
||
|
goto readTok
|
||
|
}
|
||
|
}
|
||
|
return d.strSlow(v)
|
||
|
}
|
||
|
readTok:
|
||
|
buf := d.buf[d.head:d.tail]
|
||
|
str := buf[:i]
|
||
|
|
||
|
switch {
|
||
|
case c == '"':
|
||
|
// Skip string + last quote.
|
||
|
d.head += i + 1
|
||
|
if v.raw {
|
||
|
return value{buf: str}, nil
|
||
|
}
|
||
|
return value{buf: append(v.buf, str...)}, nil
|
||
|
case c == '\\':
|
||
|
// Skip only string, keep quote in buffer.
|
||
|
d.head += i
|
||
|
// We need a copy anyway, because string is escaped.
|
||
|
return d.strSlow(value{buf: append(v.buf, str...)})
|
||
|
default:
|
||
|
return v, badToken(c)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (d *Decoder) strSlow(v value) (value, error) {
|
||
|
var (
|
||
|
c byte
|
||
|
i int
|
||
|
)
|
||
|
readStr:
|
||
|
for {
|
||
|
i = 0
|
||
|
buf := d.buf[d.head:d.tail]
|
||
|
for len(buf) >= 8 {
|
||
|
c = buf[0]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[1]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[2]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[3]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[4]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[5]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[6]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
c = buf[7]
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
|
||
|
buf = buf[8:]
|
||
|
}
|
||
|
for _, c = range buf {
|
||
|
if safeSet[c] != 0 {
|
||
|
goto readTok
|
||
|
}
|
||
|
i++
|
||
|
}
|
||
|
|
||
|
v.buf = append(v.buf, d.buf[d.head:d.head+i]...)
|
||
|
if err := d.read(); err != nil {
|
||
|
if err == io.EOF {
|
||
|
return value{}, io.ErrUnexpectedEOF
|
||
|
}
|
||
|
return value{}, err
|
||
|
}
|
||
|
}
|
||
|
readTok:
|
||
|
buf := d.buf[d.head:d.tail]
|
||
|
str := buf[:i]
|
||
|
d.head += i + 1
|
||
|
|
||
|
switch {
|
||
|
case c == '"':
|
||
|
return value{buf: append(v.buf, str...)}, nil
|
||
|
case c == '\\':
|
||
|
v.buf = append(v.buf, str...)
|
||
|
c, err := d.byte()
|
||
|
if err != nil {
|
||
|
return value{}, errors.Wrap(err, "next")
|
||
|
}
|
||
|
v, err = d.escapedChar(v, c)
|
||
|
if err != nil {
|
||
|
return v, errors.Wrap(err, "escape")
|
||
|
}
|
||
|
default:
|
||
|
return v, badToken(c)
|
||
|
}
|
||
|
goto readStr
|
||
|
}
|
||
|
|
||
|
// StrBytes returns string value as sub-slice of internal buffer.
|
||
|
//
|
||
|
// Bytes are valid only until next call to any Decoder method.
|
||
|
func (d *Decoder) StrBytes() ([]byte, error) {
|
||
|
v, err := d.str(value{raw: true})
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return v.buf, nil
|
||
|
}
|
||
|
|
||
|
// Str reads string.
|
||
|
func (d *Decoder) Str() (string, error) {
|
||
|
s, err := d.StrBytes()
|
||
|
if err != nil {
|
||
|
return "", err
|
||
|
}
|
||
|
return string(s), nil
|
||
|
}
|
||
|
|
||
|
func (d *Decoder) escapedChar(v value, c byte) (value, error) {
|
||
|
switch val := escapedStrSet[c]; val {
|
||
|
default:
|
||
|
v.buf = append(v.buf, val)
|
||
|
case 'u':
|
||
|
r1, err := d.readU4()
|
||
|
if err != nil {
|
||
|
return value{}, errors.Wrap(err, "read u4")
|
||
|
}
|
||
|
if utf16.IsSurrogate(r1) {
|
||
|
c, err := d.byte()
|
||
|
if err != nil {
|
||
|
return value{}, err
|
||
|
}
|
||
|
if c != '\\' {
|
||
|
d.unread()
|
||
|
return v.rune(r1), nil
|
||
|
}
|
||
|
c, err = d.byte()
|
||
|
if err != nil {
|
||
|
return value{}, err
|
||
|
}
|
||
|
if c != 'u' {
|
||
|
return d.escapedChar(v.rune(r1), c)
|
||
|
}
|
||
|
r2, err := d.readU4()
|
||
|
if err != nil {
|
||
|
return value{}, err
|
||
|
}
|
||
|
combined := utf16.DecodeRune(r1, r2)
|
||
|
if combined == '\uFFFD' {
|
||
|
v = v.rune(r1).rune(r2)
|
||
|
} else {
|
||
|
v = v.rune(combined)
|
||
|
}
|
||
|
} else {
|
||
|
v = v.rune(r1)
|
||
|
}
|
||
|
case 0:
|
||
|
return v, errors.Wrap(badToken(c), "bad escape: %w")
|
||
|
}
|
||
|
return v, nil
|
||
|
}
|
||
|
|
||
|
func (d *Decoder) readU4() (v rune, _ error) {
|
||
|
var b [4]byte
|
||
|
if err := d.readExact4(&b); err != nil {
|
||
|
return 0, err
|
||
|
}
|
||
|
for _, c := range b {
|
||
|
val := hexSet[c]
|
||
|
if val == 0 {
|
||
|
return 0, badToken(c)
|
||
|
}
|
||
|
v = v*16 + rune(val-1)
|
||
|
}
|
||
|
return v, nil
|
||
|
}
|
||
|
|
||
|
// appendRune appends the UTF-8 encoding of r to p and returns the extended
// slice.
func appendRune(p []byte, r rune) []byte {
	// Fixed-size scratch space large enough for any encoded rune.
	var enc [utf8.UTFMax]byte
	width := utf8.EncodeRune(enc[:], r)
	return append(p, enc[:width]...)
}
|