You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

325 lines
5.1 KiB

package jx
import (
"fmt"
"io"
"unicode/utf16"
"unicode/utf8"
"github.com/go-faster/errors"
)
// StrAppend reads a string and appends its bytes to b.
//
// On success the extended slice is returned. If reading fails, the original
// b is returned together with the error.
func (d *Decoder) StrAppend(b []byte) ([]byte, error) {
	// raw is left false so that str appends to b instead of returning a
	// view into the decoder buffer.
	out, err := d.str(value{buf: b})
	if err != nil {
		return b, err
	}
	return out.buf, nil
}
// value carries the output buffer used while reading a string.
type value struct {
	// buf accumulates the bytes read so far.
	buf []byte
	// raw, when true, allows str to return a sub-slice of the decoder
	// buffer instead of appending to buf; false forces buf reuse.
	raw bool
}

// rune returns a copy of v with r appended to buf via appendRune.
// The raw flag is carried over unchanged.
func (v value) rune(r rune) value {
	// v is a by-value receiver, so updating it does not affect the caller.
	v.buf = appendRune(v.buf, r)
	return v
}
// badTokenErr reports that an unexpected byte was met while decoding.
type badTokenErr struct {
	// Token is the offending byte.
	Token byte
}

// Error implements the error interface. The message contains the byte both
// as a decimal number and as the raw byte itself between single quotes.
func (e badTokenErr) Error() string {
	tok := e.Token
	return fmt.Sprintf("unexpected byte %d '%s'", tok, []byte{tok})
}

// badToken returns a badTokenErr for byte c as an error.
func badToken(c byte) error {
	var e badTokenErr
	e.Token = c
	return e
}
// str reads a double-quoted string, trying the fast path first.
//
// After the opening quote is consumed, the currently buffered bytes
// d.buf[d.head:d.tail] are scanned for the first byte whose safeSet entry is
// non-zero:
//
//   - closing quote: the string is returned directly, as a sub-slice of d.buf
//     when v.raw is set, otherwise appended to v.buf;
//   - backslash: the plain prefix is appended to v.buf and decoding continues
//     in strSlow;
//   - any other flagged byte: a badToken error is returned.
//
// If no flagged byte is found in the buffered data, the whole job is handed
// to strSlow.
func (d *Decoder) str(v value) (value, error) {
if err := d.consume('"'); err != nil {
return value{}, errors.Wrap(err, "start")
}
var (
// c is the last byte inspected; at readTok it is the byte that stopped the scan.
c byte
// i counts the bytes scanned before c, i.e. the length of the plain prefix.
i int
)
// This loop never iterates twice: every path leaves via goto or return.
for {
buf := d.buf[d.head:d.tail]
// Manually unrolled scan, eight bytes per iteration.
for len(buf) >= 8 {
c = buf[0]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[1]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[2]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[3]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[4]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[5]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[6]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[7]
if safeSet[c] != 0 {
goto readTok
}
i++
buf = buf[8:]
}
// Scan the remaining tail of fewer than eight bytes.
var n int
for n, c = range buf {
if safeSet[c] != 0 {
// n is the offset inside the tail; add it to the bytes already counted.
i += n
goto readTok
}
}
// No flagged byte in the buffered data; d.head has not moved past the
// string contents, so strSlow rescans them from the start.
return d.strSlow(v)
}
readTok:
buf := d.buf[d.head:d.tail]
// str is the run of plain bytes that precedes c.
str := buf[:i]
switch {
case c == '"':
// Skip string + last quote.
d.head += i + 1
if v.raw {
// Return a view into the decoder buffer without copying.
return value{buf: str}, nil
}
return value{buf: append(v.buf, str...)}, nil
case c == '\\':
// Skip only the plain prefix; the backslash stays in the buffer for strSlow.
d.head += i
// We need a copy anyway, because string is escaped.
return d.strSlow(value{buf: append(v.buf, str...)})
default:
// Any other flagged byte is rejected; d.head is left unchanged here.
return v, badToken(c)
}
}
// strSlow reads the rest of a string whose opening quote has already been
// consumed, appending the decoded bytes to v.buf.
//
// It repeatedly scans d.buf[d.head:d.tail] for the first byte whose safeSet
// entry is non-zero. When the buffered data ends without one, the scanned
// bytes are appended to v.buf and d.read is called to obtain more input; an
// io.EOF from d.read is reported as io.ErrUnexpectedEOF. A closing quote
// finishes the string, a backslash starts an escape sequence handled by
// escapedChar, and any other flagged byte yields a badToken error.
//
// The returned value never has raw set: the result is always built by
// appending to v.buf.
func (d *Decoder) strSlow(v value) (value, error) {
var (
// c is the last byte inspected; at readTok it is the byte that stopped the scan.
c byte
// i is the number of plain bytes scanned since d.head in the current pass.
i int
)
readStr:
for {
// Each pass starts counting from the current d.head.
i = 0
buf := d.buf[d.head:d.tail]
// Manually unrolled scan, eight bytes per iteration.
for len(buf) >= 8 {
c = buf[0]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[1]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[2]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[3]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[4]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[5]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[6]
if safeSet[c] != 0 {
goto readTok
}
i++
c = buf[7]
if safeSet[c] != 0 {
goto readTok
}
i++
buf = buf[8:]
}
// Scan the remaining tail of fewer than eight bytes.
for _, c = range buf {
if safeSet[c] != 0 {
goto readTok
}
i++
}
// Buffered data ended inside the string: keep what was scanned before
// asking for more input.
// NOTE(review): d.read is defined elsewhere; presumably it refills d.buf
// and resets d.head/d.tail — confirm.
v.buf = append(v.buf, d.buf[d.head:d.head+i]...)
if err := d.read(); err != nil {
if err == io.EOF {
// Input ended before the closing quote was seen.
return value{}, io.ErrUnexpectedEOF
}
return value{}, err
}
}
readTok:
buf := d.buf[d.head:d.tail]
// str is the run of plain bytes that precedes c.
str := buf[:i]
// Advance past the plain bytes and past c itself.
d.head += i + 1
switch {
case c == '"':
// Closing quote: the string is complete.
return value{buf: append(v.buf, str...)}, nil
case c == '\\':
v.buf = append(v.buf, str...)
// The byte after the backslash selects the escape sequence.
c, err := d.byte()
if err != nil {
return value{}, errors.Wrap(err, "next")
}
v, err = d.escapedChar(v, c)
if err != nil {
return v, errors.Wrap(err, "escape")
}
default:
return v, badToken(c)
}
// Escape handled; resume scanning right after it.
goto readStr
}
// StrBytes reads a string and returns its bytes.
//
// The result may be a sub-slice of the decoder's internal buffer, so the
// bytes are valid only until the next call to any Decoder method.
func (d *Decoder) StrBytes() ([]byte, error) {
	res, err := d.str(value{raw: true})
	if err == nil {
		return res.buf, nil
	}
	return nil, err
}
// Str reads a string and returns it as a newly built Go string.
//
// On failure the empty string is returned together with the error.
func (d *Decoder) Str() (string, error) {
	var out string
	raw, err := d.StrBytes()
	if err == nil {
		// Converting copies the bytes, so the result does not alias the
		// slice returned by StrBytes.
		out = string(raw)
	}
	return out, err
}
// escapedChar decodes one escape sequence and appends the result to v.
//
// c is the byte that follows the backslash; the backslash itself has already
// been consumed by the caller. escapedStrSet maps c to the action to take:
//
//   - 0: c does not start a valid escape; a badToken error wrapped with
//     "bad escape" is returned;
//   - 'u': a four-digit hex escape is read with readU4, see below;
//   - anything else: the table entry is appended to v.buf as is.
//
// For a \u escape whose value is a UTF-16 surrogate, the decoder looks ahead
// for a following \u escape and tries to combine the two into one rune. If
// no \u escape follows, or the pair does not form a valid surrogate pair,
// each half is appended through value.rune on its own.
func (d *Decoder) escapedChar(v value, c byte) (value, error) {
	switch val := escapedStrSet[c]; val {
	default:
		// Simple escape: the table holds the byte to append.
		v.buf = append(v.buf, val)
	case 'u':
		r1, err := d.readU4()
		if err != nil {
			return value{}, errors.Wrap(err, "read u4")
		}
		if !utf16.IsSurrogate(r1) {
			return v.rune(r1), nil
		}

		// r1 is a surrogate half: check whether another escape follows.
		next, err := d.byte()
		if err != nil {
			return value{}, err
		}
		if next != '\\' {
			// Not an escape; put the byte back and emit r1 on its own.
			d.unread()
			return v.rune(r1), nil
		}
		next, err = d.byte()
		if err != nil {
			return value{}, err
		}
		if next != 'u' {
			// A different escape follows; emit r1 and decode that one.
			return d.escapedChar(v.rune(r1), next)
		}
		r2, err := d.readU4()
		if err != nil {
			return value{}, err
		}
		combined := utf16.DecodeRune(r1, r2)
		if combined == '\uFFFD' {
			// Not a valid surrogate pair: emit both halves separately.
			return v.rune(r1).rune(r2), nil
		}
		return v.rune(combined), nil
	case 0:
		// errors.Wrap does not interpret format verbs, so the message must
		// not contain "%w".
		return v, errors.Wrap(badToken(c), "bad escape")
	}
	return v, nil
}
// readU4 reads exactly four bytes via readExact4 and interprets them as
// hexadecimal digits, most significant first, returning the resulting value.
//
// hexSet is indexed by byte: a zero entry marks a byte that is not accepted
// and yields a badToken error; otherwise the entry minus one is used as the
// digit value.
func (d *Decoder) readU4() (v rune, _ error) {
	var digits [4]byte
	if err := d.readExact4(&digits); err != nil {
		return 0, err
	}
	for i := range digits {
		nibble := hexSet[digits[i]]
		if nibble == 0 {
			return 0, badToken(digits[i])
		}
		// Shift the accumulated value by one hex digit and add the new one.
		v = v<<4 + rune(nibble-1)
	}
	return v, nil
}
// appendRune appends the UTF-8 encoding of r to p and returns the extended
// slice. Encoding is done by utf8.EncodeRune into a scratch buffer first.
func appendRune(p []byte, r rune) []byte {
	var enc [utf8.UTFMax]byte
	size := utf8.EncodeRune(enc[:], r)
	return append(p, enc[:size]...)
}