package scanner

import (
    "io"
    "strings"

    "github.com/goccy/go-yaml/token"
    "golang.org/x/xerrors"
)

// IndentState represents the state of indentation relative to the previous line.
type IndentState int

const (
    // IndentStateEqual means the indent is equal to the previous indent.
    IndentStateEqual IndentState = iota
    // IndentStateUp means the indent is deeper than the previous indent.
    IndentStateUp
    // IndentStateDown means the indent is shallower than the previous indent.
    IndentStateDown
    // IndentStateKeep means the previous indent state is kept because the current
    // character is not at the start of a line (it is not an indent token).
    IndentStateKeep
)

// Scanner holds the scanner's internal state while processing a given text.
// It can be allocated as part of another data structure but must be initialized via Init before use.
type Scanner struct {
    source                 []rune
    sourcePos              int
    sourceSize             int
    line                   int
    column                 int
    offset                 int
    prevIndentLevel        int
    prevIndentNum          int
    prevIndentColumn       int
    docStartColumn         int
    indentLevel            int
    indentNum              int
    isFirstCharAtLine      bool
    isAnchor               bool
    startedFlowSequenceNum int
    startedFlowMapNum      int
    indentState            IndentState
    savedPos               *token.Position
}

func (s *Scanner) pos() *token.Position {
    return &token.Position{
        Line:        s.line,
        Column:      s.column,
        Offset:      s.offset,
        IndentNum:   s.indentNum,
        IndentLevel: s.indentLevel,
    }
}

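// bufferedToken creates a token from the characters buffered in ctx.
// If a position was saved while scanning a newline, that saved position is used;
// otherwise the token position is derived from the current column and offset
// minus the buffer length.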
func (s *Scanner) bufferedToken(ctx *Context) *token.Token {
    if s.savedPos != nil {
        tk := ctx.bufferedToken(s.savedPos)
        s.savedPos = nil
        return tk
    }
    size := len(ctx.buf)
    return ctx.bufferedToken(&token.Position{
        Line:        s.line,
        Column:      s.column - size,
        Offset:      s.offset - size,
        IndentNum:   s.indentNum,
        IndentLevel: s.indentLevel,
    })
}

func (s *Scanner) progressColumn(ctx *Context, num int) {
    s.column += num
    s.offset += num
    ctx.progress(num)
}

func (s *Scanner) progressLine(ctx *Context) {
    s.column = 1
    s.line++
    s.offset++
    s.indentNum = 0
    s.isFirstCharAtLine = true
    s.isAnchor = false
    ctx.progress(1)
}

func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool {
    if !s.isChangedToIndentStateUp() {
        return false
    }
    if ctx.isDocument() {
        return true
    }
    if c == '-' && ctx.existsBuffer() {
        return true
    }
    return false
}

func (s *Scanner) isNewLineChar(c rune) bool {
    if c == '\n' {
        return true
    }
    if c == '\r' {
        return true
    }
    return false
}

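// newLineCount returns the number of line breaks in src, counting a CR that is
// immediately followed by LF as a single break; for example []rune("a\r\nb\nc")
// yields 2.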
func (s *Scanner) newLineCount(src []rune) int {
    size := len(src)
    cnt := 0
    for i := 0; i < size; i++ {
        c := src[i]
        switch c {
        case '\r':
            if i+1 < size && src[i+1] == '\n' {
                i++
            }
            cnt++
        case '\n':
            cnt++
        }
    }
    return cnt
}

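// updateIndentState determines the indent state of the current line. It first
// derives a state from the number of indent spaces compared with the previous
// line and then, when a previous indent column is recorded, refines the result
// using that column.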
func (s *Scanner) updateIndentState(ctx *Context) {
    indentNumBasedIndentState := s.indentState
    if s.prevIndentNum < s.indentNum {
        s.indentLevel = s.prevIndentLevel + 1
        indentNumBasedIndentState = IndentStateUp
    } else if s.prevIndentNum == s.indentNum {
        s.indentLevel = s.prevIndentLevel
        indentNumBasedIndentState = IndentStateEqual
    } else {
        indentNumBasedIndentState = IndentStateDown
        if s.prevIndentLevel > 0 {
            s.indentLevel = s.prevIndentLevel - 1
        }
    }

    if s.prevIndentColumn > 0 {
        if s.prevIndentColumn < s.column {
            s.indentState = IndentStateUp
        } else if s.prevIndentColumn != s.column || indentNumBasedIndentState != IndentStateEqual {
            // In the following case ( the current position is 'd' ), the variables are:
            // - prevIndentColumn: 1 ( the column of 'a' )
            // - indentNumBasedIndentState: IndentStateDown, because d's indentNum(1) is less than c's indentNum(3).
            // Here s.prevIndentColumn(1) == s.column(1) is true, but we want to treat this as IndentStateDown.
            // So we also look at the indentState derived from prevIndentNum above and determine the final indentState from both.
            // ---
            // a:
            //   b
            //    c
            //  d: e
            //  ^
            s.indentState = IndentStateDown
        } else {
            s.indentState = IndentStateEqual
        }
    } else {
        s.indentState = indentNumBasedIndentState
    }
}

func (s *Scanner) updateIndent(ctx *Context, c rune) {
    if s.isFirstCharAtLine && s.isNewLineChar(c) && ctx.isDocument() {
        return
    }
    if s.isFirstCharAtLine && c == ' ' {
        s.indentNum++
        return
    }
    if !s.isFirstCharAtLine {
        s.indentState = IndentStateKeep
        return
    }
    s.updateIndentState(ctx)
    s.isFirstCharAtLine = false
    if s.isNeededKeepPreviousIndentNum(ctx, c) {
        return
    }
    if s.indentState != IndentStateUp {
        s.prevIndentColumn = 0
    }
    s.prevIndentNum = s.indentNum
    s.prevIndentLevel = s.indentLevel
}

func (s *Scanner) isChangedToIndentStateDown() bool {
    return s.indentState == IndentStateDown
}

func (s *Scanner) isChangedToIndentStateUp() bool {
    return s.indentState == IndentStateUp
}

func (s *Scanner) isChangedToIndentStateEqual() bool {
    return s.indentState == IndentStateEqual
}

func (s *Scanner) addBufferedTokenIfExists(ctx *Context) {
    ctx.addToken(s.bufferedToken(ctx))
}

func (s *Scanner) breakLiteral(ctx *Context) {
    s.docStartColumn = 0
    ctx.breakLiteral()
}

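// scanSingleQuote scans a single-quoted scalar starting at the opening quote.
// Line breaks inside the scalar are folded into single spaces and the escaped
// quote '' is unescaped to ', e.g. 'it''s' becomes it's. It returns the scanned
// token and the progress used to advance the outer scan loop.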
func (s *Scanner) scanSingleQuote(ctx *Context) (tk *token.Token, pos int) {
    ctx.addOriginBuf('\'')
    srcpos := s.pos()
    startIndex := ctx.idx + 1
    src := ctx.src
    size := len(src)
    value := []rune{}
    isFirstLineChar := false
    isNewLine := false
    for idx := startIndex; idx < size; idx++ {
        if !isNewLine {
            s.progressColumn(ctx, 1)
        } else {
            isNewLine = false
        }
        c := src[idx]
        pos = idx + 1
        ctx.addOriginBuf(c)
        if s.isNewLineChar(c) {
            value = append(value, ' ')
            isFirstLineChar = true
            isNewLine = true
            s.progressLine(ctx)
            continue
        } else if c == ' ' && isFirstLineChar {
            continue
        } else if c != '\'' {
            value = append(value, c)
            isFirstLineChar = false
            continue
        }
        if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' {
            // '' is handled as an escaped ' character
            value = append(value, c)
            ctx.addOriginBuf(c)
            idx++
            continue
        }
        s.progressColumn(ctx, 1)
        tk = token.SingleQuote(string(value), string(ctx.obuf), srcpos)
        pos = idx - startIndex + 1
        return
    }
    return
}

func hexToInt(b rune) int {
    if b >= 'A' && b <= 'F' {
        return int(b) - 'A' + 10
    }
    if b >= 'a' && b <= 'f' {
        return int(b) - 'a' + 10
    }
    return int(b) - '0'
}

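// hexRunesToInt converts a sequence of hexadecimal digit runes to its integer
// value, e.g. hexRunesToInt([]rune("1F")) returns 31 (0x1F).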
func hexRunesToInt(b []rune) int {
    sum := 0
    for i := 0; i < len(b); i++ {
        sum += hexToInt(b[i]) << (uint(len(b)-i-1) * 4)
    }
    return sum
}

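// scanDoubleQuote scans a double-quoted scalar starting at the opening quote.
// Line breaks are folded into single spaces, and backslash escapes such as \n,
// \", \\ and the hexadecimal forms \xXX, \uXXXX and \UXXXXXXXX are decoded into
// the value. It returns the scanned token and the progress used to advance the
// outer scan loop.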
func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) {
    ctx.addOriginBuf('"')
    srcpos := s.pos()
    startIndex := ctx.idx + 1
    src := ctx.src
    size := len(src)
    value := []rune{}
    isFirstLineChar := false
    isNewLine := false
    for idx := startIndex; idx < size; idx++ {
        if !isNewLine {
            s.progressColumn(ctx, 1)
        } else {
            isNewLine = false
        }
        c := src[idx]
        pos = idx + 1
        ctx.addOriginBuf(c)
        if s.isNewLineChar(c) {
            value = append(value, ' ')
            isFirstLineChar = true
            isNewLine = true
            s.progressLine(ctx)
            continue
        } else if c == ' ' && isFirstLineChar {
            continue
        } else if c == '\\' {
            isFirstLineChar = false
            if idx+1 < size {
                nextChar := src[idx+1]
                switch nextChar {
                case 'b':
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\b')
                    idx++
                    continue
                case 'e':
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\x1B')
                    idx++
                    continue
                case 'f':
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\f')
                    idx++
                    continue
                case 'n':
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\n')
                    idx++
                    continue
                case 'v':
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\v')
                    idx++
                    continue
                case 'L': // LS (#x2028)
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\u2028')
                    idx++
                    continue
                case 'N': // NEL (#x85)
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\u0085')
                    idx++
                    continue
                case 'P': // PS (#x2029)
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\u2029')
                    idx++
                    continue
                case '_': // non-breaking space (#xA0)
                    ctx.addOriginBuf(nextChar)
                    value = append(value, '\u00A0')
                    idx++
                    continue
                case '"':
                    ctx.addOriginBuf(nextChar)
                    value = append(value, nextChar)
                    idx++
                    continue
                case 'x':
                    if idx+3 >= size {
                        // TODO: need to return an error
                        //err = xerrors.New("invalid escape character \\x")
                        return
                    }
                    codeNum := hexRunesToInt(src[idx+2 : idx+4])
                    value = append(value, rune(codeNum))
                    idx += 3
                    continue
                case 'u':
                    if idx+5 >= size {
                        // TODO: need to return an error
                        //err = xerrors.New("invalid escape character \\u")
                        return
                    }
                    codeNum := hexRunesToInt(src[idx+2 : idx+6])
                    value = append(value, rune(codeNum))
                    idx += 5
                    continue
                case 'U':
                    if idx+9 >= size {
                        // TODO: need to return an error
                        //err = xerrors.New("invalid escape character \\U")
                        return
                    }
                    codeNum := hexRunesToInt(src[idx+2 : idx+10])
                    value = append(value, rune(codeNum))
                    idx += 9
                    continue
                case '\\':
                    ctx.addOriginBuf(nextChar)
                    idx++
                }
            }
            value = append(value, c)
            continue
        } else if c != '"' {
            value = append(value, c)
            isFirstLineChar = false
            continue
        }
        s.progressColumn(ctx, 1)
        tk = token.DoubleQuote(string(value), string(ctx.obuf), srcpos)
        pos = idx - startIndex + 1
        return
    }
    return
}

func (s *Scanner) scanQuote(ctx *Context, ch rune) (tk *token.Token, pos int) {
    if ch == '\'' {
        return s.scanSingleQuote(ctx)
    }
    return s.scanDoubleQuote(ctx)
}

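// isMergeKey reports whether the current position starts a YAML merge key:
// "<<" followed (optionally after spaces) by ':' and a space or line break,
// as in "<<: *base".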
func (s *Scanner) isMergeKey(ctx *Context) bool {
    if ctx.repeatNum('<') != 2 {
        return false
    }
    src := ctx.src
    size := len(src)
    for idx := ctx.idx + 2; idx < size; idx++ {
        c := src[idx]
        if c == ' ' {
            continue
        }
        if c != ':' {
            return false
        }
        if idx+1 < size {
            nc := src[idx+1]
            if nc == ' ' || s.isNewLineChar(nc) {
                return true
            }
        }
    }
    return false
}

func (s *Scanner) scanTag(ctx *Context) (tk *token.Token, pos int) {
    ctx.addOriginBuf('!')
    ctx.progress(1) // skip '!' character
    for idx, c := range ctx.src[ctx.idx:] {
        pos = idx + 1
        ctx.addOriginBuf(c)
        switch c {
        case ' ', '\n', '\r':
            value := ctx.source(ctx.idx-1, ctx.idx+idx)
            tk = token.Tag(value, string(ctx.obuf), s.pos())
            pos = len([]rune(value))
            return
        }
    }
    return
}

func (s *Scanner) scanComment(ctx *Context) (tk *token.Token, pos int) {
    ctx.addOriginBuf('#')
    ctx.progress(1) // skip '#' character
    for idx, c := range ctx.src[ctx.idx:] {
        pos = idx + 1
        ctx.addOriginBuf(c)
        switch c {
        case '\n', '\r':
            if ctx.previousChar() == '\\' {
                continue
            }
            value := ctx.source(ctx.idx, ctx.idx+idx)
            tk = token.Comment(value, string(ctx.obuf), s.pos())
            pos = len([]rune(value)) + 1
            return
        }
    }
    return
}

func trimCommentFromLiteralOpt(text string) (string, error) {
    idx := strings.Index(text, "#")
    if idx < 0 {
        return text, nil
    }
    if idx == 0 {
        return "", xerrors.New("invalid literal header")
    }
    return text[:idx-1], nil
}

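// scanLiteral consumes one character of a block scalar (literal, folded or raw
// folded) body. Line breaks are kept for literal blocks and folded into spaces
// otherwise, and docStartColumn records the column where the block's content
// starts so that deeper indentation is preserved in the value.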
func (s *Scanner) scanLiteral(ctx *Context, c rune) {
    ctx.addOriginBuf(c)
    if ctx.isEOS() {
        if ctx.isLiteral {
            ctx.addBuf(c)
        }
        value := ctx.bufferedSrc()
        ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos()))
        ctx.resetBuffer()
        s.progressColumn(ctx, 1)
    } else if s.isNewLineChar(c) {
        if ctx.isLiteral {
            ctx.addBuf(c)
        } else {
            ctx.addBuf(' ')
        }
        s.progressLine(ctx)
    } else if s.isFirstCharAtLine && c == ' ' {
        if 0 < s.docStartColumn && s.docStartColumn <= s.column {
            ctx.addBuf(c)
        }
        s.progressColumn(ctx, 1)
    } else {
        if s.docStartColumn == 0 {
            s.docStartColumn = s.column
        }
        ctx.addBuf(c)
        s.progressColumn(ctx, 1)
    }
}

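// scanLiteralHeader scans a block scalar header introduced by '|' or '>',
// accepting an optional indicator ("+", "-" or "0".."9") and an optional
// trailing comment, and emits the corresponding Literal or Folded token
// (plus a Comment token when a comment is present).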
func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) {
    header := ctx.currentChar()
    ctx.addOriginBuf(header)
    ctx.progress(1) // skip '|' or '>' character
    for idx, c := range ctx.src[ctx.idx:] {
        pos = idx
        ctx.addOriginBuf(c)
        switch c {
        case '\n', '\r':
            value := ctx.source(ctx.idx, ctx.idx+idx)
            opt := strings.TrimRight(value, " ")
            orgOptLen := len(opt)
            opt, err = trimCommentFromLiteralOpt(opt)
            if err != nil {
                return
            }
            switch opt {
            case "", "+", "-",
                "0", "1", "2", "3", "4", "5", "6", "7", "8", "9":
                hasComment := len(opt) < orgOptLen
                if header == '|' {
                    if hasComment {
                        commentLen := orgOptLen - len(opt)
                        headerPos := strings.Index(string(ctx.obuf), "|")
                        litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos]
                        commentBuf := ctx.obuf[len(litBuf):]
                        ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos()))
                        s.column += len(litBuf)
                        s.offset += len(litBuf)
                        commentHeader := strings.Index(value, "#")
                        ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos()))
                    } else {
                        ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos()))
                    }
                    ctx.isLiteral = true
                } else if header == '>' {
                    if hasComment {
                        commentLen := orgOptLen - len(opt)
                        headerPos := strings.Index(string(ctx.obuf), ">")
                        foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos]
                        commentBuf := ctx.obuf[len(foldedBuf):]
                        ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos()))
                        s.column += len(foldedBuf)
                        s.offset += len(foldedBuf)
                        commentHeader := strings.Index(value, "#")
                        ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos()))
                    } else {
                        ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos()))
                    }
                    ctx.isFolded = true
                }
                s.indentState = IndentStateKeep
                ctx.resetBuffer()
                ctx.literalOpt = opt
                return
            }
            break
        }
    }
    err = xerrors.New("invalid literal header")
    return
}

func (s *Scanner) scanNewLine(ctx *Context, c rune) {
    if len(ctx.buf) > 0 && s.savedPos == nil {
        s.savedPos = s.pos()
        s.savedPos.Column -= len(ctx.bufferedSrc())
    }

    // In the following case the origin buffer contains two unnecessary trailing spaces,
    // so ctx.removeRightSpaceFromBuf removes them and the column number is fixed up as well.
    // ---
    // a:[space][space]
    //   b: c
    removedNum := ctx.removeRightSpaceFromBuf()
    if removedNum > 0 {
        s.column -= removedNum
        s.offset -= removedNum
        if s.savedPos != nil {
            s.savedPos.Column -= removedNum
        }
    }

    if ctx.isEOS() {
        s.addBufferedTokenIfExists(ctx)
    } else if s.isAnchor {
        s.addBufferedTokenIfExists(ctx)
    }
    ctx.addBuf(' ')
    ctx.addOriginBuf(c)
    ctx.isSingleLine = false
    s.progressLine(ctx)
}

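// scan is the main scanning loop. It reads one character at a time, updates the
// indent state, and either buffers the character as part of a plain scalar or
// dispatches to the specialized scanners for flow collections, block scalars,
// quotes, tags, anchors, aliases, comments and document markers. The returned
// value is the progress (in characters) consumed from ctx.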
func (s *Scanner) scan(ctx *Context) (pos int) {
    for ctx.next() {
        pos = ctx.nextPos()
        c := ctx.currentChar()
        s.updateIndent(ctx, c)
        if ctx.isDocument() {
            if s.isChangedToIndentStateEqual() ||
                s.isChangedToIndentStateDown() {
                s.addBufferedTokenIfExists(ctx)
                s.breakLiteral(ctx)
            } else {
                s.scanLiteral(ctx, c)
                continue
            }
        } else if s.isChangedToIndentStateDown() {
            s.addBufferedTokenIfExists(ctx)
        } else if s.isChangedToIndentStateEqual() {
            // if the first character is a line break, the buffer is expected to be a raw folded literal
            if len(ctx.obuf) > 0 && s.newLineCount(ctx.obuf) <= 1 {
                // not a raw folded literal
                s.addBufferedTokenIfExists(ctx)
            }
        }
        switch c {
        case '{':
            if !ctx.existsBuffer() {
                ctx.addOriginBuf(c)
                ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
                s.startedFlowMapNum++
                s.progressColumn(ctx, 1)
                return
            }
        case '}':
            if !ctx.existsBuffer() || s.startedFlowMapNum > 0 {
                ctx.addToken(s.bufferedToken(ctx))
                ctx.addOriginBuf(c)
                ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos()))
                s.startedFlowMapNum--
                s.progressColumn(ctx, 1)
                return
            }
        case '.':
            if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('.') == 3 {
                ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos()))
                s.progressColumn(ctx, 3)
                pos += 2
                return
            }
        case '<':
            if s.isMergeKey(ctx) {
                s.prevIndentColumn = s.column
                ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos()))
                s.progressColumn(ctx, 1)
                pos++
                return
            }
        case '-':
            if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('-') == 3 {
                s.addBufferedTokenIfExists(ctx)
                ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos()))
                s.progressColumn(ctx, 3)
                pos += 2
                return
            }
            if ctx.existsBuffer() && s.isChangedToIndentStateUp() {
                // raw folded
                ctx.isRawFolded = true
                ctx.addBuf(c)
                ctx.addOriginBuf(c)
                s.progressColumn(ctx, 1)
                continue
            }
            if ctx.existsBuffer() {
                // '-' is treated as a literal character (part of the scalar)
                ctx.addBuf(c)
                ctx.addOriginBuf(c)
                s.progressColumn(ctx, 1)
                continue
            }
            nc := ctx.nextChar()
            if nc == ' ' || s.isNewLineChar(nc) {
                s.addBufferedTokenIfExists(ctx)
                ctx.addOriginBuf(c)
                tk := token.SequenceEntry(string(ctx.obuf), s.pos())
                s.prevIndentColumn = tk.Position.Column
                ctx.addToken(tk)
                s.progressColumn(ctx, 1)
                return
            }
        case '[':
            if !ctx.existsBuffer() {
                ctx.addOriginBuf(c)
                ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
                s.startedFlowSequenceNum++
                s.progressColumn(ctx, 1)
                return
            }
        case ']':
            if !ctx.existsBuffer() || s.startedFlowSequenceNum > 0 {
                s.addBufferedTokenIfExists(ctx)
                ctx.addOriginBuf(c)
                ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos()))
                s.startedFlowSequenceNum--
                s.progressColumn(ctx, 1)
                return
            }
        case ',':
            if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 {
                s.addBufferedTokenIfExists(ctx)
                ctx.addOriginBuf(c)
                ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos()))
                s.progressColumn(ctx, 1)
                return
            }
        case ':':
            nc := ctx.nextChar()
            if s.startedFlowMapNum > 0 || nc == ' ' || s.isNewLineChar(nc) || ctx.isNextEOS() {
                // mapping value
                tk := s.bufferedToken(ctx)
                if tk != nil {
                    s.prevIndentColumn = tk.Position.Column
                    ctx.addToken(tk)
                }
                ctx.addToken(token.MappingValue(s.pos()))
                s.progressColumn(ctx, 1)
                return
            }
        case '|', '>':
            if !ctx.existsBuffer() {
                progress, err := s.scanLiteralHeader(ctx)
                if err != nil {
                    // TODO: return a syntax error object
                    return
                }
                s.progressColumn(ctx, progress)
                s.progressLine(ctx)
                continue
            }
        case '!':
            if !ctx.existsBuffer() {
                token, progress := s.scanTag(ctx)
                ctx.addToken(token)
                s.progressColumn(ctx, progress)
                if c := ctx.previousChar(); s.isNewLineChar(c) {
                    s.progressLine(ctx)
                }
                pos += progress
                return
            }
        case '%':
            if !ctx.existsBuffer() && s.indentNum == 0 {
                ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos()))
                s.progressColumn(ctx, 1)
                return
            }
        case '?':
            nc := ctx.nextChar()
            if !ctx.existsBuffer() && nc == ' ' {
                ctx.addToken(token.MappingKey(s.pos()))
                s.progressColumn(ctx, 1)
                return
            }
        case '&':
            if !ctx.existsBuffer() {
                s.addBufferedTokenIfExists(ctx)
                ctx.addOriginBuf(c)
                ctx.addToken(token.Anchor(string(ctx.obuf), s.pos()))
                s.progressColumn(ctx, 1)
                s.isAnchor = true
                return
            }
        case '*':
            if !ctx.existsBuffer() {
                s.addBufferedTokenIfExists(ctx)
                ctx.addOriginBuf(c)
                ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
                s.progressColumn(ctx, 1)
                return
            }
        case '#':
            if !ctx.existsBuffer() || ctx.previousChar() == ' ' {
                s.addBufferedTokenIfExists(ctx)
                token, progress := s.scanComment(ctx)
                ctx.addToken(token)
                s.progressColumn(ctx, progress)
                s.progressLine(ctx)
                pos += progress
                return
            }
        case '\'', '"':
            if !ctx.existsBuffer() {
                token, progress := s.scanQuote(ctx, c)
                ctx.addToken(token)
                pos += progress
                return
            }
        case '\r', '\n':
            // It is safe to ignore a CR that is followed by an LF and normalize it to a single LF,
            // because of the following YAML 1.2 spec:
            // > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character.
            // > Outside scalar content, YAML allows any line break to be used to terminate lines.
            // > -- https://yaml.org/spec/1.2/spec.html
            if c == '\r' && ctx.nextChar() == '\n' {
                ctx.addOriginBuf('\r')
                ctx.progress(1)
                c = '\n'
            }
            s.scanNewLine(ctx, c)
            continue
        case ' ':
            if ctx.isSaveIndentMode() || (!s.isAnchor && !s.isFirstCharAtLine) {
                ctx.addBuf(c)
                ctx.addOriginBuf(c)
                s.progressColumn(ctx, 1)
                continue
            }
            if s.isFirstCharAtLine {
                s.progressColumn(ctx, 1)
                ctx.addOriginBuf(c)
                continue
            }
            s.addBufferedTokenIfExists(ctx)
            pos-- // rescan this white space on the next scan so it is added to the next buffer
            s.isAnchor = false
            return
        }
        ctx.addBuf(c)
        ctx.addOriginBuf(c)
        s.progressColumn(ctx, 1)
    }
    s.addBufferedTokenIfExists(ctx)
    return
}

// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src.
func (s *Scanner) Init(text string) {
    src := []rune(text)
    s.source = src
    s.sourcePos = 0
    s.sourceSize = len(src)
    s.line = 1
    s.column = 1
    s.offset = 1
    s.prevIndentLevel = 0
    s.prevIndentNum = 0
    s.prevIndentColumn = 0
    s.indentLevel = 0
    s.indentNum = 0
    s.isFirstCharAtLine = true
}

// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF.
func (s *Scanner) Scan() (token.Tokens, error) {
    if s.sourcePos >= s.sourceSize {
        return nil, io.EOF
    }
    ctx := newContext(s.source[s.sourcePos:])
    defer ctx.release()
    progress := s.scan(ctx)
    s.sourcePos += progress
    var tokens token.Tokens
    tokens = append(tokens, ctx.tokens...)
    return tokens, nil
}
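
// A minimal usage sketch for this scanner (illustrative only; callers in this
// package may drive it differently): initialize once with Init and call Scan in
// a loop until it reports io.EOF.
//
//    var s Scanner
//    s.Init("a: b\n")
//    for {
//        tokens, err := s.Scan()
//        if err == io.EOF {
//            break
//        }
//        _ = tokens // consume the scanned tokens
//    }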