You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
876 lines
21 KiB
876 lines
21 KiB
package scanner |
|
|
|
import ( |
|
"io" |
|
"strings" |
|
|
|
"github.com/goccy/go-yaml/token" |
|
"golang.org/x/xerrors" |
|
) |
|
|
|
// IndentState describes how the indentation of the line being scanned
// relates to the indentation the scanner recorded previously.
type IndentState int

const (
	// IndentStateEqual means the indent equals the previous indent.
	IndentStateEqual IndentState = iota
	// IndentStateUp means the indent is deeper than the previous indent.
	IndentStateUp
	// IndentStateDown means the indent is shallower than the previous indent.
	IndentStateDown
	// IndentStateKeep means no indent comparison applies to the current
	// character (it is not the first character of its line).
	IndentStateKeep
)
|
|
|
// Scanner holds the scanner's internal state while processing a given text. |
|
// It can be allocated as part of another data structure but must be initialized via Init before use. |
|
type Scanner struct { |
|
source []rune |
|
sourcePos int |
|
sourceSize int |
|
line int |
|
column int |
|
offset int |
|
prevIndentLevel int |
|
prevIndentNum int |
|
prevIndentColumn int |
|
docStartColumn int |
|
indentLevel int |
|
indentNum int |
|
isFirstCharAtLine bool |
|
isAnchor bool |
|
startedFlowSequenceNum int |
|
startedFlowMapNum int |
|
indentState IndentState |
|
savedPos *token.Position |
|
} |
|
|
|
func (s *Scanner) pos() *token.Position { |
|
return &token.Position{ |
|
Line: s.line, |
|
Column: s.column, |
|
Offset: s.offset, |
|
IndentNum: s.indentNum, |
|
IndentLevel: s.indentLevel, |
|
} |
|
} |
|
|
|
func (s *Scanner) bufferedToken(ctx *Context) *token.Token { |
|
if s.savedPos != nil { |
|
tk := ctx.bufferedToken(s.savedPos) |
|
s.savedPos = nil |
|
return tk |
|
} |
|
size := len(ctx.buf) |
|
return ctx.bufferedToken(&token.Position{ |
|
Line: s.line, |
|
Column: s.column - size, |
|
Offset: s.offset - size, |
|
IndentNum: s.indentNum, |
|
IndentLevel: s.indentLevel, |
|
}) |
|
} |
|
|
|
func (s *Scanner) progressColumn(ctx *Context, num int) { |
|
s.column += num |
|
s.offset += num |
|
ctx.progress(num) |
|
} |
|
|
|
func (s *Scanner) progressLine(ctx *Context) { |
|
s.column = 1 |
|
s.line++ |
|
s.offset++ |
|
s.indentNum = 0 |
|
s.isFirstCharAtLine = true |
|
s.isAnchor = false |
|
ctx.progress(1) |
|
} |
|
|
|
func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool { |
|
if !s.isChangedToIndentStateUp() { |
|
return false |
|
} |
|
if ctx.isDocument() { |
|
return true |
|
} |
|
if c == '-' && ctx.existsBuffer() { |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
func (s *Scanner) isNewLineChar(c rune) bool { |
|
if c == '\n' { |
|
return true |
|
} |
|
if c == '\r' { |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
func (s *Scanner) newLineCount(src []rune) int { |
|
size := len(src) |
|
cnt := 0 |
|
for i := 0; i < size; i++ { |
|
c := src[i] |
|
switch c { |
|
case '\r': |
|
if i+1 < size && src[i+1] == '\n' { |
|
i++ |
|
} |
|
cnt++ |
|
case '\n': |
|
cnt++ |
|
} |
|
} |
|
return cnt |
|
} |
|
|
|
func (s *Scanner) updateIndentState(ctx *Context) { |
|
indentNumBasedIndentState := s.indentState |
|
if s.prevIndentNum < s.indentNum { |
|
s.indentLevel = s.prevIndentLevel + 1 |
|
indentNumBasedIndentState = IndentStateUp |
|
} else if s.prevIndentNum == s.indentNum { |
|
s.indentLevel = s.prevIndentLevel |
|
indentNumBasedIndentState = IndentStateEqual |
|
} else { |
|
indentNumBasedIndentState = IndentStateDown |
|
if s.prevIndentLevel > 0 { |
|
s.indentLevel = s.prevIndentLevel - 1 |
|
} |
|
} |
|
|
|
if s.prevIndentColumn > 0 { |
|
if s.prevIndentColumn < s.column { |
|
s.indentState = IndentStateUp |
|
} else if s.prevIndentColumn != s.column || indentNumBasedIndentState != IndentStateEqual { |
|
// The following case ( current position is 'd' ), some variables becomes like here |
|
// - prevIndentColumn: 1 of 'a' |
|
// - indentNumBasedIndentState: IndentStateDown because d's indentNum(1) is less than c's indentNum(3). |
|
// Therefore, s.prevIndentColumn(1) == s.column(1) is true, but we want to treat this as IndentStateDown. |
|
// So, we look also current indentState value by the above prevIndentNum based logic, and determins finally indentState. |
|
// --- |
|
// a: |
|
// b |
|
// c |
|
// d: e |
|
// ^ |
|
s.indentState = IndentStateDown |
|
} else { |
|
s.indentState = IndentStateEqual |
|
} |
|
} else { |
|
s.indentState = indentNumBasedIndentState |
|
} |
|
} |
|
|
|
func (s *Scanner) updateIndent(ctx *Context, c rune) { |
|
if s.isFirstCharAtLine && s.isNewLineChar(c) && ctx.isDocument() { |
|
return |
|
} |
|
if s.isFirstCharAtLine && c == ' ' { |
|
s.indentNum++ |
|
return |
|
} |
|
if !s.isFirstCharAtLine { |
|
s.indentState = IndentStateKeep |
|
return |
|
} |
|
s.updateIndentState(ctx) |
|
s.isFirstCharAtLine = false |
|
if s.isNeededKeepPreviousIndentNum(ctx, c) { |
|
return |
|
} |
|
if s.indentState != IndentStateUp { |
|
s.prevIndentColumn = 0 |
|
} |
|
s.prevIndentNum = s.indentNum |
|
s.prevIndentLevel = s.indentLevel |
|
} |
|
|
|
func (s *Scanner) isChangedToIndentStateDown() bool { |
|
return s.indentState == IndentStateDown |
|
} |
|
|
|
func (s *Scanner) isChangedToIndentStateUp() bool { |
|
return s.indentState == IndentStateUp |
|
} |
|
|
|
func (s *Scanner) isChangedToIndentStateEqual() bool { |
|
return s.indentState == IndentStateEqual |
|
} |
|
|
|
func (s *Scanner) addBufferedTokenIfExists(ctx *Context) { |
|
ctx.addToken(s.bufferedToken(ctx)) |
|
} |
|
|
|
func (s *Scanner) breakLiteral(ctx *Context) { |
|
s.docStartColumn = 0 |
|
ctx.breakLiteral() |
|
} |
|
|
|
func (s *Scanner) scanSingleQuote(ctx *Context) (tk *token.Token, pos int) { |
|
ctx.addOriginBuf('\'') |
|
srcpos := s.pos() |
|
startIndex := ctx.idx + 1 |
|
src := ctx.src |
|
size := len(src) |
|
value := []rune{} |
|
isFirstLineChar := false |
|
isNewLine := false |
|
for idx := startIndex; idx < size; idx++ { |
|
if !isNewLine { |
|
s.progressColumn(ctx, 1) |
|
} else { |
|
isNewLine = false |
|
} |
|
c := src[idx] |
|
pos = idx + 1 |
|
ctx.addOriginBuf(c) |
|
if s.isNewLineChar(c) { |
|
value = append(value, ' ') |
|
isFirstLineChar = true |
|
isNewLine = true |
|
s.progressLine(ctx) |
|
continue |
|
} else if c == ' ' && isFirstLineChar { |
|
continue |
|
} else if c != '\'' { |
|
value = append(value, c) |
|
isFirstLineChar = false |
|
continue |
|
} |
|
if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' { |
|
// '' handle as ' character |
|
value = append(value, c) |
|
ctx.addOriginBuf(c) |
|
idx++ |
|
continue |
|
} |
|
s.progressColumn(ctx, 1) |
|
tk = token.SingleQuote(string(value), string(ctx.obuf), srcpos) |
|
pos = idx - startIndex + 1 |
|
return |
|
} |
|
return |
|
} |
|
|
|
// hexToInt converts one hexadecimal digit to its numeric value.
// Input is not validated: any rune outside A-F / a-f is treated as if it
// were a decimal digit (its distance from '0' is returned).
func hexToInt(b rune) int {
	switch {
	case 'A' <= b && b <= 'F':
		return int(b) - 'A' + 10
	case 'a' <= b && b <= 'f':
		return int(b) - 'a' + 10
	default:
		return int(b) - '0'
	}
}
|
|
|
func hexRunesToInt(b []rune) int { |
|
sum := 0 |
|
for i := 0; i < len(b); i++ { |
|
sum += hexToInt(b[i]) << (uint(len(b)-i-1) * 4) |
|
} |
|
return sum |
|
} |
|
|
|
func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) { |
|
ctx.addOriginBuf('"') |
|
srcpos := s.pos() |
|
startIndex := ctx.idx + 1 |
|
src := ctx.src |
|
size := len(src) |
|
value := []rune{} |
|
isFirstLineChar := false |
|
isNewLine := false |
|
for idx := startIndex; idx < size; idx++ { |
|
if !isNewLine { |
|
s.progressColumn(ctx, 1) |
|
} else { |
|
isNewLine = false |
|
} |
|
c := src[idx] |
|
pos = idx + 1 |
|
ctx.addOriginBuf(c) |
|
if s.isNewLineChar(c) { |
|
value = append(value, ' ') |
|
isFirstLineChar = true |
|
isNewLine = true |
|
s.progressLine(ctx) |
|
continue |
|
} else if c == ' ' && isFirstLineChar { |
|
continue |
|
} else if c == '\\' { |
|
isFirstLineChar = false |
|
if idx+1 < size { |
|
nextChar := src[idx+1] |
|
switch nextChar { |
|
case 'b': |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, '\b') |
|
idx++ |
|
continue |
|
case 'e': |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, '\x1B') |
|
idx++ |
|
continue |
|
case 'f': |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, '\f') |
|
idx++ |
|
continue |
|
case 'n': |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, '\n') |
|
idx++ |
|
continue |
|
case 'v': |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, '\v') |
|
idx++ |
|
continue |
|
case 'L': // LS (#x2028) |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, []rune{'\xE2', '\x80', '\xA8'}...) |
|
idx++ |
|
continue |
|
case 'N': // NEL (#x85) |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, []rune{'\xC2', '\x85'}...) |
|
idx++ |
|
continue |
|
case 'P': // PS (#x2029) |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, []rune{'\xE2', '\x80', '\xA9'}...) |
|
idx++ |
|
continue |
|
case '_': // #xA0 |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, []rune{'\xC2', '\xA0'}...) |
|
idx++ |
|
continue |
|
case '"': |
|
ctx.addOriginBuf(nextChar) |
|
value = append(value, nextChar) |
|
idx++ |
|
continue |
|
case 'x': |
|
if idx+3 >= size { |
|
// TODO: need to return error |
|
//err = xerrors.New("invalid escape character \\x") |
|
return |
|
} |
|
codeNum := hexRunesToInt(src[idx+2 : idx+4]) |
|
value = append(value, rune(codeNum)) |
|
idx += 3 |
|
continue |
|
case 'u': |
|
if idx+5 >= size { |
|
// TODO: need to return error |
|
//err = xerrors.New("invalid escape character \\u") |
|
return |
|
} |
|
codeNum := hexRunesToInt(src[idx+2 : idx+6]) |
|
value = append(value, rune(codeNum)) |
|
idx += 5 |
|
continue |
|
case 'U': |
|
if idx+9 >= size { |
|
// TODO: need to return error |
|
//err = xerrors.New("invalid escape character \\U") |
|
return |
|
} |
|
codeNum := hexRunesToInt(src[idx+2 : idx+10]) |
|
value = append(value, rune(codeNum)) |
|
idx += 9 |
|
continue |
|
case '\\': |
|
ctx.addOriginBuf(nextChar) |
|
idx++ |
|
} |
|
} |
|
value = append(value, c) |
|
continue |
|
} else if c != '"' { |
|
value = append(value, c) |
|
isFirstLineChar = false |
|
continue |
|
} |
|
s.progressColumn(ctx, 1) |
|
tk = token.DoubleQuote(string(value), string(ctx.obuf), srcpos) |
|
pos = idx - startIndex + 1 |
|
return |
|
} |
|
return |
|
} |
|
|
|
func (s *Scanner) scanQuote(ctx *Context, ch rune) (tk *token.Token, pos int) { |
|
if ch == '\'' { |
|
return s.scanSingleQuote(ctx) |
|
} |
|
return s.scanDoubleQuote(ctx) |
|
} |
|
|
|
func (s *Scanner) isMergeKey(ctx *Context) bool { |
|
if ctx.repeatNum('<') != 2 { |
|
return false |
|
} |
|
src := ctx.src |
|
size := len(src) |
|
for idx := ctx.idx + 2; idx < size; idx++ { |
|
c := src[idx] |
|
if c == ' ' { |
|
continue |
|
} |
|
if c != ':' { |
|
return false |
|
} |
|
if idx+1 < size { |
|
nc := src[idx+1] |
|
if nc == ' ' || s.isNewLineChar(nc) { |
|
return true |
|
} |
|
} |
|
} |
|
return false |
|
} |
|
|
|
func (s *Scanner) scanTag(ctx *Context) (tk *token.Token, pos int) { |
|
ctx.addOriginBuf('!') |
|
ctx.progress(1) // skip '!' character |
|
for idx, c := range ctx.src[ctx.idx:] { |
|
pos = idx + 1 |
|
ctx.addOriginBuf(c) |
|
switch c { |
|
case ' ', '\n', '\r': |
|
value := ctx.source(ctx.idx-1, ctx.idx+idx) |
|
tk = token.Tag(value, string(ctx.obuf), s.pos()) |
|
pos = len([]rune(value)) |
|
return |
|
} |
|
} |
|
return |
|
} |
|
|
|
func (s *Scanner) scanComment(ctx *Context) (tk *token.Token, pos int) { |
|
ctx.addOriginBuf('#') |
|
ctx.progress(1) // skip '#' character |
|
for idx, c := range ctx.src[ctx.idx:] { |
|
pos = idx + 1 |
|
ctx.addOriginBuf(c) |
|
switch c { |
|
case '\n', '\r': |
|
if ctx.previousChar() == '\\' { |
|
continue |
|
} |
|
value := ctx.source(ctx.idx, ctx.idx+idx) |
|
tk = token.Comment(value, string(ctx.obuf), s.pos()) |
|
pos = len([]rune(value)) + 1 |
|
return |
|
} |
|
} |
|
return |
|
} |
|
|
|
func trimCommentFromLiteralOpt(text string) (string, error) { |
|
idx := strings.Index(text, "#") |
|
if idx < 0 { |
|
return text, nil |
|
} |
|
if idx == 0 { |
|
return "", xerrors.New("invalid literal header") |
|
} |
|
return text[:idx-1], nil |
|
} |
|
|
|
func (s *Scanner) scanLiteral(ctx *Context, c rune) { |
|
ctx.addOriginBuf(c) |
|
if ctx.isEOS() { |
|
if ctx.isLiteral { |
|
ctx.addBuf(c) |
|
} |
|
value := ctx.bufferedSrc() |
|
ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos())) |
|
ctx.resetBuffer() |
|
s.progressColumn(ctx, 1) |
|
} else if s.isNewLineChar(c) { |
|
if ctx.isLiteral { |
|
ctx.addBuf(c) |
|
} else { |
|
ctx.addBuf(' ') |
|
} |
|
s.progressLine(ctx) |
|
} else if s.isFirstCharAtLine && c == ' ' { |
|
if 0 < s.docStartColumn && s.docStartColumn <= s.column { |
|
ctx.addBuf(c) |
|
} |
|
s.progressColumn(ctx, 1) |
|
} else { |
|
if s.docStartColumn == 0 { |
|
s.docStartColumn = s.column |
|
} |
|
ctx.addBuf(c) |
|
s.progressColumn(ctx, 1) |
|
} |
|
} |
|
|
|
func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) { |
|
header := ctx.currentChar() |
|
ctx.addOriginBuf(header) |
|
ctx.progress(1) // skip '|' or '>' character |
|
for idx, c := range ctx.src[ctx.idx:] { |
|
pos = idx |
|
ctx.addOriginBuf(c) |
|
switch c { |
|
case '\n', '\r': |
|
value := ctx.source(ctx.idx, ctx.idx+idx) |
|
opt := strings.TrimRight(value, " ") |
|
orgOptLen := len(opt) |
|
opt, err = trimCommentFromLiteralOpt(opt) |
|
if err != nil { |
|
return |
|
} |
|
switch opt { |
|
case "", "+", "-", |
|
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9": |
|
hasComment := len(opt) < orgOptLen |
|
if header == '|' { |
|
if hasComment { |
|
commentLen := orgOptLen - len(opt) |
|
headerPos := strings.Index(string(ctx.obuf), "|") |
|
litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] |
|
commentBuf := ctx.obuf[len(litBuf):] |
|
ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos())) |
|
s.column += len(litBuf) |
|
s.offset += len(litBuf) |
|
commentHeader := strings.Index(value, "#") |
|
ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) |
|
} else { |
|
ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos())) |
|
} |
|
ctx.isLiteral = true |
|
} else if header == '>' { |
|
if hasComment { |
|
commentLen := orgOptLen - len(opt) |
|
headerPos := strings.Index(string(ctx.obuf), ">") |
|
foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] |
|
commentBuf := ctx.obuf[len(foldedBuf):] |
|
ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos())) |
|
s.column += len(foldedBuf) |
|
s.offset += len(foldedBuf) |
|
commentHeader := strings.Index(value, "#") |
|
ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) |
|
} else { |
|
ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos())) |
|
} |
|
ctx.isFolded = true |
|
} |
|
s.indentState = IndentStateKeep |
|
ctx.resetBuffer() |
|
ctx.literalOpt = opt |
|
return |
|
} |
|
break |
|
} |
|
} |
|
err = xerrors.New("invalid literal header") |
|
return |
|
} |
|
|
|
func (s *Scanner) scanNewLine(ctx *Context, c rune) { |
|
if len(ctx.buf) > 0 && s.savedPos == nil { |
|
s.savedPos = s.pos() |
|
s.savedPos.Column -= len(ctx.bufferedSrc()) |
|
} |
|
|
|
// if the following case, origin buffer has unnecessary two spaces. |
|
// So, `removeRightSpaceFromOriginBuf` remove them, also fix column number too. |
|
// --- |
|
// a:[space][space] |
|
// b: c |
|
removedNum := ctx.removeRightSpaceFromBuf() |
|
if removedNum > 0 { |
|
s.column -= removedNum |
|
s.offset -= removedNum |
|
if s.savedPos != nil { |
|
s.savedPos.Column -= removedNum |
|
} |
|
} |
|
|
|
if ctx.isEOS() { |
|
s.addBufferedTokenIfExists(ctx) |
|
} else if s.isAnchor { |
|
s.addBufferedTokenIfExists(ctx) |
|
} |
|
ctx.addBuf(' ') |
|
ctx.addOriginBuf(c) |
|
ctx.isSingleLine = false |
|
s.progressLine(ctx) |
|
} |
|
|
|
func (s *Scanner) scan(ctx *Context) (pos int) { |
|
for ctx.next() { |
|
pos = ctx.nextPos() |
|
c := ctx.currentChar() |
|
s.updateIndent(ctx, c) |
|
if ctx.isDocument() { |
|
if s.isChangedToIndentStateEqual() || |
|
s.isChangedToIndentStateDown() { |
|
s.addBufferedTokenIfExists(ctx) |
|
s.breakLiteral(ctx) |
|
} else { |
|
s.scanLiteral(ctx, c) |
|
continue |
|
} |
|
} else if s.isChangedToIndentStateDown() { |
|
s.addBufferedTokenIfExists(ctx) |
|
} else if s.isChangedToIndentStateEqual() { |
|
// if first character is new line character, buffer expect to raw folded literal |
|
if len(ctx.obuf) > 0 && s.newLineCount(ctx.obuf) <= 1 { |
|
// doesn't raw folded literal |
|
s.addBufferedTokenIfExists(ctx) |
|
} |
|
} |
|
switch c { |
|
case '{': |
|
if !ctx.existsBuffer() { |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos())) |
|
s.startedFlowMapNum++ |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '}': |
|
if !ctx.existsBuffer() || s.startedFlowMapNum > 0 { |
|
ctx.addToken(s.bufferedToken(ctx)) |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos())) |
|
s.startedFlowMapNum-- |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '.': |
|
if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('.') == 3 { |
|
ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos())) |
|
s.progressColumn(ctx, 3) |
|
pos += 2 |
|
return |
|
} |
|
case '<': |
|
if s.isMergeKey(ctx) { |
|
s.prevIndentColumn = s.column |
|
ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos())) |
|
s.progressColumn(ctx, 1) |
|
pos++ |
|
return |
|
} |
|
case '-': |
|
if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('-') == 3 { |
|
s.addBufferedTokenIfExists(ctx) |
|
ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos())) |
|
s.progressColumn(ctx, 3) |
|
pos += 2 |
|
return |
|
} |
|
if ctx.existsBuffer() && s.isChangedToIndentStateUp() { |
|
// raw folded |
|
ctx.isRawFolded = true |
|
ctx.addBuf(c) |
|
ctx.addOriginBuf(c) |
|
s.progressColumn(ctx, 1) |
|
continue |
|
} |
|
if ctx.existsBuffer() { |
|
// '-' is literal |
|
ctx.addBuf(c) |
|
ctx.addOriginBuf(c) |
|
s.progressColumn(ctx, 1) |
|
continue |
|
} |
|
nc := ctx.nextChar() |
|
if nc == ' ' || s.isNewLineChar(nc) { |
|
s.addBufferedTokenIfExists(ctx) |
|
ctx.addOriginBuf(c) |
|
tk := token.SequenceEntry(string(ctx.obuf), s.pos()) |
|
s.prevIndentColumn = tk.Position.Column |
|
ctx.addToken(tk) |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '[': |
|
if !ctx.existsBuffer() { |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos())) |
|
s.startedFlowSequenceNum++ |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case ']': |
|
if !ctx.existsBuffer() || s.startedFlowSequenceNum > 0 { |
|
s.addBufferedTokenIfExists(ctx) |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos())) |
|
s.startedFlowSequenceNum-- |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case ',': |
|
if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 { |
|
s.addBufferedTokenIfExists(ctx) |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos())) |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case ':': |
|
nc := ctx.nextChar() |
|
if s.startedFlowMapNum > 0 || nc == ' ' || s.isNewLineChar(nc) || ctx.isNextEOS() { |
|
// mapping value |
|
tk := s.bufferedToken(ctx) |
|
if tk != nil { |
|
s.prevIndentColumn = tk.Position.Column |
|
ctx.addToken(tk) |
|
} |
|
ctx.addToken(token.MappingValue(s.pos())) |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '|', '>': |
|
if !ctx.existsBuffer() { |
|
progress, err := s.scanLiteralHeader(ctx) |
|
if err != nil { |
|
// TODO: returns syntax error object |
|
return |
|
} |
|
s.progressColumn(ctx, progress) |
|
s.progressLine(ctx) |
|
continue |
|
} |
|
case '!': |
|
if !ctx.existsBuffer() { |
|
token, progress := s.scanTag(ctx) |
|
ctx.addToken(token) |
|
s.progressColumn(ctx, progress) |
|
if c := ctx.previousChar(); s.isNewLineChar(c) { |
|
s.progressLine(ctx) |
|
} |
|
pos += progress |
|
return |
|
} |
|
case '%': |
|
if !ctx.existsBuffer() && s.indentNum == 0 { |
|
ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos())) |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '?': |
|
nc := ctx.nextChar() |
|
if !ctx.existsBuffer() && nc == ' ' { |
|
ctx.addToken(token.MappingKey(s.pos())) |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '&': |
|
if !ctx.existsBuffer() { |
|
s.addBufferedTokenIfExists(ctx) |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.Anchor(string(ctx.obuf), s.pos())) |
|
s.progressColumn(ctx, 1) |
|
s.isAnchor = true |
|
return |
|
} |
|
case '*': |
|
if !ctx.existsBuffer() { |
|
s.addBufferedTokenIfExists(ctx) |
|
ctx.addOriginBuf(c) |
|
ctx.addToken(token.Alias(string(ctx.obuf), s.pos())) |
|
s.progressColumn(ctx, 1) |
|
return |
|
} |
|
case '#': |
|
if !ctx.existsBuffer() || ctx.previousChar() == ' ' { |
|
s.addBufferedTokenIfExists(ctx) |
|
token, progress := s.scanComment(ctx) |
|
ctx.addToken(token) |
|
s.progressColumn(ctx, progress) |
|
s.progressLine(ctx) |
|
pos += progress |
|
return |
|
} |
|
case '\'', '"': |
|
if !ctx.existsBuffer() { |
|
token, progress := s.scanQuote(ctx, c) |
|
ctx.addToken(token) |
|
pos += progress |
|
return |
|
} |
|
case '\r', '\n': |
|
// There is no problem that we ignore CR which followed by LF and normalize it to LF, because of following YAML1.2 spec. |
|
// > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character. |
|
// > Outside scalar content, YAML allows any line break to be used to terminate lines. |
|
// > -- https://yaml.org/spec/1.2/spec.html |
|
if c == '\r' && ctx.nextChar() == '\n' { |
|
ctx.addOriginBuf('\r') |
|
ctx.progress(1) |
|
c = '\n' |
|
} |
|
s.scanNewLine(ctx, c) |
|
continue |
|
case ' ': |
|
if ctx.isSaveIndentMode() || (!s.isAnchor && !s.isFirstCharAtLine) { |
|
ctx.addBuf(c) |
|
ctx.addOriginBuf(c) |
|
s.progressColumn(ctx, 1) |
|
continue |
|
} |
|
if s.isFirstCharAtLine { |
|
s.progressColumn(ctx, 1) |
|
ctx.addOriginBuf(c) |
|
continue |
|
} |
|
s.addBufferedTokenIfExists(ctx) |
|
pos-- // to rescan white space at next scanning for adding white space to next buffer. |
|
s.isAnchor = false |
|
return |
|
} |
|
ctx.addBuf(c) |
|
ctx.addOriginBuf(c) |
|
s.progressColumn(ctx, 1) |
|
} |
|
s.addBufferedTokenIfExists(ctx) |
|
return |
|
} |
|
|
|
// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src. |
|
func (s *Scanner) Init(text string) { |
|
src := []rune(text) |
|
s.source = src |
|
s.sourcePos = 0 |
|
s.sourceSize = len(src) |
|
s.line = 1 |
|
s.column = 1 |
|
s.offset = 1 |
|
s.prevIndentLevel = 0 |
|
s.prevIndentNum = 0 |
|
s.prevIndentColumn = 0 |
|
s.indentLevel = 0 |
|
s.indentNum = 0 |
|
s.isFirstCharAtLine = true |
|
} |
|
|
|
// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF. |
|
func (s *Scanner) Scan() (token.Tokens, error) { |
|
if s.sourcePos >= s.sourceSize { |
|
return nil, io.EOF |
|
} |
|
ctx := newContext(s.source[s.sourcePos:]) |
|
defer ctx.release() |
|
progress := s.scan(ctx) |
|
s.sourcePos += progress |
|
var tokens token.Tokens |
|
tokens = append(tokens, ctx.tokens...) |
|
return tokens, nil |
|
}
|
|
|