You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
377 lines
9.5 KiB
377 lines
9.5 KiB
3 years ago
|
// Copyright 2014 The Go Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package cases
|
||
|
|
||
|
import "golang.org/x/text/transform"
|
||
|
|
||
|
// A context is used for iterating over source bytes, fetching case info and
|
||
|
// writing to a destination buffer.
|
||
|
//
|
||
|
// Casing operations may need more than one rune of context to decide how a rune
|
||
|
// should be cased. Casing implementations should call checkpoint on context
|
||
|
// whenever it is known to be safe to return the runes processed so far.
|
||
|
//
|
||
|
// It is recommended for implementations to not allow for more than 30 case
|
||
|
// ignorables as lookahead (analogous to the limit in norm) and to use state if
|
||
|
// unbounded lookahead is needed for cased runes.
|
||
|
type context struct {
|
||
|
dst, src []byte
|
||
|
atEOF bool
|
||
|
|
||
|
pDst int // pDst points past the last written rune in dst.
|
||
|
pSrc int // pSrc points to the start of the currently scanned rune.
|
||
|
|
||
|
// checkpoints safe to return in Transform, where nDst <= pDst and nSrc <= pSrc.
|
||
|
nDst, nSrc int
|
||
|
err error
|
||
|
|
||
|
sz int // size of current rune
|
||
|
info info // case information of currently scanned rune
|
||
|
|
||
|
// State preserved across calls to Transform.
|
||
|
isMidWord bool // false if next cased letter needs to be title-cased.
|
||
|
}
|
||
|
|
||
|
func (c *context) Reset() {
|
||
|
c.isMidWord = false
|
||
|
}
|
||
|
|
||
|
// ret returns the return values for the Transform method. It checks whether
|
||
|
// there were insufficient bytes in src to complete and introduces an error
|
||
|
// accordingly, if necessary.
|
||
|
func (c *context) ret() (nDst, nSrc int, err error) {
|
||
|
if c.err != nil || c.nSrc == len(c.src) {
|
||
|
return c.nDst, c.nSrc, c.err
|
||
|
}
|
||
|
// This point is only reached by mappers if there was no short destination
|
||
|
// buffer. This means that the source buffer was exhausted and that c.sz was
|
||
|
// set to 0 by next.
|
||
|
if c.atEOF && c.pSrc == len(c.src) {
|
||
|
return c.pDst, c.pSrc, nil
|
||
|
}
|
||
|
return c.nDst, c.nSrc, transform.ErrShortSrc
|
||
|
}
|
||
|
|
||
|
// retSpan returns the return values for the Span method. It checks whether
|
||
|
// there were insufficient bytes in src to complete and introduces an error
|
||
|
// accordingly, if necessary.
|
||
|
func (c *context) retSpan() (n int, err error) {
|
||
|
_, nSrc, err := c.ret()
|
||
|
return nSrc, err
|
||
|
}
|
||
|
|
||
|
// checkpoint sets the return value buffer points for Transform to the current
|
||
|
// positions.
|
||
|
func (c *context) checkpoint() {
|
||
|
if c.err == nil {
|
||
|
c.nDst, c.nSrc = c.pDst, c.pSrc+c.sz
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// unreadRune causes the last rune read by next to be reread on the next
|
||
|
// invocation of next. Only one unreadRune may be called after a call to next.
|
||
|
func (c *context) unreadRune() {
|
||
|
c.sz = 0
|
||
|
}
|
||
|
|
||
|
func (c *context) next() bool {
|
||
|
c.pSrc += c.sz
|
||
|
if c.pSrc == len(c.src) || c.err != nil {
|
||
|
c.info, c.sz = 0, 0
|
||
|
return false
|
||
|
}
|
||
|
v, sz := trie.lookup(c.src[c.pSrc:])
|
||
|
c.info, c.sz = info(v), sz
|
||
|
if c.sz == 0 {
|
||
|
if c.atEOF {
|
||
|
// A zero size means we have an incomplete rune. If we are atEOF,
|
||
|
// this means it is an illegal rune, which we will consume one
|
||
|
// byte at a time.
|
||
|
c.sz = 1
|
||
|
} else {
|
||
|
c.err = transform.ErrShortSrc
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// writeBytes adds bytes to dst.
|
||
|
func (c *context) writeBytes(b []byte) bool {
|
||
|
if len(c.dst)-c.pDst < len(b) {
|
||
|
c.err = transform.ErrShortDst
|
||
|
return false
|
||
|
}
|
||
|
// This loop is faster than using copy.
|
||
|
for _, ch := range b {
|
||
|
c.dst[c.pDst] = ch
|
||
|
c.pDst++
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// writeString writes the given string to dst.
|
||
|
func (c *context) writeString(s string) bool {
|
||
|
if len(c.dst)-c.pDst < len(s) {
|
||
|
c.err = transform.ErrShortDst
|
||
|
return false
|
||
|
}
|
||
|
// This loop is faster than using copy.
|
||
|
for i := 0; i < len(s); i++ {
|
||
|
c.dst[c.pDst] = s[i]
|
||
|
c.pDst++
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// copy writes the current rune to dst.
|
||
|
func (c *context) copy() bool {
|
||
|
return c.writeBytes(c.src[c.pSrc : c.pSrc+c.sz])
|
||
|
}
|
||
|
|
||
|
// copyXOR copies the current rune to dst and modifies it by applying the XOR
|
||
|
// pattern of the case info. It is the responsibility of the caller to ensure
|
||
|
// that this is a rune with a XOR pattern defined.
|
||
|
func (c *context) copyXOR() bool {
|
||
|
if !c.copy() {
|
||
|
return false
|
||
|
}
|
||
|
if c.info&xorIndexBit == 0 {
|
||
|
// Fast path for 6-bit XOR pattern, which covers most cases.
|
||
|
c.dst[c.pDst-1] ^= byte(c.info >> xorShift)
|
||
|
} else {
|
||
|
// Interpret XOR bits as an index.
|
||
|
// TODO: test performance for unrolling this loop. Verify that we have
|
||
|
// at least two bytes and at most three.
|
||
|
idx := c.info >> xorShift
|
||
|
for p := c.pDst - 1; ; p-- {
|
||
|
c.dst[p] ^= xorData[idx]
|
||
|
idx--
|
||
|
if xorData[idx] == 0 {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// hasPrefix returns true if src[pSrc:] starts with the given string.
|
||
|
func (c *context) hasPrefix(s string) bool {
|
||
|
b := c.src[c.pSrc:]
|
||
|
if len(b) < len(s) {
|
||
|
return false
|
||
|
}
|
||
|
for i, c := range b[:len(s)] {
|
||
|
if c != s[i] {
|
||
|
return false
|
||
|
}
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// caseType returns an info with only the case bits, normalized to either
|
||
|
// cLower, cUpper, cTitle or cUncased.
|
||
|
func (c *context) caseType() info {
|
||
|
cm := c.info & 0x7
|
||
|
if cm < 4 {
|
||
|
return cm
|
||
|
}
|
||
|
if cm >= cXORCase {
|
||
|
// xor the last bit of the rune with the case type bits.
|
||
|
b := c.src[c.pSrc+c.sz-1]
|
||
|
return info(b&1) ^ cm&0x3
|
||
|
}
|
||
|
if cm == cIgnorableCased {
|
||
|
return cLower
|
||
|
}
|
||
|
return cUncased
|
||
|
}
|
||
|
|
||
|
// lower writes the lowercase version of the current rune to dst.
|
||
|
func lower(c *context) bool {
|
||
|
ct := c.caseType()
|
||
|
if c.info&hasMappingMask == 0 || ct == cLower {
|
||
|
return c.copy()
|
||
|
}
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
return c.copyXOR()
|
||
|
}
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||
|
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||
|
return c.writeString(e[offset : offset+nLower])
|
||
|
}
|
||
|
return c.copy()
|
||
|
}
|
||
|
|
||
|
func isLower(c *context) bool {
|
||
|
ct := c.caseType()
|
||
|
if c.info&hasMappingMask == 0 || ct == cLower {
|
||
|
return true
|
||
|
}
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
if nLower := (e[1] >> lengthBits) & lengthMask; nLower != noChange {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// upper writes the uppercase version of the current rune to dst.
|
||
|
func upper(c *context) bool {
|
||
|
ct := c.caseType()
|
||
|
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||
|
return c.copy()
|
||
|
}
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
return c.copyXOR()
|
||
|
}
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||
|
// Get length of first special case mapping.
|
||
|
n := (e[1] >> lengthBits) & lengthMask
|
||
|
if ct == cTitle {
|
||
|
// The first special case mapping is for lower. Set n to the second.
|
||
|
if n == noChange {
|
||
|
n = 0
|
||
|
}
|
||
|
n, e = e[1]&lengthMask, e[n:]
|
||
|
}
|
||
|
if n != noChange {
|
||
|
return c.writeString(e[offset : offset+n])
|
||
|
}
|
||
|
return c.copy()
|
||
|
}
|
||
|
|
||
|
// isUpper writes the isUppercase version of the current rune to dst.
|
||
|
func isUpper(c *context) bool {
|
||
|
ct := c.caseType()
|
||
|
if c.info&hasMappingMask == 0 || ct == cUpper {
|
||
|
return true
|
||
|
}
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
// Get length of first special case mapping.
|
||
|
n := (e[1] >> lengthBits) & lengthMask
|
||
|
if ct == cTitle {
|
||
|
n = e[1] & lengthMask
|
||
|
}
|
||
|
if n != noChange {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// title writes the title case version of the current rune to dst.
|
||
|
func title(c *context) bool {
|
||
|
ct := c.caseType()
|
||
|
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||
|
return c.copy()
|
||
|
}
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
if ct == cLower {
|
||
|
return c.copyXOR()
|
||
|
}
|
||
|
return c.copy()
|
||
|
}
|
||
|
// Get the exception data.
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
offset := 2 + e[0]&lengthMask // size of header + fold string
|
||
|
|
||
|
nFirst := (e[1] >> lengthBits) & lengthMask
|
||
|
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||
|
if nFirst != noChange {
|
||
|
e = e[nFirst:]
|
||
|
}
|
||
|
return c.writeString(e[offset : offset+nTitle])
|
||
|
}
|
||
|
if ct == cLower && nFirst != noChange {
|
||
|
// Use the uppercase version instead.
|
||
|
return c.writeString(e[offset : offset+nFirst])
|
||
|
}
|
||
|
// Already in correct case.
|
||
|
return c.copy()
|
||
|
}
|
||
|
|
||
|
// isTitle reports whether the current rune is in title case.
|
||
|
func isTitle(c *context) bool {
|
||
|
ct := c.caseType()
|
||
|
if c.info&hasMappingMask == 0 || ct == cTitle {
|
||
|
return true
|
||
|
}
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
if ct == cLower {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
// Get the exception data.
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
if nTitle := e[1] & lengthMask; nTitle != noChange {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
nFirst := (e[1] >> lengthBits) & lengthMask
|
||
|
if ct == cLower && nFirst != noChange {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
|
||
|
// foldFull writes the foldFull version of the current rune to dst.
|
||
|
func foldFull(c *context) bool {
|
||
|
if c.info&hasMappingMask == 0 {
|
||
|
return c.copy()
|
||
|
}
|
||
|
ct := c.caseType()
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||
|
return c.copyXOR()
|
||
|
}
|
||
|
return c.copy()
|
||
|
}
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
n := e[0] & lengthMask
|
||
|
if n == 0 {
|
||
|
if ct == cLower {
|
||
|
return c.copy()
|
||
|
}
|
||
|
n = (e[1] >> lengthBits) & lengthMask
|
||
|
}
|
||
|
return c.writeString(e[2 : 2+n])
|
||
|
}
|
||
|
|
||
|
// isFoldFull reports whether the current run is mapped to foldFull
|
||
|
func isFoldFull(c *context) bool {
|
||
|
if c.info&hasMappingMask == 0 {
|
||
|
return true
|
||
|
}
|
||
|
ct := c.caseType()
|
||
|
if c.info&exceptionBit == 0 {
|
||
|
if ct != cLower || c.info&inverseFoldBit != 0 {
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|
||
|
return true
|
||
|
}
|
||
|
e := exceptions[c.info>>exceptionShift:]
|
||
|
n := e[0] & lengthMask
|
||
|
if n == 0 && ct == cLower {
|
||
|
return true
|
||
|
}
|
||
|
c.err = transform.ErrEndOfSpan
|
||
|
return false
|
||
|
}
|