diff options
Diffstat (limited to 'vendor/golang.org/x/text/internal/colltab/collelem.go')
-rw-r--r-- | vendor/golang.org/x/text/internal/colltab/collelem.go | 371 |
1 files changed, 0 insertions, 371 deletions
diff --git a/vendor/golang.org/x/text/internal/colltab/collelem.go b/vendor/golang.org/x/text/internal/colltab/collelem.go deleted file mode 100644 index 396cebd..0000000 --- a/vendor/golang.org/x/text/internal/colltab/collelem.go +++ /dev/null @@ -1,371 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package colltab - -import ( - "fmt" - "unicode" -) - -// Level identifies the collation comparison level. -// The primary level corresponds to the basic sorting of text. -// The secondary level corresponds to accents and related linguistic elements. -// The tertiary level corresponds to casing and related concepts. -// The quaternary level is derived from the other levels by the -// various algorithms for handling variable elements. -type Level int - -const ( - Primary Level = iota - Secondary - Tertiary - Quaternary - Identity - - NumLevels -) - -const ( - defaultSecondary = 0x20 - defaultTertiary = 0x2 - maxTertiary = 0x1F - MaxQuaternary = 0x1FFFFF // 21 bits. -) - -// Elem is a representation of a collation element. This API provides ways to encode -// and decode Elems. Implementations of collation tables may use values greater -// or equal to PrivateUse for their own purposes. However, these should never be -// returned by AppendNext. -type Elem uint32 - -const ( - maxCE Elem = 0xAFFFFFFF - PrivateUse = minContract - minContract = 0xC0000000 - maxContract = 0xDFFFFFFF - minExpand = 0xE0000000 - maxExpand = 0xEFFFFFFF - minDecomp = 0xF0000000 -) - -type ceType int - -const ( - ceNormal ceType = iota // ceNormal includes implicits (ce == 0) - ceContractionIndex // rune can be a start of a contraction - ceExpansionIndex // rune expands into a sequence of collation elements - ceDecompose // rune expands using NFKC decomposition -) - -func (ce Elem) ctype() ceType { - if ce <= maxCE { - return ceNormal - } - if ce <= maxContract { - return ceContractionIndex - } else { - if ce <= maxExpand { - return ceExpansionIndex - } - return ceDecompose - } - panic("should not reach here") - return ceType(-1) -} - -// For normal collation elements, we assume that a collation element either has -// a primary or non-default secondary value, not both. -// Collation elements with a primary value are of the form -// 01pppppp pppppppp ppppppp0 ssssssss -// - p* is primary collation value -// - s* is the secondary collation value -// 00pppppp pppppppp ppppppps sssttttt, where -// - p* is primary collation value -// - s* offset of secondary from default value. -// - t* is the tertiary collation value -// 100ttttt cccccccc pppppppp pppppppp -// - t* is the tertiar collation value -// - c* is the canonical combining class -// - p* is the primary collation value -// Collation elements with a secondary value are of the form -// 1010cccc ccccssss ssssssss tttttttt, where -// - c* is the canonical combining class -// - s* is the secondary collation value -// - t* is the tertiary collation value -// 11qqqqqq qqqqqqqq qqqqqqq0 00000000 -// - q* quaternary value -const ( - ceTypeMask = 0xC0000000 - ceTypeMaskExt = 0xE0000000 - ceIgnoreMask = 0xF00FFFFF - ceType1 = 0x40000000 - ceType2 = 0x00000000 - ceType3or4 = 0x80000000 - ceType4 = 0xA0000000 - ceTypeQ = 0xC0000000 - Ignore = ceType4 - firstNonPrimary = 0x80000000 - lastSpecialPrimary = 0xA0000000 - secondaryMask = 0x80000000 - hasTertiaryMask = 0x40000000 - primaryValueMask = 0x3FFFFE00 - maxPrimaryBits = 21 - compactPrimaryBits = 16 - maxSecondaryBits = 12 - maxTertiaryBits = 8 - maxCCCBits = 8 - maxSecondaryCompactBits = 8 - maxSecondaryDiffBits = 4 - maxTertiaryCompactBits = 5 - primaryShift = 9 - compactSecondaryShift = 5 - minCompactSecondary = defaultSecondary - 4 -) - -func makeImplicitCE(primary int) Elem { - return ceType1 | Elem(primary<<primaryShift) | defaultSecondary -} - -// MakeElem returns an Elem for the given values. It will return an error -// if the given combination of values is invalid. -func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) { - if w := primary; w >= 1<<maxPrimaryBits || w < 0 { - return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits) - } - if w := secondary; w >= 1<<maxSecondaryBits || w < 0 { - return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits) - } - if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 { - return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits) - } - ce := Elem(0) - if primary != 0 { - if ccc != 0 { - if primary >= 1<<compactPrimaryBits { - return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits) - } - if secondary != defaultSecondary { - return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc) - } - ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits)) - ce |= Elem(ccc) << compactPrimaryBits - ce |= Elem(primary) - ce |= ceType3or4 - } else if tertiary == defaultTertiary { - if secondary >= 1<<maxSecondaryCompactBits { - return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits) - } - ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary) - ce |= ceType1 - } else { - d := secondary - defaultSecondary + maxSecondaryDiffBits - if d >= 1<<maxSecondaryDiffBits || d < 0 { - return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits) - } - if tertiary >= 1<<maxTertiaryCompactBits { - return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits) - } - ce = Elem(primary<<maxSecondaryDiffBits + d) - ce = ce<<maxTertiaryCompactBits + Elem(tertiary) - } - } else { - ce = Elem(secondary<<maxTertiaryBits + tertiary) - ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits) - ce |= ceType4 - } - return ce, nil -} - -// MakeQuaternary returns an Elem with the given quaternary value. -func MakeQuaternary(v int) Elem { - return ceTypeQ | Elem(v<<primaryShift) -} - -// Mask sets weights for any level smaller than l to 0. -// The resulting Elem can be used to test for equality with -// other Elems to which the same mask has been applied. -func (ce Elem) Mask(l Level) uint32 { - return 0 -} - -// CCC returns the canonical combining class associated with the underlying character, -// if applicable, or 0 otherwise. -func (ce Elem) CCC() uint8 { - if ce&ceType3or4 != 0 { - if ce&ceType4 == ceType3or4 { - return uint8(ce >> 16) - } - return uint8(ce >> 20) - } - return 0 -} - -// Primary returns the primary collation weight for ce. -func (ce Elem) Primary() int { - if ce >= firstNonPrimary { - if ce > lastSpecialPrimary { - return 0 - } - return int(uint16(ce)) - } - return int(ce&primaryValueMask) >> primaryShift -} - -// Secondary returns the secondary collation weight for ce. -func (ce Elem) Secondary() int { - switch ce & ceTypeMask { - case ceType1: - return int(uint8(ce)) - case ceType2: - return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF) - case ceType3or4: - if ce < ceType4 { - return defaultSecondary - } - return int(ce>>8) & 0xFFF - case ceTypeQ: - return 0 - } - panic("should not reach here") -} - -// Tertiary returns the tertiary collation weight for ce. -func (ce Elem) Tertiary() uint8 { - if ce&hasTertiaryMask == 0 { - if ce&ceType3or4 == 0 { - return uint8(ce & 0x1F) - } - if ce&ceType4 == ceType4 { - return uint8(ce) - } - return uint8(ce>>24) & 0x1F // type 2 - } else if ce&ceTypeMask == ceType1 { - return defaultTertiary - } - // ce is a quaternary value. - return 0 -} - -func (ce Elem) updateTertiary(t uint8) Elem { - if ce&ceTypeMask == ceType1 { - // convert to type 4 - nce := ce & primaryValueMask - nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift - ce = nce - } else if ce&ceTypeMaskExt == ceType3or4 { - ce &= ^Elem(maxTertiary << 24) - return ce | (Elem(t) << 24) - } else { - // type 2 or 4 - ce &= ^Elem(maxTertiary) - } - return ce | Elem(t) -} - -// Quaternary returns the quaternary value if explicitly specified, -// 0 if ce == Ignore, or MaxQuaternary otherwise. -// Quaternary values are used only for shifted variants. -func (ce Elem) Quaternary() int { - if ce&ceTypeMask == ceTypeQ { - return int(ce&primaryValueMask) >> primaryShift - } else if ce&ceIgnoreMask == Ignore { - return 0 - } - return MaxQuaternary -} - -// Weight returns the collation weight for the given level. -func (ce Elem) Weight(l Level) int { - switch l { - case Primary: - return ce.Primary() - case Secondary: - return ce.Secondary() - case Tertiary: - return int(ce.Tertiary()) - case Quaternary: - return ce.Quaternary() - } - return 0 // return 0 (ignore) for undefined levels. -} - -// For contractions, collation elements are of the form -// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where -// - n* is the size of the first node in the contraction trie. -// - i* is the index of the first node in the contraction trie. -// - b* is the offset into the contraction collation element table. -// See contract.go for details on the contraction trie. -const ( - maxNBits = 4 - maxTrieIndexBits = 12 - maxContractOffsetBits = 13 -) - -func splitContractIndex(ce Elem) (index, n, offset int) { - n = int(ce & (1<<maxNBits - 1)) - ce >>= maxNBits - index = int(ce & (1<<maxTrieIndexBits - 1)) - ce >>= maxTrieIndexBits - offset = int(ce & (1<<maxContractOffsetBits - 1)) - return -} - -// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb, -// where b* is the index into the expansion sequence table. -const maxExpandIndexBits = 16 - -func splitExpandIndex(ce Elem) (index int) { - return int(uint16(ce)) -} - -// Some runes can be expanded using NFKD decomposition. Instead of storing the full -// sequence of collation elements, we decompose the rune and lookup the collation -// elements for each rune in the decomposition and modify the tertiary weights. -// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where -// - v* is the replacement tertiary weight for the first rune, -// - w* is the replacement tertiary weight for the second rune, -// Tertiary weights of subsequent runes should be replaced with maxTertiary. -// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. -func splitDecompose(ce Elem) (t1, t2 uint8) { - return uint8(ce), uint8(ce >> 8) -} - -const ( - // These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. - minUnified rune = 0x4E00 - maxUnified = 0x9FFF - minCompatibility = 0xF900 - maxCompatibility = 0xFAFF - minRare = 0x3400 - maxRare = 0x4DBF -) -const ( - commonUnifiedOffset = 0x10000 - rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF - otherOffset = 0x50000 // largest rune in rare is U+2FA1D - illegalOffset = otherOffset + int(unicode.MaxRune) - maxPrimary = illegalOffset + 1 -) - -// implicitPrimary returns the primary weight for the a rune -// for which there is no entry for the rune in the collation table. -// We take a different approach from the one specified in -// https://unicode.org/reports/tr10/#Implicit_Weights, -// but preserve the resulting relative ordering of the runes. -func implicitPrimary(r rune) int { - if unicode.Is(unicode.Ideographic, r) { - if r >= minUnified && r <= maxUnified { - // The most common case for CJK. - return int(r) + commonUnifiedOffset - } - if r >= minCompatibility && r <= maxCompatibility { - // This will typically not hit. The DUCET explicitly specifies mappings - // for all characters that do not decompose. - return int(r) + commonUnifiedOffset - } - return int(r) + rareUnifiedOffset - } - return int(r) + otherOffset -} |