dep prune
This commit is contained in:
parent
65441fa5b3
commit
3a2d24baca
1564 changed files with 0 additions and 638818 deletions
198
vendor/golang.org/x/text/unicode/bidi/bidi.go
generated
vendored
198
vendor/golang.org/x/text/unicode/bidi/bidi.go
generated
vendored
|
|
@ -1,198 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_trieval.go gen_ranges.go
|
||||
|
||||
// Package bidi contains functionality for bidirectional text support.
|
||||
//
|
||||
// See http://www.unicode.org/reports/tr9.
|
||||
//
|
||||
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
|
||||
// and without notice.
|
||||
package bidi // import "golang.org/x/text/unicode/bidi"
|
||||
|
||||
// TODO:
|
||||
// The following functionality would not be hard to implement, but hinges on
|
||||
// the definition of a Segmenter interface. For now this is up to the user.
|
||||
// - Iterate over paragraphs
|
||||
// - Segmenter to iterate over runs directly from a given text.
|
||||
// Also:
|
||||
// - Transformer for reordering?
|
||||
// - Transformer (validator, really) for Bidi Rule.
|
||||
|
||||
// This API tries to avoid dealing with embedding levels for now. Under the hood
|
||||
// these will be computed, but the question is to which extent the user should
|
||||
// know they exist. We should at some point allow the user to specify an
|
||||
// embedding hierarchy, though.
|
||||
|
||||
// Direction describes the overall flow of a piece of text.
type Direction int

// The values a Direction can take.
const (
	// LeftToRight means the text has no right-to-left characters and either
	// contains at least one left-to-right character or was processed with
	// the option DefaultDirection(LeftToRight).
	LeftToRight Direction = iota

	// RightToLeft means the text has no left-to-right characters and either
	// contains at least one right-to-left character or was processed with
	// the option DefaultDirection(RightToLeft).
	RightToLeft

	// Mixed means the text has both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means the text has neither left-to-right nor right-to-left
	// characters and no default direction was set.
	Neutral
)
|
||||
|
||||
// options is the record that Option functions operate on. It currently has
// no fields.
type options struct{}

// An Option configures Bidi processing by modifying an options value.
type Option func(*options)
|
||||
|
||||
// ICU allows the user to define embedding levels. This may be used, for example,
|
||||
// to use hierarchical structure of markup languages to define embeddings.
|
||||
// The following option may be a way to expose this functionality in this API.
|
||||
// // LevelFunc sets a function that associates nesting levels with the given text.
|
||||
// // The levels function will be called with monotonically increasing values for p.
|
||||
// func LevelFunc(levels func(p int) int) Option {
|
||||
// panic("unimplemented")
|
||||
// }
|
||||
|
||||
// DefaultDirection sets the default direction for a Paragraph. The direction is
|
||||
// overridden if the text contains directional characters.
|
||||
func DefaultDirection(d Direction) Option {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// A Paragraph holds one paragraph of text for Bidi processing.
type Paragraph struct {
	// buffers
}
|
||||
|
||||
// SetBytes configures p for the given paragraph text. It replaces text
|
||||
// previously set by SetBytes or SetString. If b contains a paragraph separator
|
||||
// it will only process the first paragraph and report the number of bytes
|
||||
// consumed from b including this separator. Error may be non-nil if options are
|
||||
// given.
|
||||
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// SetString configures p for the given paragraph text. It replaces text
|
||||
// previously set by SetBytes or SetString. If b contains a paragraph separator
|
||||
// it will only process the first paragraph and report the number of bytes
|
||||
// consumed from b including this separator. Error may be non-nil if options are
|
||||
// given.
|
||||
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// IsLeftToRight reports whether the principle direction of rendering for this
|
||||
// paragraphs is left-to-right. If this returns false, the principle direction
|
||||
// of rendering is right-to-left.
|
||||
func (p *Paragraph) IsLeftToRight() bool {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Direction returns the direction of the text of this paragraph.
|
||||
//
|
||||
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
|
||||
func (p *Paragraph) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// RunAt reports the Run at the given position of the input text.
|
||||
//
|
||||
// This method can be used for computing line breaks on paragraphs.
|
||||
func (p *Paragraph) RunAt(pos int) Run {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Order computes the visual ordering of all the runs in a Paragraph.
|
||||
func (p *Paragraph) Order() (Ordering, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Line computes the visual ordering of runs for a single line starting and
|
||||
// ending at the given positions in the original text.
|
||||
func (p *Paragraph) Line(start, end int) (Ordering, error) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// An Ordering holds the computed visual order of the runs of a Paragraph. It
// is invalidated when SetBytes or SetString is called on the Paragraph it came
// from. Its methods should be called by only one goroutine at a time.
type Ordering struct{}
|
||||
|
||||
// Direction reports the directionality of the runs.
|
||||
//
|
||||
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
|
||||
func (o *Ordering) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// NumRuns returns the number of runs.
|
||||
func (o *Ordering) NumRuns() int {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Run returns the ith run within the ordering.
|
||||
func (o *Ordering) Run(i int) Run {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// TODO: perhaps with options.
|
||||
// // Reorder creates a reader that reads the runes in visual order per character.
|
||||
// // Modifiers remain after the runes they modify.
|
||||
// func (l *Runs) Reorder() io.Reader {
|
||||
// panic("unimplemented")
|
||||
// }
|
||||
|
||||
// A Run is a continuous sequence of characters that share a single direction.
type Run struct {
}
|
||||
|
||||
// String returns the text of the run in its original order.
|
||||
func (r *Run) String() string {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Bytes returns the text of the run in its original order.
|
||||
func (r *Run) Bytes() []byte {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// TODO: methods for
|
||||
// - Display order
|
||||
// - headers and footers
|
||||
// - bracket replacement.
|
||||
|
||||
// Direction reports the direction of the run.
|
||||
func (r *Run) Direction() Direction {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// Position of the Run within the text passed to SetBytes or SetString of the
|
||||
// originating Paragraph value.
|
||||
func (r *Run) Pos() (start, end int) {
|
||||
panic("unimplemented")
|
||||
}
|
||||
|
||||
// AppendReverse is meant to append the characters of in, in reverse order, to
// out and return the result. Modifiers are to keep following the runes they
// modify, and brackets are to be replaced with their counterparts.
//
// It is not implemented yet: calling it panics with "unimplemented".
func AppendReverse(out, in []byte) []byte {
	panic("unimplemented")
}
|
||||
|
||||
// ReverseString is meant to return a new string holding the characters of s
// in reverse order. Modifiers are to keep following the runes they modify,
// and brackets are to be replaced with their counterparts.
//
// It is not implemented yet: calling it panics with "unimplemented".
func ReverseString(s string) string {
	panic("unimplemented")
}
|
||||
335
vendor/golang.org/x/text/unicode/bidi/bracket.go
generated
vendored
335
vendor/golang.org/x/text/unicode/bidi/bracket.go
generated
vendored
|
|
@ -1,335 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"container/list"
|
||||
"fmt"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// This file contains a port of the reference implementation of the
|
||||
// Bidi Parentheses Algorithm:
|
||||
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
|
||||
//
|
||||
// The implementation in this file covers definitions BD14-BD16 and rule N0
|
||||
// of UAX#9.
|
||||
//
|
||||
// Some preprocessing is done for each rune before data is passed to this
|
||||
// algorithm:
|
||||
// - opening and closing brackets are identified
|
||||
// - a bracket pair type, like '(' and ')' is assigned a unique identifier that
|
||||
// is identical for the opening and closing bracket. It is left to the caller to do these
|
||||
// mappings.
|
||||
// - The BPA algorithm requires that bracket characters that are canonical
|
||||
// equivalents of each other be able to be substituted for each other.
|
||||
// It is the responsibility of the caller to do this canonicalization.
|
||||
//
|
||||
// In implementing BD16, this implementation departs slightly from the "logical"
|
||||
// algorithm defined in UAX#9. In particular, the stack referenced there
|
||||
// supports operations that go beyond a "basic" stack. An equivalent
|
||||
// implementation based on a linked list is used here.
|
||||
|
||||
// bracketType is the Bidi_Paired_Bracket_Type of a character.
//
// BD14. An opening paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Open.
//
// BD15. A closing paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Close.
type bracketType byte

const (
	bpNone  bracketType = iota // not a paired bracket
	bpOpen                     // opening paired bracket (BD14)
	bpClose                    // closing paired bracket (BD15)
)
|
||||
|
||||
// bracketPair records where the two halves of one bracket pair are located:
// the index of the opening bracket and the index of the closing bracket.
type bracketPair struct {
	opener int
	closer int
}

// String formats the pair as "(opener, closer)".
func (bp *bracketPair) String() string {
	open, shut := bp.opener, bp.closer
	return fmt.Sprintf("(%v, %v)", open, shut)
}
|
||||
|
||||
// bracketPairs is a slice of bracketPairs with a sort.Interface implementation.
|
||||
type bracketPairs []bracketPair
|
||||
|
||||
func (b bracketPairs) Len() int { return len(b) }
|
||||
func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener }
|
||||
|
||||
// resolvePairedBrackets runs the paired bracket part of the UBA algorithm.
|
||||
//
|
||||
// For each rune, it takes the indexes into the original string, the class the
|
||||
// bracket type (in pairTypes) and the bracket identifier (pairValues). It also
|
||||
// takes the direction type for the start-of-sentence and the embedding level.
|
||||
//
|
||||
// The identifiers for bracket types are the rune of the canonicalized opening
|
||||
// bracket for brackets (open or close) or 0 for runes that are not brackets.
|
||||
func resolvePairedBrackets(s *isolatingRunSequence) {
|
||||
p := bracketPairer{
|
||||
sos: s.sos,
|
||||
openers: list.New(),
|
||||
codesIsolatedRun: s.types,
|
||||
indexes: s.indexes,
|
||||
}
|
||||
dirEmbed := L
|
||||
if s.level&1 != 0 {
|
||||
dirEmbed = R
|
||||
}
|
||||
p.locateBrackets(s.p.pairTypes, s.p.pairValues)
|
||||
p.resolveBrackets(dirEmbed, s.p.initialTypes)
|
||||
}
|
||||
|
||||
type bracketPairer struct {
|
||||
sos Class // direction corresponding to start of sequence
|
||||
|
||||
// The following is a restatement of BD 16 using non-algorithmic language.
|
||||
//
|
||||
// A bracket pair is a pair of characters consisting of an opening
|
||||
// paired bracket and a closing paired bracket such that the
|
||||
// Bidi_Paired_Bracket property value of the former equals the latter,
|
||||
// subject to the following constraints.
|
||||
// - both characters of a pair occur in the same isolating run sequence
|
||||
// - the closing character of a pair follows the opening character
|
||||
// - any bracket character can belong at most to one pair, the earliest possible one
|
||||
// - any bracket character not part of a pair is treated like an ordinary character
|
||||
// - pairs may nest properly, but their spans may not overlap otherwise
|
||||
|
||||
// Bracket characters with canonical decompositions are supposed to be
|
||||
// treated as if they had been normalized, to allow normalized and non-
|
||||
// normalized text to give the same result. In this implementation that step
|
||||
// is pushed out to the caller. The caller has to ensure that the pairValue
|
||||
// slices contain the rune of the opening bracket after normalization for
|
||||
// any opening or closing bracket.
|
||||
|
||||
openers *list.List // list of positions for opening brackets
|
||||
|
||||
// bracket pair positions sorted by location of opening bracket
|
||||
pairPositions bracketPairs
|
||||
|
||||
codesIsolatedRun []Class // directional bidi codes for an isolated run
|
||||
indexes []int // array of index values into the original string
|
||||
|
||||
}
|
||||
|
||||
// matchOpener reports whether characters at given positions form a matching
|
||||
// bracket pair.
|
||||
func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
|
||||
return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]]
|
||||
}
|
||||
|
||||
// maxPairingDepth is the number of pending opening brackets at which
// locateBrackets gives up: when another opener is seen while that many are
// pending, the pending list is cleared and scanning stops.
const maxPairingDepth = 63
|
||||
|
||||
// locateBrackets locates matching bracket pairs according to BD16.
|
||||
//
|
||||
// This implementation uses a linked list instead of a stack, because, while
|
||||
// elements are added at the front (like a push) they are not generally removed
|
||||
// in atomic 'pop' operations, reducing the benefit of the stack archetype.
|
||||
func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
|
||||
// traverse the run
|
||||
// do that explicitly (not in a for-each) so we can record position
|
||||
for i, index := range p.indexes {
|
||||
|
||||
// look at the bracket type for each character
|
||||
if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
|
||||
// continue scanning
|
||||
continue
|
||||
}
|
||||
switch pairTypes[index] {
|
||||
case bpOpen:
|
||||
// check if maximum pairing depth reached
|
||||
if p.openers.Len() == maxPairingDepth {
|
||||
p.openers.Init()
|
||||
return
|
||||
}
|
||||
// remember opener location, most recent first
|
||||
p.openers.PushFront(i)
|
||||
|
||||
case bpClose:
|
||||
// see if there is a match
|
||||
count := 0
|
||||
for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
|
||||
count++
|
||||
opener := elem.Value.(int)
|
||||
if p.matchOpener(pairValues, opener, i) {
|
||||
// if the opener matches, add nested pair to the ordered list
|
||||
p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
|
||||
// remove up to and including matched opener
|
||||
for ; count > 0; count-- {
|
||||
p.openers.Remove(p.openers.Front())
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
sort.Sort(p.pairPositions)
|
||||
// if we get here, the closing bracket matched no openers
|
||||
// and gets ignored
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bracket pairs within an isolating run sequence are processed as units so
|
||||
// that both the opening and the closing paired bracket in a pair resolve to
|
||||
// the same direction.
|
||||
//
|
||||
// N0. Process bracket pairs in an isolating run sequence sequentially in
|
||||
// the logical order of the text positions of the opening paired brackets
|
||||
// using the logic given below. Within this scope, bidirectional types EN
|
||||
// and AN are treated as R.
|
||||
//
|
||||
// Identify the bracket pairs in the current isolating run sequence
|
||||
// according to BD16. For each bracket-pair element in the list of pairs of
|
||||
// text positions:
|
||||
//
|
||||
// a Inspect the bidirectional types of the characters enclosed within the
|
||||
// bracket pair.
|
||||
//
|
||||
// b If any strong type (either L or R) matching the embedding direction is
|
||||
// found, set the type for both brackets in the pair to match the embedding
|
||||
// direction.
|
||||
//
|
||||
// o [ e ] o -> o e e e o
|
||||
//
|
||||
// o [ o e ] -> o e o e e
|
||||
//
|
||||
// o [ NI e ] -> o e NI e e
|
||||
//
|
||||
// c Otherwise, if a strong type (opposite the embedding direction) is
|
||||
// found, test for adjacent strong types as follows: 1 First, check
|
||||
// backwards before the opening paired bracket until the first strong type
|
||||
// (L, R, or sos) is found. If that first preceding strong type is opposite
|
||||
// the embedding direction, then set the type for both brackets in the pair
|
||||
// to that type. 2 Otherwise, set the type for both brackets in the pair to
|
||||
// the embedding direction.
|
||||
//
|
||||
// o [ o ] e -> o o o o e
|
||||
//
|
||||
// o [ o NI ] o -> o o o NI o o
|
||||
//
|
||||
// e [ o ] o -> e e o e o
|
||||
//
|
||||
// e [ o ] e -> e e o e e
|
||||
//
|
||||
// e ( o [ o ] NI ) e -> e e o o o o NI e e
|
||||
//
|
||||
// d Otherwise, do not set the type for the current bracket pair. Note that
|
||||
// if the enclosed text contains no strong types the paired brackets will
|
||||
// both resolve to the same level when resolved individually using rules N1
|
||||
// and N2.
|
||||
//
|
||||
// e ( NI ) o -> e ( NI ) o
|
||||
|
||||
// getStrongTypeN0 maps character's directional code to strong type as required
|
||||
// by rule N0.
|
||||
//
|
||||
// TODO: have separate type for "strong" directionality.
|
||||
func (p *bracketPairer) getStrongTypeN0(index int) Class {
|
||||
switch p.codesIsolatedRun[index] {
|
||||
// in the scope of N0, number types are treated as R
|
||||
case EN, AN, AL, R:
|
||||
return R
|
||||
case L:
|
||||
return L
|
||||
default:
|
||||
return ON
|
||||
}
|
||||
}
|
||||
|
||||
// classifyPairContent reports the strong types contained inside a Bracket Pair,
|
||||
// assuming the given embedding direction.
|
||||
//
|
||||
// It returns ON if no strong type is found. If a single strong type is found,
|
||||
// it returns this this type. Otherwise it returns the embedding direction.
|
||||
//
|
||||
// TODO: use separate type for "strong" directionality.
|
||||
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
|
||||
dirOpposite := ON
|
||||
for i := loc.opener + 1; i < loc.closer; i++ {
|
||||
dir := p.getStrongTypeN0(i)
|
||||
if dir == ON {
|
||||
continue
|
||||
}
|
||||
if dir == dirEmbed {
|
||||
return dir // type matching embedding direction found
|
||||
}
|
||||
dirOpposite = dir
|
||||
}
|
||||
// return ON if no strong type found, or class opposite to dirEmbed
|
||||
return dirOpposite
|
||||
}
|
||||
|
||||
// classBeforePair determines which strong types are present before a Bracket
|
||||
// Pair. Return R or L if strong type found, otherwise ON.
|
||||
func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
|
||||
for i := loc.opener - 1; i >= 0; i-- {
|
||||
if dir := p.getStrongTypeN0(i); dir != ON {
|
||||
return dir
|
||||
}
|
||||
}
|
||||
// no strong types found, return sos
|
||||
return p.sos
|
||||
}
|
||||
|
||||
// assignBracketType implements rule N0 for a single bracket pair.
|
||||
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
|
||||
// rule "N0, a", inspect contents of pair
|
||||
dirPair := p.classifyPairContent(loc, dirEmbed)
|
||||
|
||||
// dirPair is now L, R, or N (no strong type found)
|
||||
|
||||
// the following logical tests are performed out of order compared to
|
||||
// the statement of the rules but yield the same results
|
||||
if dirPair == ON {
|
||||
return // case "d" - nothing to do
|
||||
}
|
||||
|
||||
if dirPair != dirEmbed {
|
||||
// case "c": strong type found, opposite - check before (c.1)
|
||||
dirPair = p.classBeforePair(loc)
|
||||
if dirPair == dirEmbed || dirPair == ON {
|
||||
// no strong opposite type found before - use embedding (c.2)
|
||||
dirPair = dirEmbed
|
||||
}
|
||||
}
|
||||
// else: case "b", strong type found matching embedding,
|
||||
// no explicit action needed, as dirPair is already set to embedding
|
||||
// direction
|
||||
|
||||
// set the bracket types to the type found
|
||||
p.setBracketsToType(loc, dirPair, initialTypes)
|
||||
}
|
||||
|
||||
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
|
||||
p.codesIsolatedRun[loc.opener] = dirPair
|
||||
p.codesIsolatedRun[loc.closer] = dirPair
|
||||
|
||||
for i := loc.opener + 1; i < loc.closer; i++ {
|
||||
index := p.indexes[i]
|
||||
if initialTypes[index] != NSM {
|
||||
break
|
||||
}
|
||||
p.codesIsolatedRun[i] = dirPair
|
||||
}
|
||||
|
||||
for i := loc.closer + 1; i < len(p.indexes); i++ {
|
||||
index := p.indexes[i]
|
||||
if initialTypes[index] != NSM {
|
||||
break
|
||||
}
|
||||
p.codesIsolatedRun[i] = dirPair
|
||||
}
|
||||
}
|
||||
|
||||
// resolveBrackets implements rule N0 for a list of pairs.
|
||||
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
|
||||
for _, loc := range p.pairPositions {
|
||||
p.assignBracketType(loc, dirEmbed, initialTypes)
|
||||
}
|
||||
}
|
||||
1058
vendor/golang.org/x/text/unicode/bidi/core.go
generated
vendored
1058
vendor/golang.org/x/text/unicode/bidi/core.go
generated
vendored
File diff suppressed because it is too large
Load diff
224
vendor/golang.org/x/text/unicode/bidi/core_test.go
generated
vendored
224
vendor/golang.org/x/text/unicode/bidi/core_test.go
generated
vendored
|
|
@ -1,224 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
// testLevels is the "levels" command-line flag (default false). The tests in
// this file compare resolved levels only when it is set.
var testLevels = flag.Bool("levels", false, "enable testing of levels")
|
||||
|
||||
// TestBidiCore performs the tests in BidiTest.txt.
|
||||
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt.
|
||||
func TestBidiCore(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
r := gen.OpenUCDFile("BidiTest.txt")
|
||||
defer r.Close()
|
||||
|
||||
var wantLevels, wantOrder []string
|
||||
p := ucd.New(r, ucd.Part(func(p *ucd.Parser) {
|
||||
s := strings.Split(p.String(0), ":")
|
||||
switch s[0] {
|
||||
case "Levels":
|
||||
wantLevels = strings.Fields(s[1])
|
||||
case "Reorder":
|
||||
wantOrder = strings.Fields(s[1])
|
||||
default:
|
||||
log.Fatalf("Unknown part %q.", s[0])
|
||||
}
|
||||
}))
|
||||
|
||||
for p.Next() {
|
||||
types := []Class{}
|
||||
for _, s := range p.Strings(0) {
|
||||
types = append(types, bidiClass[s])
|
||||
}
|
||||
// We ignore the bracketing part of the algorithm.
|
||||
pairTypes := make([]bracketType, len(types))
|
||||
pairValues := make([]rune, len(types))
|
||||
|
||||
for i := uint(0); i < 3; i++ {
|
||||
if p.Uint(1)&(1<<i) == 0 {
|
||||
continue
|
||||
}
|
||||
lev := level(int(i) - 1)
|
||||
par := newParagraph(types, pairTypes, pairValues, lev)
|
||||
|
||||
if *testLevels {
|
||||
levels := par.getLevels([]int{len(types)})
|
||||
for i, s := range wantLevels {
|
||||
if s == "x" {
|
||||
continue
|
||||
}
|
||||
l, _ := strconv.ParseUint(s, 10, 8)
|
||||
if level(l)&1 != levels[i]&1 {
|
||||
t.Errorf("%s:%d:levels: got %v; want %v", p.String(0), lev, levels, wantLevels)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
order := par.getReordering([]int{len(types)})
|
||||
gotOrder := filterOrder(types, order)
|
||||
if got, want := fmt.Sprint(gotOrder), fmt.Sprint(wantOrder); got != want {
|
||||
t.Errorf("%s:%d:order: got %v; want %v\noriginal %v", p.String(0), lev, got, want, order)
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := p.Err(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
var removeClasses = map[Class]bool{
|
||||
LRO: true,
|
||||
RLO: true,
|
||||
RLE: true,
|
||||
LRE: true,
|
||||
PDF: true,
|
||||
BN: true,
|
||||
}
|
||||
|
||||
// TestBidiCharacters performs the tests in BidiCharacterTest.txt.
|
||||
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
|
||||
func TestBidiCharacters(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiCharacterTest.txt"), func(p *ucd.Parser) {
|
||||
var (
|
||||
types []Class
|
||||
pairTypes []bracketType
|
||||
pairValues []rune
|
||||
parLevel level
|
||||
|
||||
wantLevel = level(p.Int(2))
|
||||
wantLevels = p.Strings(3)
|
||||
wantVisualOrder = p.Strings(4)
|
||||
)
|
||||
|
||||
switch l := p.Int(1); l {
|
||||
case 0, 1:
|
||||
parLevel = level(l)
|
||||
case 2:
|
||||
parLevel = implicitLevel
|
||||
default:
|
||||
// Spec says to ignore unknown parts.
|
||||
}
|
||||
|
||||
runes := p.Runes(0)
|
||||
|
||||
for _, r := range runes {
|
||||
// Assign the bracket type.
|
||||
if d := norm.NFKD.PropertiesString(string(r)).Decomposition(); d != nil {
|
||||
r = []rune(string(d))[0]
|
||||
}
|
||||
p, _ := LookupRune(r)
|
||||
|
||||
// Assign the class for this rune.
|
||||
types = append(types, p.Class())
|
||||
|
||||
switch {
|
||||
case !p.IsBracket():
|
||||
pairTypes = append(pairTypes, bpNone)
|
||||
pairValues = append(pairValues, 0)
|
||||
case p.IsOpeningBracket():
|
||||
pairTypes = append(pairTypes, bpOpen)
|
||||
pairValues = append(pairValues, r)
|
||||
default:
|
||||
pairTypes = append(pairTypes, bpClose)
|
||||
pairValues = append(pairValues, p.reverseBracket(r))
|
||||
}
|
||||
}
|
||||
par := newParagraph(types, pairTypes, pairValues, parLevel)
|
||||
|
||||
// Test results:
|
||||
if got := par.embeddingLevel; got != wantLevel {
|
||||
t.Errorf("%v:level: got %d; want %d", string(runes), got, wantLevel)
|
||||
}
|
||||
|
||||
if *testLevels {
|
||||
gotLevels := getLevelStrings(types, par.getLevels([]int{len(types)}))
|
||||
if got, want := fmt.Sprint(gotLevels), fmt.Sprint(wantLevels); got != want {
|
||||
t.Errorf("%04X %q:%d: got %v; want %v\nval: %x\npair: %v", runes, string(runes), parLevel, got, want, pairValues, pairTypes)
|
||||
}
|
||||
}
|
||||
|
||||
order := par.getReordering([]int{len(types)})
|
||||
order = filterOrder(types, order)
|
||||
if got, want := fmt.Sprint(order), fmt.Sprint(wantVisualOrder); got != want {
|
||||
t.Errorf("%04X %q:%d: got %v; want %v\ngot order: %s", runes, string(runes), parLevel, got, want, reorder(runes, order))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func getLevelStrings(cl []Class, levels []level) []string {
|
||||
var results []string
|
||||
for i, l := range levels {
|
||||
if !removeClasses[cl[i]] {
|
||||
results = append(results, fmt.Sprint(l))
|
||||
} else {
|
||||
results = append(results, "x")
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func filterOrder(cl []Class, order []int) []int {
|
||||
no := []int{}
|
||||
for _, o := range order {
|
||||
if !removeClasses[cl[o]] {
|
||||
no = append(no, o)
|
||||
}
|
||||
}
|
||||
return no
|
||||
}
|
||||
|
||||
// reorder returns the string made of the runes r[order[0]], r[order[1]], ...
// in that sequence.
func reorder(r []rune, order []int) string {
	out := make([]rune, 0, len(order))
	for _, src := range order {
		out = append(out, r[src])
	}
	return string(out)
}
|
||||
|
||||
// bidiClass names and codes taken from class "bc" in
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
|
||||
var bidiClass = map[string]Class{
|
||||
"AL": AL, // classArabicLetter,
|
||||
"AN": AN, // classArabicNumber,
|
||||
"B": B, // classParagraphSeparator,
|
||||
"BN": BN, // classBoundaryNeutral,
|
||||
"CS": CS, // classCommonSeparator,
|
||||
"EN": EN, // classEuropeanNumber,
|
||||
"ES": ES, // classEuropeanSeparator,
|
||||
"ET": ET, // classEuropeanTerminator,
|
||||
"L": L, // classLeftToRight,
|
||||
"NSM": NSM, // classNonspacingMark,
|
||||
"ON": ON, // classOtherNeutral,
|
||||
"R": R, // classRightToLeft,
|
||||
"S": S, // classSegmentSeparator,
|
||||
"WS": WS, // classWhiteSpace,
|
||||
|
||||
"LRO": LRO, // classLeftToRightOverride,
|
||||
"RLO": RLO, // classRightToLeftOverride,
|
||||
"LRE": LRE, // classLeftToRightEmbedding,
|
||||
"RLE": RLE, // classRightToLeftEmbedding,
|
||||
"PDF": PDF, // classPopDirectionalFormat,
|
||||
"LRI": LRI, // classLeftToRightIsolate,
|
||||
"RLI": RLI, // classRightToLeftIsolate,
|
||||
"FSI": FSI, // classFirstStrongIsolate,
|
||||
"PDI": PDI, // classPopDirectionalIsolate,
|
||||
}
|
||||
133
vendor/golang.org/x/text/unicode/bidi/gen.go
generated
vendored
133
vendor/golang.org/x/text/unicode/bidi/gen.go
generated
vendored
|
|
@ -1,133 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/triegen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
// outputFile is the "out" command-line flag: the name of the file that
// genTables writes. It defaults to "tables.go".
var outputFile = flag.String("out", "tables.go", "output file")
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
|
||||
gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
|
||||
|
||||
genTables()
|
||||
}
|
||||
|
||||
// bidiClass names and codes taken from class "bc" in
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
|
||||
var bidiClass = map[string]Class{
|
||||
"AL": AL, // ArabicLetter
|
||||
"AN": AN, // ArabicNumber
|
||||
"B": B, // ParagraphSeparator
|
||||
"BN": BN, // BoundaryNeutral
|
||||
"CS": CS, // CommonSeparator
|
||||
"EN": EN, // EuropeanNumber
|
||||
"ES": ES, // EuropeanSeparator
|
||||
"ET": ET, // EuropeanTerminator
|
||||
"L": L, // LeftToRight
|
||||
"NSM": NSM, // NonspacingMark
|
||||
"ON": ON, // OtherNeutral
|
||||
"R": R, // RightToLeft
|
||||
"S": S, // SegmentSeparator
|
||||
"WS": WS, // WhiteSpace
|
||||
|
||||
"FSI": Control,
|
||||
"PDF": Control,
|
||||
"PDI": Control,
|
||||
"LRE": Control,
|
||||
"LRI": Control,
|
||||
"LRO": Control,
|
||||
"RLE": Control,
|
||||
"RLI": Control,
|
||||
"RLO": Control,
|
||||
}
|
||||
|
||||
func genTables() {
|
||||
if numClass > 0x0F {
|
||||
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
|
||||
}
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile(*outputFile, "bidi")
|
||||
|
||||
gen.WriteUnicodeVersion(w)
|
||||
|
||||
t := triegen.NewTrie("bidi")
|
||||
|
||||
// Build data about bracket mapping. These bits need to be or-ed with
|
||||
// any other bits.
|
||||
orMask := map[rune]uint64{}
|
||||
|
||||
xorMap := map[rune]int{}
|
||||
xorMasks := []rune{0} // First value is no-op.
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
|
||||
r1 := p.Rune(0)
|
||||
r2 := p.Rune(1)
|
||||
xor := r1 ^ r2
|
||||
if _, ok := xorMap[xor]; !ok {
|
||||
xorMap[xor] = len(xorMasks)
|
||||
xorMasks = append(xorMasks, xor)
|
||||
}
|
||||
entry := uint64(xorMap[xor]) << xorMaskShift
|
||||
switch p.String(2) {
|
||||
case "o":
|
||||
entry |= openMask
|
||||
case "c", "n":
|
||||
default:
|
||||
log.Fatalf("Unknown bracket class %q.", p.String(2))
|
||||
}
|
||||
orMask[r1] = entry
|
||||
})
|
||||
|
||||
w.WriteComment(`
|
||||
xorMasks contains masks to be xor-ed with brackets to get the reverse
|
||||
version.`)
|
||||
w.WriteVar("xorMasks", xorMasks)
|
||||
|
||||
done := map[rune]bool{}
|
||||
|
||||
insert := func(r rune, c Class) {
|
||||
if !done[r] {
|
||||
t.Insert(r, orMask[r]|uint64(c))
|
||||
done[r] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Insert the derived BiDi properties.
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
class, ok := bidiClass[p.String(1)]
|
||||
if !ok {
|
||||
log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
|
||||
}
|
||||
insert(r, class)
|
||||
})
|
||||
visitDefaults(insert)
|
||||
|
||||
// TODO: use sparse blocks. This would reduce table size considerably
|
||||
// from the looks of it.
|
||||
|
||||
sz, err := t.Gen(w)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
w.Size += sz
|
||||
}
|
||||
|
||||
// xorMasks is a placeholder that lets the methods in gen_common compile. The
// real table is generated by this file into tables.go.
var xorMasks []rune
|
||||
57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go
generated
vendored
57
vendor/golang.org/x/text/unicode/bidi/gen_ranges.go
generated
vendored
|
|
@ -1,57 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// These tables are hand-extracted from:
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
|
||||
func visitDefaults(fn func(r rune, c Class)) {
|
||||
// first write default values for ranges listed above.
|
||||
visitRunes(fn, AL, []rune{
|
||||
0x0600, 0x07BF, // Arabic
|
||||
0x08A0, 0x08FF, // Arabic Extended-A
|
||||
0xFB50, 0xFDCF, // Arabic Presentation Forms
|
||||
0xFDF0, 0xFDFF,
|
||||
0xFE70, 0xFEFF,
|
||||
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
|
||||
})
|
||||
visitRunes(fn, R, []rune{
|
||||
0x0590, 0x05FF, // Hebrew
|
||||
0x07C0, 0x089F, // Nko et al.
|
||||
0xFB1D, 0xFB4F,
|
||||
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
|
||||
0x0001E800, 0x0001EDFF,
|
||||
0x0001EF00, 0x0001EFFF,
|
||||
})
|
||||
visitRunes(fn, ET, []rune{ // European Terminator
|
||||
0x20A0, 0x20Cf, // Currency symbols
|
||||
})
|
||||
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
|
||||
fn(r, BN) // Boundary Neutral
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
fn(p.Rune(0), BN) // Boundary Neutral
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
|
||||
for i := 0; i < len(runes); i += 2 {
|
||||
lo, hi := runes[i], runes[i+1]
|
||||
for j := lo; j <= hi; j++ {
|
||||
fn(j, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go
generated
vendored
64
vendor/golang.org/x/text/unicode/bidi/gen_trieval.go
generated
vendored
|
|
@ -1,64 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint

// Classes that are stored directly in a trie entry. numClass counts them.
const (
	L       Class = iota // LeftToRight
	R                    // RightToLeft
	EN                   // EuropeanNumber
	ES                   // EuropeanSeparator
	ET                   // EuropeanTerminator
	AN                   // ArabicNumber
	CS                   // CommonSeparator
	B                    // ParagraphSeparator
	S                    // SegmentSeparator
	WS                   // WhiteSpace
	ON                   // OtherNeutral
	BN                   // BoundaryNeutral
	NSM                  // NonspacingMark
	AL                   // ArabicLetter
	Control              // Control LRO - PDI

	numClass
)

// The individual control classes, numbered from numClass+1 onwards.
const (
	LRO Class = numClass + 1 + iota // LeftToRightOverride
	RLO                             // RightToLeftOverride
	LRE                             // LeftToRightEmbedding
	RLE                             // RightToLeftEmbedding
	PDF                             // PopDirectionalFormat
	LRI                             // LeftToRightIsolate
	RLI                             // RightToLeftIsolate
	FSI                             // FirstStrongIsolate
	PDI                             // PopDirectionalIsolate
)

// unknownClass has all bits set.
const unknownClass = ^Class(0)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// Bit layout of a trie entry:
//
//	7..5  XOR mask for brackets
//	4     1: Bracket open, 0: Bracket close
//	3..0  Class type
const (
	openMask     = 1 << 4 // bit 4
	xorMaskShift = 5      // the XOR mask field starts at bit 5
)
|
||||
206
vendor/golang.org/x/text/unicode/bidi/prop.go
generated
vendored
206
vendor/golang.org/x/text/unicode/bidi/prop.go
generated
vendored
|
|
@ -1,206 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import "unicode/utf8"
|
||||
|
||||
// Properties provides access to BiDi properties of runes.
type Properties struct {
	// entry is the value looked up for the rune: its class in the low four
	// bits and the bracket information in the upper four.
	entry uint8

	// last is the final byte of the rune's UTF-8 encoding. The lookup
	// functions set it for three-byte encodings only.
	last uint8
}
|
||||
|
||||
// trie is the lookup table that the Lookup functions query via lookupValue
// for multi-byte encodings.
var trie = newBidiTrie(0)
|
||||
|
||||
// TODO: using this for bidirule reduces the running time by about 5%. Consider
|
||||
// if this is worth exposing or if we can find a way to speed up the Class
|
||||
// method.
|
||||
//
|
||||
// // CompactClass is like Class, but maps all of the BiDi control classes
|
||||
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
|
||||
// func (p Properties) CompactClass() Class {
|
||||
// return Class(p.entry & 0x0F)
|
||||
// }
|
||||
|
||||
// Class returns the Bidi class for p.
|
||||
func (p Properties) Class() Class {
|
||||
c := Class(p.entry & 0x0F)
|
||||
if c == Control {
|
||||
c = controlByteToClass[p.last&0xF]
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// IsBracket reports whether the rune is a bracket.
|
||||
func (p Properties) IsBracket() bool { return p.entry&0xF0 != 0 }
|
||||
|
||||
// IsOpeningBracket reports whether the rune is an opening bracket.
|
||||
// IsBracket must return true.
|
||||
func (p Properties) IsOpeningBracket() bool { return p.entry&openMask != 0 }
|
||||
|
||||
// TODO: find a better API and expose.
|
||||
func (p Properties) reverseBracket(r rune) rune {
|
||||
return xorMasks[p.entry>>xorMaskShift] ^ r
|
||||
}
|
||||
|
||||
var controlByteToClass = [16]Class{
|
||||
0xD: LRO, // U+202D LeftToRightOverride,
|
||||
0xE: RLO, // U+202E RightToLeftOverride,
|
||||
0xA: LRE, // U+202A LeftToRightEmbedding,
|
||||
0xB: RLE, // U+202B RightToLeftEmbedding,
|
||||
0xC: PDF, // U+202C PopDirectionalFormat,
|
||||
0x6: LRI, // U+2066 LeftToRightIsolate,
|
||||
0x7: RLI, // U+2067 RightToLeftIsolate,
|
||||
0x8: FSI, // U+2068 FirstStrongIsolate,
|
||||
0x9: PDI, // U+2069 PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// LookupRune returns properties for r.
|
||||
func LookupRune(r rune) (p Properties, size int) {
|
||||
var buf [4]byte
|
||||
n := utf8.EncodeRune(buf[:], r)
|
||||
return Lookup(buf[:n])
|
||||
}
|
||||
|
||||
// TODO: these lookup methods are based on the generated trie code. The returned
|
||||
// sizes have slightly different semantics from the generated code, in that it
|
||||
// always returns size==1 for an illegal UTF-8 byte (instead of the length
|
||||
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
|
||||
// leave invalid UTF-8 untouched, in which case it has performance benefits to
|
||||
// do so (without changing the semantics). Bidi requires the semantics used here
|
||||
// for the bidirule implementation to be compatible with the Go semantics.
|
||||
// They ultimately should perhaps be adopted by all trie implementations, for
|
||||
// convenience sake.
|
||||
// This unrolled code also boosts performance of the secure/bidirule package by
|
||||
// about 30%.
|
||||
// So, to remove this code:
|
||||
// - add option to trie generator to define return type.
|
||||
// - always return 1 byte size for ill-formed UTF-8 runes.
|
||||
|
||||
// Lookup returns properties for the first rune in s and the width in bytes of
|
||||
// its encoding. The size will be 0 if s does not hold enough bytes to complete
|
||||
// the encoding.
|
||||
func Lookup(s []byte) (p Properties, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return Properties{entry: bidiValues[c0]}, 1
|
||||
case c0 < 0xC2:
|
||||
return Properties{}, 1
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = bidiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
|
||||
}
|
||||
// Illegal rune
|
||||
return Properties{}, 1
|
||||
}
|
||||
|
||||
// LookupString returns properties for the first rune in s and the width in
|
||||
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
|
||||
// complete the encoding.
|
||||
func LookupString(s string) (p Properties, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < 0x80: // is ASCII
|
||||
return Properties{entry: bidiValues[c0]}, 1
|
||||
case c0 < 0xC2:
|
||||
return Properties{}, 1
|
||||
case c0 < 0xE0: // 2-byte UTF-8
|
||||
if len(s) < 2 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
|
||||
case c0 < 0xF0: // 3-byte UTF-8
|
||||
if len(s) < 3 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
|
||||
case c0 < 0xF8: // 4-byte UTF-8
|
||||
if len(s) < 4 {
|
||||
return Properties{}, 0
|
||||
}
|
||||
i := bidiIndex[c0]
|
||||
c1 := s[1]
|
||||
if c1 < 0x80 || 0xC0 <= c1 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o := uint32(i)<<6 + uint32(c1)
|
||||
i = bidiIndex[o]
|
||||
c2 := s[2]
|
||||
if c2 < 0x80 || 0xC0 <= c2 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
o = uint32(i)<<6 + uint32(c2)
|
||||
i = bidiIndex[o]
|
||||
c3 := s[3]
|
||||
if c3 < 0x80 || 0xC0 <= c3 {
|
||||
return Properties{}, 1
|
||||
}
|
||||
return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
|
||||
}
|
||||
// Illegal rune
|
||||
return Properties{}, 1
|
||||
}
|
||||
53
vendor/golang.org/x/text/unicode/bidi/ranges_test.go
generated
vendored
53
vendor/golang.org/x/text/unicode/bidi/ranges_test.go
generated
vendored
|
|
@ -1,53 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// These tables are hand-extracted from:
|
||||
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
|
||||
func visitDefaults(fn func(r rune, c Class)) {
|
||||
// first write default values for ranges listed above.
|
||||
visitRunes(fn, AL, []rune{
|
||||
0x0600, 0x07BF, // Arabic
|
||||
0x08A0, 0x08FF, // Arabic Extended-A
|
||||
0xFB50, 0xFDCF, // Arabic Presentation Forms
|
||||
0xFDF0, 0xFDFF,
|
||||
0xFE70, 0xFEFF,
|
||||
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
|
||||
})
|
||||
visitRunes(fn, R, []rune{
|
||||
0x0590, 0x05FF, // Hebrew
|
||||
0x07C0, 0x089F, // Nko et al.
|
||||
0xFB1D, 0xFB4F,
|
||||
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
|
||||
0x0001E800, 0x0001EDFF,
|
||||
0x0001EF00, 0x0001EFFF,
|
||||
})
|
||||
visitRunes(fn, ET, []rune{ // European Terminator
|
||||
0x20A0, 0x20Cf, // Currency symbols
|
||||
})
|
||||
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
|
||||
fn(r, BN) // Boundary Neutral
|
||||
})
|
||||
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
|
||||
if p.String(1) == "Default_Ignorable_Code_Point" {
|
||||
fn(p.Rune(0), BN) // Boundary Neutral
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
|
||||
for i := 0; i < len(runes); i += 2 {
|
||||
lo, hi := runes[i], runes[i+1]
|
||||
for j := lo; j <= hi; j++ {
|
||||
fn(j, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
1813
vendor/golang.org/x/text/unicode/bidi/tables.go
generated
vendored
1813
vendor/golang.org/x/text/unicode/bidi/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
82
vendor/golang.org/x/text/unicode/bidi/tables_test.go
generated
vendored
82
vendor/golang.org/x/text/unicode/bidi/tables_test.go
generated
vendored
|
|
@ -1,82 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package bidi
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
var labels = []string{
|
||||
AL: "AL",
|
||||
AN: "AN",
|
||||
B: "B",
|
||||
BN: "BN",
|
||||
CS: "CS",
|
||||
EN: "EN",
|
||||
ES: "ES",
|
||||
ET: "ET",
|
||||
L: "L",
|
||||
NSM: "NSM",
|
||||
ON: "ON",
|
||||
R: "R",
|
||||
S: "S",
|
||||
WS: "WS",
|
||||
|
||||
LRO: "LRO",
|
||||
RLO: "RLO",
|
||||
LRE: "LRE",
|
||||
RLE: "RLE",
|
||||
PDF: "PDF",
|
||||
LRI: "LRI",
|
||||
RLI: "RLI",
|
||||
FSI: "FSI",
|
||||
PDI: "PDI",
|
||||
}
|
||||
|
||||
func TestTables(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
|
||||
r1 := p.Rune(0)
|
||||
want := p.Rune(1)
|
||||
|
||||
e, _ := LookupRune(r1)
|
||||
if got := e.reverseBracket(r1); got != want {
|
||||
t.Errorf("Reverse(%U) = %U; want %U", r1, got, want)
|
||||
}
|
||||
})
|
||||
|
||||
done := map[rune]bool{}
|
||||
test := func(name string, r rune, want string) {
|
||||
str := string(r)
|
||||
e, _ := LookupString(str)
|
||||
if got := labels[e.Class()]; got != want {
|
||||
t.Errorf("%s:%U: got %s; want %s", name, r, got, want)
|
||||
}
|
||||
if e2, sz := LookupRune(r); e != e2 || sz != len(str) {
|
||||
t.Errorf("LookupRune(%U) = %v, %d; want %v, %d", r, e2, e, sz, len(str))
|
||||
}
|
||||
if e2, sz := Lookup([]byte(str)); e != e2 || sz != len(str) {
|
||||
t.Errorf("Lookup(%U) = %v, %d; want %v, %d", r, e2, e, sz, len(str))
|
||||
}
|
||||
done[r] = true
|
||||
}
|
||||
|
||||
// Insert the derived BiDi properties.
|
||||
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
|
||||
r := p.Rune(0)
|
||||
test("derived", r, p.String(1))
|
||||
})
|
||||
visitDefaults(func(r rune, c Class) {
|
||||
if !done[r] {
|
||||
test("default", r, labels[c])
|
||||
}
|
||||
})
|
||||
|
||||
}
|
||||
60
vendor/golang.org/x/text/unicode/bidi/trieval.go
generated
vendored
60
vendor/golang.org/x/text/unicode/bidi/trieval.go
generated
vendored
|
|
@ -1,60 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package bidi
|
||||
|
||||
// Class is the Unicode BiDi class. Each rune has a single class.
|
||||
type Class uint
|
||||
|
||||
const (
|
||||
L Class = iota // LeftToRight
|
||||
R // RightToLeft
|
||||
EN // EuropeanNumber
|
||||
ES // EuropeanSeparator
|
||||
ET // EuropeanTerminator
|
||||
AN // ArabicNumber
|
||||
CS // CommonSeparator
|
||||
B // ParagraphSeparator
|
||||
S // SegmentSeparator
|
||||
WS // WhiteSpace
|
||||
ON // OtherNeutral
|
||||
BN // BoundaryNeutral
|
||||
NSM // NonspacingMark
|
||||
AL // ArabicLetter
|
||||
Control // Control LRO - PDI
|
||||
|
||||
numClass
|
||||
|
||||
LRO // LeftToRightOverride
|
||||
RLO // RightToLeftOverride
|
||||
LRE // LeftToRightEmbedding
|
||||
RLE // RightToLeftEmbedding
|
||||
PDF // PopDirectionalFormat
|
||||
LRI // LeftToRightIsolate
|
||||
RLI // RightToLeftIsolate
|
||||
FSI // FirstStrongIsolate
|
||||
PDI // PopDirectionalIsolate
|
||||
|
||||
unknownClass = ^Class(0)
|
||||
)
|
||||
|
||||
var controlToClass = map[rune]Class{
|
||||
0x202D: LRO, // LeftToRightOverride,
|
||||
0x202E: RLO, // RightToLeftOverride,
|
||||
0x202A: LRE, // LeftToRightEmbedding,
|
||||
0x202B: RLE, // RightToLeftEmbedding,
|
||||
0x202C: PDF, // PopDirectionalFormat,
|
||||
0x2066: LRI, // LeftToRightIsolate,
|
||||
0x2067: RLI, // RightToLeftIsolate,
|
||||
0x2068: FSI, // FirstStrongIsolate,
|
||||
0x2069: PDI, // PopDirectionalIsolate,
|
||||
}
|
||||
|
||||
// A trie entry has the following bits:
|
||||
// 7..5 XOR mask for brackets
|
||||
// 4 1: Bracket open, 0: Bracket close
|
||||
// 3..0 Class type
|
||||
|
||||
const (
|
||||
openMask = 0x10
|
||||
xorMaskShift = 5
|
||||
)
|
||||
115
vendor/golang.org/x/text/unicode/rangetable/gen.go
generated
vendored
115
vendor/golang.org/x/text/unicode/rangetable/gen.go
generated
vendored
|
|
@ -1,115 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"reflect"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/collate"
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
"golang.org/x/text/language"
|
||||
"golang.org/x/text/unicode/rangetable"
|
||||
)
|
||||
|
||||
// versionList is the -versions flag: a comma-separated list of versions. It
// has no default; getVersions aborts when it is empty.
var versionList = flag.String("versions", "", "list of versions for which to generate RangeTables")
|
||||
|
||||
const bootstrapMessage = `No versions specified.
|
||||
To bootstrap the code generation, run:
|
||||
go run gen.go --versions=4.1.0,5.0.0,6.0.0,6.1.0,6.2.0,6.3.0,7.0.0
|
||||
|
||||
and ensure that the latest versions are included by checking:
|
||||
http://www.unicode.org/Public/`
|
||||
|
||||
func getVersions() []string {
|
||||
if *versionList == "" {
|
||||
log.Fatal(bootstrapMessage)
|
||||
}
|
||||
|
||||
c := collate.New(language.Und, collate.Numeric)
|
||||
versions := strings.Split(*versionList, ",")
|
||||
c.SortStrings(versions)
|
||||
|
||||
// Ensure that at least the current version is included.
|
||||
for _, v := range versions {
|
||||
if v == gen.UnicodeVersion() {
|
||||
return versions
|
||||
}
|
||||
}
|
||||
|
||||
versions = append(versions, gen.UnicodeVersion())
|
||||
c.SortStrings(versions)
|
||||
return versions
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
versions := getVersions()
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
|
||||
fmt.Fprintf(w, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ","))
|
||||
fmt.Fprintf(w, "import \"unicode\"\n\n")
|
||||
|
||||
vstr := func(s string) string { return strings.Replace(s, ".", "_", -1) }
|
||||
|
||||
fmt.Fprintf(w, "var assigned = map[string]*unicode.RangeTable{\n")
|
||||
for _, v := range versions {
|
||||
fmt.Fprintf(w, "\t%q: assigned%s,\n", v, vstr(v))
|
||||
}
|
||||
fmt.Fprintf(w, "}\n\n")
|
||||
|
||||
var size int
|
||||
for _, v := range versions {
|
||||
assigned := []rune{}
|
||||
|
||||
r := gen.Open("http://www.unicode.org/Public/", "", v+"/ucd/UnicodeData.txt")
|
||||
ucd.Parse(r, func(p *ucd.Parser) {
|
||||
assigned = append(assigned, p.Rune(0))
|
||||
})
|
||||
|
||||
rt := rangetable.New(assigned...)
|
||||
sz := int(reflect.TypeOf(unicode.RangeTable{}).Size())
|
||||
sz += int(reflect.TypeOf(unicode.Range16{}).Size()) * len(rt.R16)
|
||||
sz += int(reflect.TypeOf(unicode.Range32{}).Size()) * len(rt.R32)
|
||||
|
||||
fmt.Fprintf(w, "// size %d bytes (%d KiB)\n", sz, sz/1024)
|
||||
fmt.Fprintf(w, "var assigned%s = ", vstr(v))
|
||||
print(w, rt)
|
||||
|
||||
size += sz
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "// Total size %d bytes (%d KiB)\n", size, size/1024)
|
||||
|
||||
gen.WriteGoFile("tables.go", "rangetable", w.Bytes())
|
||||
}
|
||||
|
||||
// print writes rt to w as a Go composite literal of type *unicode.RangeTable,
// followed by an empty line.
func print(w io.Writer, rt *unicode.RangeTable) {
	fmt.Fprintln(w, "&unicode.RangeTable{")

	// 16-bit ranges.
	fmt.Fprintln(w, "\tR16: []unicode.Range16{")
	for i := range rt.R16 {
		rng := rt.R16[i]
		fmt.Fprintf(w, "\t\t{%#04x, %#04x, %d},\n", rng.Lo, rng.Hi, rng.Stride)
	}
	fmt.Fprintln(w, "\t},")

	// 32-bit ranges.
	fmt.Fprintln(w, "\tR32: []unicode.Range32{")
	for i := range rt.R32 {
		rng := rt.R32[i]
		fmt.Fprintf(w, "\t\t{%#08x, %#08x, %d},\n", rng.Lo, rng.Hi, rng.Stride)
	}
	fmt.Fprintln(w, "\t},")

	fmt.Fprintf(w, "\tLatinOffset: %d,\n", rt.LatinOffset)
	fmt.Fprintf(w, "}\n\n")
}
|
||||
260
vendor/golang.org/x/text/unicode/rangetable/merge.go
generated
vendored
260
vendor/golang.org/x/text/unicode/rangetable/merge.go
generated
vendored
|
|
@ -1,260 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rangetable
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// atEnd is used to mark a completed iteration.
|
||||
const atEnd = unicode.MaxRune + 1
|
||||
|
||||
// Merge returns a new RangeTable that is the union of the given tables.
|
||||
// It can also be used to compact user-created RangeTables. The entries in
|
||||
// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
|
||||
//
|
||||
// A lookup in the resulting table can be several times faster than using In
|
||||
// directly on the ranges. Merge is an expensive operation, however, and only
|
||||
// makes sense if one intends to use the result for more than a couple of
|
||||
// hundred lookups.
|
||||
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
|
||||
rt := &unicode.RangeTable{}
|
||||
if len(ranges) == 0 {
|
||||
return rt
|
||||
}
|
||||
|
||||
iter := tablesIter(make([]tableIndex, len(ranges)))
|
||||
|
||||
for i, t := range ranges {
|
||||
iter[i] = tableIndex{t, 0, atEnd}
|
||||
if len(t.R16) > 0 {
|
||||
iter[i].next = rune(t.R16[0].Lo)
|
||||
}
|
||||
}
|
||||
|
||||
if r0 := iter.next16(); r0.Stride != 0 {
|
||||
for {
|
||||
r1 := iter.next16()
|
||||
if r1.Stride == 0 {
|
||||
rt.R16 = append(rt.R16, r0)
|
||||
break
|
||||
}
|
||||
stride := r1.Lo - r0.Hi
|
||||
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
|
||||
// Fully merge the next range into the previous one.
|
||||
r0.Hi, r0.Stride = r1.Hi, stride
|
||||
continue
|
||||
} else if stride == r0.Stride {
|
||||
// Move the first element of r1 to r0. This may eliminate an
|
||||
// entry.
|
||||
r0.Hi = r1.Lo
|
||||
r0.Stride = stride
|
||||
r1.Lo = r1.Lo + r1.Stride
|
||||
if r1.Lo > r1.Hi {
|
||||
continue
|
||||
}
|
||||
}
|
||||
rt.R16 = append(rt.R16, r0)
|
||||
r0 = r1
|
||||
}
|
||||
}
|
||||
|
||||
for i, t := range ranges {
|
||||
iter[i] = tableIndex{t, 0, atEnd}
|
||||
if len(t.R32) > 0 {
|
||||
iter[i].next = rune(t.R32[0].Lo)
|
||||
}
|
||||
}
|
||||
|
||||
if r0 := iter.next32(); r0.Stride != 0 {
|
||||
for {
|
||||
r1 := iter.next32()
|
||||
if r1.Stride == 0 {
|
||||
rt.R32 = append(rt.R32, r0)
|
||||
break
|
||||
}
|
||||
stride := r1.Lo - r0.Hi
|
||||
if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
|
||||
// Fully merge the next range into the previous one.
|
||||
r0.Hi, r0.Stride = r1.Hi, stride
|
||||
continue
|
||||
} else if stride == r0.Stride {
|
||||
// Move the first element of r1 to r0. This may eliminate an
|
||||
// entry.
|
||||
r0.Hi = r1.Lo
|
||||
r1.Lo = r1.Lo + r1.Stride
|
||||
if r1.Lo > r1.Hi {
|
||||
continue
|
||||
}
|
||||
}
|
||||
rt.R32 = append(rt.R32, r0)
|
||||
r0 = r1
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
|
||||
rt.LatinOffset = i + 1
|
||||
}
|
||||
|
||||
return rt
|
||||
}
|
||||
|
||||
// tableIndex tracks how far one input table has been consumed.
type tableIndex struct {
	t    *unicode.RangeTable // the table being read
	p    uint32              // index of the current range in t.R16 or t.R32
	next rune                // next rune to emit, or atEnd once t is exhausted
}

// tablesIter holds one tableIndex per table being merged.
type tablesIter []tableIndex
|
||||
|
||||
// sortIter does an insertion sort using the next field of tableIndex. Insertion
|
||||
// sort is a good sorting algorithm for this case.
|
||||
func sortIter(t []tableIndex) {
|
||||
for i := range t {
|
||||
for j := i; j > 0 && t[j-1].next > t[j].next; j-- {
|
||||
t[j], t[j-1] = t[j-1], t[j]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// next16 finds the ranged to be added to the table. If ranges overlap between
|
||||
// multiple tables it clips the result to a non-overlapping range if the
|
||||
// elements are not fully subsumed. It returns a zero range if there are no more
|
||||
// ranges.
|
||||
func (ti tablesIter) next16() unicode.Range16 {
|
||||
sortIter(ti)
|
||||
|
||||
t0 := ti[0]
|
||||
if t0.next == atEnd {
|
||||
return unicode.Range16{}
|
||||
}
|
||||
r0 := t0.t.R16[t0.p]
|
||||
r0.Lo = uint16(t0.next)
|
||||
|
||||
// We restrict the Hi of the current range if it overlaps with another range.
|
||||
for i := range ti {
|
||||
tn := ti[i]
|
||||
// Since our tableIndices are sorted by next, we can break if the there
|
||||
// is no overlap. The first value of a next range can always be merged
|
||||
// into the current one, so we can break in case of equality as well.
|
||||
if rune(r0.Hi) <= tn.next {
|
||||
break
|
||||
}
|
||||
rn := tn.t.R16[tn.p]
|
||||
rn.Lo = uint16(tn.next)
|
||||
|
||||
// Limit r0.Hi based on next ranges in list, but allow it to overlap
|
||||
// with ranges as long as it subsumes it.
|
||||
m := (rn.Lo - r0.Lo) % r0.Stride
|
||||
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
|
||||
// Overlap, take the min of the two Hi values: for simplicity's sake
|
||||
// we only process one range at a time.
|
||||
if r0.Hi > rn.Hi {
|
||||
r0.Hi = rn.Hi
|
||||
}
|
||||
} else {
|
||||
// Not a compatible stride. Set to the last possible value before
|
||||
// rn.Lo, but ensure there is at least one value.
|
||||
if x := rn.Lo - m; r0.Lo <= x {
|
||||
r0.Hi = x
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Update the next values for each table.
|
||||
for i := range ti {
|
||||
tn := &ti[i]
|
||||
if rune(r0.Hi) < tn.next {
|
||||
break
|
||||
}
|
||||
rn := tn.t.R16[tn.p]
|
||||
stride := rune(rn.Stride)
|
||||
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
|
||||
if rune(rn.Hi) < tn.next {
|
||||
if tn.p++; int(tn.p) == len(tn.t.R16) {
|
||||
tn.next = atEnd
|
||||
} else {
|
||||
tn.next = rune(tn.t.R16[tn.p].Lo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r0.Lo == r0.Hi {
|
||||
r0.Stride = 1
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// next32 finds the ranged to be added to the table. If ranges overlap between
|
||||
// multiple tables it clips the result to a non-overlapping range if the
|
||||
// elements are not fully subsumed. It returns a zero range if there are no more
|
||||
// ranges.
|
||||
func (ti tablesIter) next32() unicode.Range32 {
|
||||
sortIter(ti)
|
||||
|
||||
t0 := ti[0]
|
||||
if t0.next == atEnd {
|
||||
return unicode.Range32{}
|
||||
}
|
||||
r0 := t0.t.R32[t0.p]
|
||||
r0.Lo = uint32(t0.next)
|
||||
|
||||
// We restrict the Hi of the current range if it overlaps with another range.
|
||||
for i := range ti {
|
||||
tn := ti[i]
|
||||
// Since our tableIndices are sorted by next, we can break if the there
|
||||
// is no overlap. The first value of a next range can always be merged
|
||||
// into the current one, so we can break in case of equality as well.
|
||||
if rune(r0.Hi) <= tn.next {
|
||||
break
|
||||
}
|
||||
rn := tn.t.R32[tn.p]
|
||||
rn.Lo = uint32(tn.next)
|
||||
|
||||
// Limit r0.Hi based on next ranges in list, but allow it to overlap
|
||||
// with ranges as long as it subsumes it.
|
||||
m := (rn.Lo - r0.Lo) % r0.Stride
|
||||
if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
|
||||
// Overlap, take the min of the two Hi values: for simplicity's sake
|
||||
// we only process one range at a time.
|
||||
if r0.Hi > rn.Hi {
|
||||
r0.Hi = rn.Hi
|
||||
}
|
||||
} else {
|
||||
// Not a compatible stride. Set to the last possible value before
|
||||
// rn.Lo, but ensure there is at least one value.
|
||||
if x := rn.Lo - m; r0.Lo <= x {
|
||||
r0.Hi = x
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Update the next values for each table.
|
||||
for i := range ti {
|
||||
tn := &ti[i]
|
||||
if rune(r0.Hi) < tn.next {
|
||||
break
|
||||
}
|
||||
rn := tn.t.R32[tn.p]
|
||||
stride := rune(rn.Stride)
|
||||
tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
|
||||
if rune(rn.Hi) < tn.next {
|
||||
if tn.p++; int(tn.p) == len(tn.t.R32) {
|
||||
tn.next = atEnd
|
||||
} else {
|
||||
tn.next = rune(tn.t.R32[tn.p].Lo)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r0.Lo == r0.Hi {
|
||||
r0.Stride = 1
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
184
vendor/golang.org/x/text/unicode/rangetable/merge_test.go
generated
vendored
184
vendor/golang.org/x/text/unicode/rangetable/merge_test.go
generated
vendored
|
|
@ -1,184 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rangetable
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Hand-built tables exercised by TestMerge and the benchmarks below.
var (
	// maxRuneTable contains only the largest valid code point.
	maxRuneTable = &unicode.RangeTable{
		R32: []unicode.Range32{
			{Lo: unicode.MaxRune, Hi: unicode.MaxRune, Stride: 1},
		},
	}

	// overlap1 and overlap2 cover interleaved code points with strides
	// greater than one, in both the 16-bit and the 32-bit range lists.
	overlap1 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x100, Hi: 0xfffc, Stride: 4},
		},
		R32: []unicode.Range32{
			{Lo: 0x100000, Hi: 0x10fffc, Stride: 4},
		},
	}

	overlap2 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x101, Hi: 0xfffd, Stride: 4},
		},
		R32: []unicode.Range32{
			{Lo: 0x100001, Hi: 0x10fffd, Stride: 3},
		},
	}

	// The following table should be compacted into two entries for R16 and R32.
	optimize = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x1, Hi: 0x1, Stride: 1},
			{Lo: 0x2, Hi: 0x2, Stride: 1},
			{Lo: 0x3, Hi: 0x3, Stride: 1},
			{Lo: 0x5, Hi: 0x5, Stride: 1},
			{Lo: 0x7, Hi: 0x7, Stride: 1},
			{Lo: 0x9, Hi: 0x9, Stride: 1},
			{Lo: 0xb, Hi: 0xf, Stride: 2},
		},
		R32: []unicode.Range32{
			{Lo: 0x10001, Hi: 0x10001, Stride: 1},
			{Lo: 0x10002, Hi: 0x10002, Stride: 1},
			{Lo: 0x10003, Hi: 0x10003, Stride: 1},
			{Lo: 0x10005, Hi: 0x10005, Stride: 1},
			{Lo: 0x10007, Hi: 0x10007, Stride: 1},
			{Lo: 0x10009, Hi: 0x10009, Stride: 1},
			{Lo: 0x1000b, Hi: 0x1000f, Stride: 2},
		},
	}
)
|
||||
|
||||
func TestMerge(t *testing.T) {
|
||||
for i, tt := range [][]*unicode.RangeTable{
|
||||
{unicode.Cc, unicode.Cf},
|
||||
{unicode.L, unicode.Ll},
|
||||
{unicode.L, unicode.Ll, unicode.Lu},
|
||||
{unicode.Ll, unicode.Lu},
|
||||
{unicode.M},
|
||||
unicode.GraphicRanges,
|
||||
cased,
|
||||
|
||||
// Merge R16 only and R32 only and vice versa.
|
||||
{unicode.Khmer, unicode.Khudawadi},
|
||||
{unicode.Imperial_Aramaic, unicode.Radical},
|
||||
|
||||
// Merge with empty.
|
||||
{&unicode.RangeTable{}},
|
||||
{&unicode.RangeTable{}, &unicode.RangeTable{}},
|
||||
{&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
|
||||
{&unicode.RangeTable{}, unicode.Hiragana},
|
||||
{unicode.Inherited, &unicode.RangeTable{}},
|
||||
{&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},
|
||||
|
||||
// Hypothetical tables.
|
||||
{maxRuneTable},
|
||||
{overlap1, overlap2},
|
||||
|
||||
// Optimization
|
||||
{optimize},
|
||||
} {
|
||||
rt := Merge(tt...)
|
||||
for r := rune(0); r <= unicode.MaxRune; r++ {
|
||||
if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
|
||||
t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
|
||||
}
|
||||
}
|
||||
// Test optimization and correctness for R16.
|
||||
for k := 0; k < len(rt.R16)-1; k++ {
|
||||
if lo, hi := rt.R16[k].Lo, rt.R16[k].Hi; lo > hi {
|
||||
t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
|
||||
}
|
||||
if hi, lo := rt.R16[k].Hi, rt.R16[k+1].Lo; hi >= lo {
|
||||
t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
|
||||
}
|
||||
if rt.R16[k].Hi+rt.R16[k].Stride == rt.R16[k+1].Lo {
|
||||
t.Errorf("%d: missed optimization for R16 at %d between %X and %x",
|
||||
i, k, rt.R16[k], rt.R16[k+1])
|
||||
}
|
||||
}
|
||||
// Test optimization and correctness for R32.
|
||||
for k := 0; k < len(rt.R32)-1; k++ {
|
||||
if lo, hi := rt.R32[k].Lo, rt.R32[k].Hi; lo > hi {
|
||||
t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
|
||||
}
|
||||
if hi, lo := rt.R32[k].Hi, rt.R32[k+1].Lo; hi >= lo {
|
||||
t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
|
||||
}
|
||||
if rt.R32[k].Hi+rt.R32[k].Stride == rt.R32[k+1].Lo {
|
||||
t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
|
||||
i, k, rt.R32[k], rt.R32[k+1])
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const runes = "Hello World in 2015!,\U0010fffd"
|
||||
|
||||
func BenchmarkNotMerged(t *testing.B) {
|
||||
for i := 0; i < t.N; i++ {
|
||||
for _, r := range runes {
|
||||
unicode.In(r, unicode.GraphicRanges...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMerged(t *testing.B) {
|
||||
rt := Merge(unicode.GraphicRanges...)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
for _, r := range runes {
|
||||
unicode.Is(rt, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cased is the list of case-related tables (lowercase, uppercase, titlecase
// and the Other_Lowercase/Other_Uppercase properties) that TestMerge and the
// *Cased benchmarks merge into a single table.
var cased = []*unicode.RangeTable{
	unicode.Lower,
	unicode.Upper,
	unicode.Title,
	unicode.Other_Lowercase,
	unicode.Other_Uppercase,
}
|
||||
|
||||
func BenchmarkNotMergedCased(t *testing.B) {
|
||||
for i := 0; i < t.N; i++ {
|
||||
for _, r := range runes {
|
||||
unicode.In(r, cased...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkMergedCased(t *testing.B) {
|
||||
// This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
|
||||
// Unicode 7.0.0.
|
||||
rt := Merge(cased...)
|
||||
|
||||
for i := 0; i < t.N; i++ {
|
||||
for _, r := range runes {
|
||||
unicode.Is(rt, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkInit(t *testing.B) {
|
||||
for i := 0; i < t.N; i++ {
|
||||
Merge(cased...)
|
||||
Merge(unicode.GraphicRanges...)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkInit2(t *testing.B) {
|
||||
// Hypothetical near-worst-case performance.
|
||||
for i := 0; i < t.N; i++ {
|
||||
Merge(overlap1, overlap2)
|
||||
}
|
||||
}
|
||||
70
vendor/golang.org/x/text/unicode/rangetable/rangetable.go
generated
vendored
70
vendor/golang.org/x/text/unicode/rangetable/rangetable.go
generated
vendored
|
|
@ -1,70 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package rangetable provides utilities for creating and inspecting
|
||||
// unicode.RangeTables.
|
||||
package rangetable
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// New creates a RangeTable from the given runes, which may contain duplicates.
|
||||
func New(r ...rune) *unicode.RangeTable {
|
||||
if len(r) == 0 {
|
||||
return &unicode.RangeTable{}
|
||||
}
|
||||
|
||||
sort.Sort(byRune(r))
|
||||
|
||||
// Remove duplicates.
|
||||
k := 1
|
||||
for i := 1; i < len(r); i++ {
|
||||
if r[k-1] != r[i] {
|
||||
r[k] = r[i]
|
||||
k++
|
||||
}
|
||||
}
|
||||
|
||||
var rt unicode.RangeTable
|
||||
for _, r := range r[:k] {
|
||||
if r <= 0xFFFF {
|
||||
rt.R16 = append(rt.R16, unicode.Range16{Lo: uint16(r), Hi: uint16(r), Stride: 1})
|
||||
} else {
|
||||
rt.R32 = append(rt.R32, unicode.Range32{Lo: uint32(r), Hi: uint32(r), Stride: 1})
|
||||
}
|
||||
}
|
||||
|
||||
// Optimize RangeTable.
|
||||
return Merge(&rt)
|
||||
}
|
||||
|
||||
// byRune implements sort.Interface, ordering runes by increasing value.
type byRune []rune

// Len returns the number of runes in the slice.
func (r byRune) Len() int {
	return len(r)
}

// Swap exchanges the runes at positions i and j.
func (r byRune) Swap(i, j int) {
	tmp := r[i]
	r[i] = r[j]
	r[j] = tmp
}

// Less reports whether the rune at position i sorts before the one at j.
func (r byRune) Less(i, j int) bool {
	return r[j] > r[i]
}
|
||||
|
||||
// Visit calls fn for every rune described by rt. The R16 ranges are walked
// first and then the R32 ranges, each from Lo up to and including Hi in steps
// of Stride, so runes are reported in the order the table lists them.
func Visit(rt *unicode.RangeTable, fn func(rune)) {
	for _, rng := range rt.R16 {
		lo, hi, step := rune(rng.Lo), rune(rng.Hi), rune(rng.Stride)
		for c := lo; c <= hi; c += step {
			fn(c)
		}
	}
	for _, rng := range rt.R32 {
		lo, hi, step := rune(rng.Lo), rune(rng.Hi), rune(rng.Stride)
		for c := lo; c <= hi; c += step {
			fn(c)
		}
	}
}
|
||||
|
||||
// Assigned returns a RangeTable with all assigned code points for a given
|
||||
// Unicode version. This includes graphic, format, control, and private-use
|
||||
// characters. It returns nil if the data for the given version is not
|
||||
// available.
|
||||
func Assigned(version string) *unicode.RangeTable {
|
||||
return assigned[version]
|
||||
}
|
||||
55
vendor/golang.org/x/text/unicode/rangetable/rangetable_test.go
generated
vendored
55
vendor/golang.org/x/text/unicode/rangetable/rangetable_test.go
generated
vendored
|
|
@ -1,55 +0,0 @@
|
|||
package rangetable
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// Fixture tables shared by TestVisit and TestNew.
var (
	// empty has no ranges at all.
	empty = &unicode.RangeTable{}

	// many holds every fifth code point from 0 through U+10009, split across
	// the 16-bit and 32-bit range lists.
	many = &unicode.RangeTable{
		R16:         []unicode.Range16{{Lo: 0, Hi: 0xffff, Stride: 5}},
		R32:         []unicode.Range32{{Lo: 0x10004, Hi: 0x10009, Stride: 5}},
		LatinOffset: 0,
	}
)
|
||||
|
||||
func TestVisit(t *testing.T) {
|
||||
Visit(empty, func(got rune) {
|
||||
t.Error("call from empty RangeTable")
|
||||
})
|
||||
|
||||
var want rune
|
||||
Visit(many, func(got rune) {
|
||||
if got != want {
|
||||
t.Errorf("got %U; want %U", got, want)
|
||||
}
|
||||
want += 5
|
||||
})
|
||||
if want -= 5; want != 0x10009 {
|
||||
t.Errorf("last run was %U; want U+10009", want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNew(t *testing.T) {
|
||||
for i, rt := range []*unicode.RangeTable{
|
||||
empty,
|
||||
unicode.Co,
|
||||
unicode.Letter,
|
||||
unicode.ASCII_Hex_Digit,
|
||||
many,
|
||||
maxRuneTable,
|
||||
} {
|
||||
var got, want []rune
|
||||
Visit(rt, func(r rune) {
|
||||
want = append(want, r)
|
||||
})
|
||||
Visit(New(want...), func(r rune) {
|
||||
got = append(got, r)
|
||||
})
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Errorf("%d:\ngot %v;\nwant %v", i, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
6376
vendor/golang.org/x/text/unicode/rangetable/tables.go
generated
vendored
6376
vendor/golang.org/x/text/unicode/rangetable/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
59
vendor/golang.org/x/text/unicode/runenames/bits.go
generated
vendored
59
vendor/golang.org/x/text/unicode/runenames/bits.go
generated
vendored
|
|
@ -1,59 +0,0 @@
|
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package runenames
|
||||
|
||||
// This file contains code common to gen.go and the package code.
|
||||
|
||||
// The mapping from rune to string (i.e. offset and length in the data string)
|
||||
// is encoded as a two level table. The first level maps from contiguous rune
|
||||
// ranges [runeOffset, runeOffset+runeLength) to entries. Entries are either
|
||||
// direct (for repeated names such as "<CJK Ideograph>") or indirect (for runs
|
||||
// of unique names such as "SPACE", "EXCLAMATION MARK", "QUOTATION MARK", ...).
|
||||
//
|
||||
// Each first level table element is 64 bits. The runeOffset (21 bits) and
|
||||
// runeLength (16 bits) take the 37 high bits. The entry takes the 27 low bits,
|
||||
// with directness encoded in the least significant bit.
|
||||
//
|
||||
// A direct entry encodes a dataOffset (18 bits) and dataLength (8 bits) in the
|
||||
// data string. 18 bits is too short to encode the entire data string's length,
|
||||
// but the data string's contents are arranged so that all of the few direct
|
||||
// entries' offsets come before all of the many indirect entries' offsets.
|
||||
//
|
||||
// An indirect entry encodes a dataBase (10 bits) and a table1Offset (16 bits).
|
||||
// The table1Offset is the start of a range in the second level table. The
|
||||
// length of that range is the same as the runeLength.
|
||||
//
|
||||
// Each second level table element is 16 bits, an index into data, relative to
|
||||
// a bias equal to (dataBase << dataBaseUnit). That (bias + index) is the
|
||||
// (dataOffset + dataLength) in the data string. The dataOffset is implied by
|
||||
// the previous table element (with the same implicit bias).
|
||||
|
||||
const (
|
||||
bitsRuneOffset = 21
|
||||
bitsRuneLength = 16
|
||||
bitsDataOffset = 18
|
||||
bitsDataLength = 8
|
||||
bitsDirect = 1
|
||||
|
||||
bitsDataBase = 10
|
||||
bitsTable1Offset = 16
|
||||
|
||||
shiftRuneOffset = 0 + bitsDirect + bitsDataLength + bitsDataOffset + bitsRuneLength
|
||||
shiftRuneLength = 0 + bitsDirect + bitsDataLength + bitsDataOffset
|
||||
shiftDataOffset = 0 + bitsDirect + bitsDataLength
|
||||
shiftDataLength = 0 + bitsDirect
|
||||
shiftDirect = 0
|
||||
|
||||
shiftDataBase = 0 + bitsDirect + bitsTable1Offset
|
||||
shiftTable1Offset = 0 + bitsDirect
|
||||
|
||||
maskRuneLength = 1<<bitsRuneLength - 1
|
||||
maskDataOffset = 1<<bitsDataOffset - 1
|
||||
maskDataLength = 1<<bitsDataLength - 1
|
||||
maskDirect = 1<<bitsDirect - 1
|
||||
|
||||
maskDataBase = 1<<bitsDataBase - 1
|
||||
maskTable1Offset = 1<<bitsTable1Offset - 1
|
||||
|
||||
dataBaseUnit = 10
|
||||
)
|
||||
118
vendor/golang.org/x/text/unicode/runenames/example_test.go
generated
vendored
118
vendor/golang.org/x/text/unicode/runenames/example_test.go
generated
vendored
|
|
@ -1,118 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runenames_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"golang.org/x/text/unicode/runenames"
|
||||
)
|
||||
|
||||
// Example prints, for a selection of runes, the rune's value in hexadecimal
// followed by the quoted name returned by runenames.Name. As the expected
// output shows, the name is empty for runes such as -1 and U+0380, and is a
// bracketed placeholder such as "<control>" or "<CJK Ideograph>" for others.
func Example() {
	runes := []rune{
		-1,
		'\U00000000',
		'\U0000001f',
		'\U00000020',
		'\U00000021',
		'\U00000041',
		'\U0000007e',
		'\U0000007f',
		'\U00000080',
		'\U000000e0',

		'\U0000037f',
		'\U00000380',
		'\U00000381',
		'\U00000382',
		'\U00000383',
		'\U00000384',
		'\U00000385',
		'\U00000386',
		'\U000007c0',

		'\U00002603',
		'\U000033ff',
		'\U00003400',
		'\U00003401',
		'\U00003402',
		'\U00004dc0',

		'\U00009fd5',
		'\U00009fd6',
		'\U00009fff',
		'\U0000a000',
		0xdc00, // '\U0000dc00' (Low Surrogate) is an invalid Go literal.
		'\U0000f800',
		'\U0000fffc',
		'\U0000fffd',
		'\U0000fffe',
		'\U0000ffff',

		'\U00010000',
		'\U0001f574',
		'\U0002fa1d',
		'\U0002fa1e',
		'\U000e0100',
		'\U000e01ef',
		'\U000e01f0',
		'\U00100000',
		'\U0010fffd',
		'\U0010fffe',
		'\U0010ffff',
	}

	for _, r := range runes {
		fmt.Printf("%08x %q\n", r, runenames.Name(r))
	}

	// Output:
	// -0000001 ""
	// 00000000 "<control>"
	// 0000001f "<control>"
	// 00000020 "SPACE"
	// 00000021 "EXCLAMATION MARK"
	// 00000041 "LATIN CAPITAL LETTER A"
	// 0000007e "TILDE"
	// 0000007f "<control>"
	// 00000080 "<control>"
	// 000000e0 "LATIN SMALL LETTER A WITH GRAVE"
	// 0000037f "GREEK CAPITAL LETTER YOT"
	// 00000380 ""
	// 00000381 ""
	// 00000382 ""
	// 00000383 ""
	// 00000384 "GREEK TONOS"
	// 00000385 "GREEK DIALYTIKA TONOS"
	// 00000386 "GREEK CAPITAL LETTER ALPHA WITH TONOS"
	// 000007c0 "NKO DIGIT ZERO"
	// 00002603 "SNOWMAN"
	// 000033ff "SQUARE GAL"
	// 00003400 "<CJK Ideograph Extension A>"
	// 00003401 "<CJK Ideograph Extension A>"
	// 00003402 "<CJK Ideograph Extension A>"
	// 00004dc0 "HEXAGRAM FOR THE CREATIVE HEAVEN"
	// 00009fd5 "<CJK Ideograph>"
	// 00009fd6 ""
	// 00009fff ""
	// 0000a000 "YI SYLLABLE IT"
	// 0000dc00 "<Low Surrogate>"
	// 0000f800 "<Private Use>"
	// 0000fffc "OBJECT REPLACEMENT CHARACTER"
	// 0000fffd "REPLACEMENT CHARACTER"
	// 0000fffe ""
	// 0000ffff ""
	// 00010000 "LINEAR B SYLLABLE B008 A"
	// 0001f574 "MAN IN BUSINESS SUIT LEVITATING"
	// 0002fa1d "CJK COMPATIBILITY IDEOGRAPH-2FA1D"
	// 0002fa1e ""
	// 000e0100 "VARIATION SELECTOR-17"
	// 000e01ef "VARIATION SELECTOR-256"
	// 000e01f0 ""
	// 00100000 "<Plane 16 Private Use>"
	// 0010fffd "<Plane 16 Private Use>"
	// 0010fffe ""
	// 0010ffff ""
}
|
||||
195
vendor/golang.org/x/text/unicode/runenames/gen.go
generated
vendored
195
vendor/golang.org/x/text/unicode/runenames/gen.go
generated
vendored
|
|
@ -1,195 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
// snippet describes where one rune name lives inside data, the byte slice
// formed by concatenating all names. The zero value means "no name".
type snippet struct {
	offset, length int    // position and size of the name within data
	s              string // the name itself
}
|
||||
|
||||
func makeTable0EntryDirect(rOffset, rLength, dOffset, dLength int) uint64 {
|
||||
if rOffset >= 1<<bitsRuneOffset {
|
||||
log.Fatalf("makeTable0EntryDirect: rOffset %d is too large", rOffset)
|
||||
}
|
||||
if rLength >= 1<<bitsRuneLength {
|
||||
log.Fatalf("makeTable0EntryDirect: rLength %d is too large", rLength)
|
||||
}
|
||||
if dOffset >= 1<<bitsDataOffset {
|
||||
log.Fatalf("makeTable0EntryDirect: dOffset %d is too large", dOffset)
|
||||
}
|
||||
if dLength >= 1<<bitsRuneLength {
|
||||
log.Fatalf("makeTable0EntryDirect: dLength %d is too large", dLength)
|
||||
}
|
||||
return uint64(rOffset)<<shiftRuneOffset |
|
||||
uint64(rLength)<<shiftRuneLength |
|
||||
uint64(dOffset)<<shiftDataOffset |
|
||||
uint64(dLength)<<shiftDataLength |
|
||||
1 // Direct bit.
|
||||
}
|
||||
|
||||
func makeTable0EntryIndirect(rOffset, rLength, dBase, t1Offset int) uint64 {
|
||||
if rOffset >= 1<<bitsRuneOffset {
|
||||
log.Fatalf("makeTable0EntryIndirect: rOffset %d is too large", rOffset)
|
||||
}
|
||||
if rLength >= 1<<bitsRuneLength {
|
||||
log.Fatalf("makeTable0EntryIndirect: rLength %d is too large", rLength)
|
||||
}
|
||||
if dBase >= 1<<bitsDataBase {
|
||||
log.Fatalf("makeTable0EntryIndirect: dBase %d is too large", dBase)
|
||||
}
|
||||
if t1Offset >= 1<<bitsTable1Offset {
|
||||
log.Fatalf("makeTable0EntryIndirect: t1Offset %d is too large", t1Offset)
|
||||
}
|
||||
return uint64(rOffset)<<shiftRuneOffset |
|
||||
uint64(rLength)<<shiftRuneLength |
|
||||
uint64(dBase)<<shiftDataBase |
|
||||
uint64(t1Offset)<<shiftTable1Offset |
|
||||
0 // Direct bit.
|
||||
}
|
||||
|
||||
// makeTable1Entry converts x to a second-level table element. The process is
// terminated via log.Fatalf if x does not fit in 16 bits.
func makeTable1Entry(x int) uint16 {
	if x >= 0 && x <= 0xffff {
		return uint16(x)
	}
	log.Fatalf("makeTable1Entry: entry %d is out of range", x)
	return uint16(x)
}
|
||||
|
||||
var (
	// data accumulates the bytes of every name; appendRepeatNames and
	// appendUniqueNames append to it.
	data []byte
	// snippets is indexed by rune and records where that rune's name sits
	// in data. Entries left at the zero value have no name.
	snippets = make([]snippet, 1+unicode.MaxRune)
)
|
||||
|
||||
// main drives the generator: it parses the rune names, lays them out in data
// (repeated names first, then unique names), builds the two lookup tables and
// writes them, together with data, to tables.go in package runenames.
func main() {
	gen.Init()

	names, counts := parse()
	// Order matters: repeated names must be appended to data before the
	// unique ones.
	appendRepeatNames(names, counts)
	appendUniqueNames(names, counts)

	table0, table1 := makeTables()

	// Presumably rewrites gen_bits.go as bits.go under package runenames —
	// confirm against the internal/gen package.
	gen.Repackage("gen_bits.go", "bits.go", "runenames")

	w := gen.NewCodeWriter()
	w.WriteVar("table0", table0)
	w.WriteVar("table1", table1)
	w.WriteConst("data", string(data))
	w.WriteGoFile("tables.go", "runenames")
}
|
||||
|
||||
func parse() (names []string, counts map[string]int) {
|
||||
names = make([]string, 1+unicode.MaxRune)
|
||||
counts = map[string]int{}
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
r, s := p.Rune(0), p.String(ucd.Name)
|
||||
if s == "" {
|
||||
return
|
||||
}
|
||||
if s[0] == '<' {
|
||||
const first = ", First>"
|
||||
if i := strings.Index(s, first); i >= 0 {
|
||||
s = s[:i] + ">"
|
||||
}
|
||||
}
|
||||
names[r] = s
|
||||
counts[s]++
|
||||
})
|
||||
return names, counts
|
||||
}
|
||||
|
||||
func appendRepeatNames(names []string, counts map[string]int) {
|
||||
alreadySeen := map[string]snippet{}
|
||||
for r, s := range names {
|
||||
if s == "" || counts[s] == 1 {
|
||||
continue
|
||||
}
|
||||
if s[0] != '<' {
|
||||
log.Fatalf("Repeated name %q does not start with a '<'", s)
|
||||
}
|
||||
|
||||
if z, ok := alreadySeen[s]; ok {
|
||||
snippets[r] = z
|
||||
continue
|
||||
}
|
||||
|
||||
z := snippet{
|
||||
offset: len(data),
|
||||
length: len(s),
|
||||
s: s,
|
||||
}
|
||||
data = append(data, s...)
|
||||
snippets[r] = z
|
||||
alreadySeen[s] = z
|
||||
}
|
||||
}
|
||||
|
||||
func appendUniqueNames(names []string, counts map[string]int) {
|
||||
for r, s := range names {
|
||||
if s == "" || counts[s] != 1 {
|
||||
continue
|
||||
}
|
||||
if s[0] == '<' {
|
||||
log.Fatalf("Unique name %q starts with a '<'", s)
|
||||
}
|
||||
|
||||
z := snippet{
|
||||
offset: len(data),
|
||||
length: len(s),
|
||||
s: s,
|
||||
}
|
||||
data = append(data, s...)
|
||||
snippets[r] = z
|
||||
}
|
||||
}
|
||||
|
||||
// makeTables turns the per-rune snippets into the two lookup tables. table0
// gets one entry per run of consecutive named runes: a direct entry when two
// or more adjacent runes share an identical snippet, otherwise an indirect
// entry referring to table1. For each indirect run, table1 gets the start
// offset of the first name followed by the end offset of every name in the
// run, all relative to the run's base.
func makeTables() (table0 []uint64, table1 []uint16) {
	for i := 0; i < len(snippets); {
		zi := snippets[i]
		if zi == (snippet{}) {
			// No name was recorded for this rune.
			i++
			continue
		}

		// Look for repeat names. If we have one, we only need a table0 entry.
		j := i + 1
		for ; j < len(snippets) && zi == snippets[j]; j++ {
		}
		if j > i+1 {
			table0 = append(table0, makeTable0EntryDirect(i, j-i, zi.offset, zi.length))
			i = j
			continue
		}

		// Otherwise, we have a run of unique names. We need one table0 entry
		// and two or more table1 entries.
		// base is zi.offset rounded down to a multiple of 1<<dataBaseUnit, so
		// that base>>dataBaseUnit below loses no information.
		base := zi.offset &^ (1<<dataBaseUnit - 1)
		// t1Offset is the index of the end-offset element for rune i; the
		// element just before it holds that name's start offset.
		t1Offset := len(table1) + 1
		table1 = append(table1, makeTable1Entry(zi.offset-base))
		table1 = append(table1, makeTable1Entry(zi.offset+zi.length-base))
		for ; j < len(snippets) && snippets[j] != (snippet{}); j++ {
			zj := snippets[j]
			// A name starting with '<' ends the run.
			if data[zj.offset] == '<' {
				break
			}
			table1 = append(table1, makeTable1Entry(zj.offset+zj.length-base))
		}
		table0 = append(table0, makeTable0EntryIndirect(i, j-i, base>>dataBaseUnit, t1Offset))
		i = j
	}
	return table0, table1
}
|
||||
63
vendor/golang.org/x/text/unicode/runenames/gen_bits.go
generated
vendored
63
vendor/golang.org/x/text/unicode/runenames/gen_bits.go
generated
vendored
|
|
@ -1,63 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains code common to gen.go and the package code.
|
||||
|
||||
// The mapping from rune to string (i.e. offset and length in the data string)
|
||||
// is encoded as a two level table. The first level maps from contiguous rune
|
||||
// ranges [runeOffset, runeOffset+runeLength) to entries. Entries are either
|
||||
// direct (for repeated names such as "<CJK Ideograph>") or indirect (for runs
|
||||
// of unique names such as "SPACE", "EXCLAMATION MARK", "QUOTATION MARK", ...).
|
||||
//
|
||||
// Each first level table element is 64 bits. The runeOffset (21 bits) and
|
||||
// runeLength (16 bits) take the 37 high bits. The entry takes the 27 low bits,
|
||||
// with directness encoded in the least significant bit.
|
||||
//
|
||||
// A direct entry encodes a dataOffset (18 bits) and dataLength (8 bits) in the
|
||||
// data string. 18 bits is too short to encode the entire data string's length,
|
||||
// but the data string's contents are arranged so that all of the few direct
|
||||
// entries' offsets come before all of the many indirect entries' offsets.
|
||||
//
|
||||
// An indirect entry encodes a dataBase (10 bits) and a table1Offset (16 bits).
|
||||
// The table1Offset is the start of a range in the second level table. The
|
||||
// length of that range is the same as the runeLength.
|
||||
//
|
||||
// Each second level table element is 16 bits, an index into data, relative to
|
||||
// a bias equal to (dataBase << dataBaseUnit). That (bias + index) is the
|
||||
// (dataOffset + dataLength) in the data string. The dataOffset is implied by
|
||||
// the previous table element (with the same implicit bias).
|
||||
|
||||
const (
	// Widths, in bits, of the fields of a direct first-level entry.
	bitsRuneOffset = 21
	bitsRuneLength = 16
	bitsDataOffset = 18
	bitsDataLength = 8
	bitsDirect     = 1

	// Widths of the fields that replace dataOffset/dataLength in an
	// indirect entry.
	bitsDataBase     = 10
	bitsTable1Offset = 16

	// Position of each field's least significant bit: every shift is the
	// sum of the widths of the fields below it.
	shiftRuneOffset = 0 + bitsDirect + bitsDataLength + bitsDataOffset + bitsRuneLength
	shiftRuneLength = 0 + bitsDirect + bitsDataLength + bitsDataOffset
	shiftDataOffset = 0 + bitsDirect + bitsDataLength
	shiftDataLength = 0 + bitsDirect
	shiftDirect     = 0

	shiftDataBase     = 0 + bitsDirect + bitsTable1Offset
	shiftTable1Offset = 0 + bitsDirect

	// Masks that isolate a field once it has been shifted down to bit 0.
	maskRuneLength = 1<<bitsRuneLength - 1
	maskDataOffset = 1<<bitsDataOffset - 1
	maskDataLength = 1<<bitsDataLength - 1
	maskDirect     = 1<<bitsDirect - 1

	maskDataBase     = 1<<bitsDataBase - 1
	maskTable1Offset = 1<<bitsTable1Offset - 1

	// dataBaseUnit is the shift applied to a dataBase value to obtain the
	// bias for second-level table elements.
	dataBaseUnit = 10
)
|
||||
48
vendor/golang.org/x/text/unicode/runenames/runenames.go
generated
vendored
48
vendor/golang.org/x/text/unicode/runenames/runenames.go
generated
vendored
|
|
@ -1,48 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_bits.go
|
||||
|
||||
// Package runenames provides rune names from the Unicode Character Database.
|
||||
// For example, the name for '\u0100' is "LATIN CAPITAL LETTER A WITH MACRON".
|
||||
//
|
||||
// See http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
|
||||
package runenames
|
||||
|
||||
import (
|
||||
"sort"
|
||||
)
|
||||
|
||||
// Name returns the name for r.
|
||||
func Name(r rune) string {
|
||||
i := sort.Search(len(table0), func(j int) bool {
|
||||
e := table0[j]
|
||||
rOffset := rune(e >> shiftRuneOffset)
|
||||
return r < rOffset
|
||||
})
|
||||
if i == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
e := table0[i-1]
|
||||
rOffset := rune(e >> shiftRuneOffset)
|
||||
rLength := rune(e>>shiftRuneLength) & maskRuneLength
|
||||
if r >= rOffset+rLength {
|
||||
return ""
|
||||
}
|
||||
|
||||
if (e>>shiftDirect)&maskDirect != 0 {
|
||||
o := int(e>>shiftDataOffset) & maskDataOffset
|
||||
n := int(e>>shiftDataLength) & maskDataLength
|
||||
return data[o : o+n]
|
||||
}
|
||||
|
||||
base := uint32(e>>shiftDataBase) & maskDataBase
|
||||
base <<= dataBaseUnit
|
||||
j := rune(e>>shiftTable1Offset) & maskTable1Offset
|
||||
j += r - rOffset
|
||||
d0 := base + uint32(table1[j-1]) // dataOffset
|
||||
d1 := base + uint32(table1[j-0]) // dataOffset + dataLength
|
||||
return data[d0:d1]
|
||||
}
|
||||
46
vendor/golang.org/x/text/unicode/runenames/runenames_test.go
generated
vendored
46
vendor/golang.org/x/text/unicode/runenames/runenames_test.go
generated
vendored
|
|
@ -1,46 +0,0 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runenames
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"unicode"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/testtext"
|
||||
"golang.org/x/text/internal/ucd"
|
||||
)
|
||||
|
||||
func TestName(t *testing.T) {
|
||||
testtext.SkipIfNotLong(t)
|
||||
|
||||
wants := make([]string, 1+unicode.MaxRune)
|
||||
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
|
||||
r, s := p.Rune(0), p.String(ucd.Name)
|
||||
if s == "" {
|
||||
return
|
||||
}
|
||||
if s[0] == '<' {
|
||||
const first = ", First>"
|
||||
if i := strings.Index(s, first); i >= 0 {
|
||||
s = s[:i] + ">"
|
||||
}
|
||||
}
|
||||
wants[r] = s
|
||||
})
|
||||
|
||||
nErrors := 0
|
||||
for r, want := range wants {
|
||||
got := Name(rune(r))
|
||||
if got != want {
|
||||
t.Errorf("r=%#08x: got %q, want %q", r, got, want)
|
||||
nErrors++
|
||||
if nErrors == 100 {
|
||||
t.Fatal("too many errors")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
15514
vendor/golang.org/x/text/unicode/runenames/tables.go
generated
vendored
15514
vendor/golang.org/x/text/unicode/runenames/tables.go
generated
vendored
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue