dep prune

This commit is contained in:
dhax 2017-11-15 18:48:32 +01:00
parent 65441fa5b3
commit 3a2d24baca
1564 changed files with 0 additions and 638818 deletions

View file

@ -1,198 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go gen_ranges.go
// Package bidi contains functionality for bidirectional text support.
//
// See http://www.unicode.org/reports/tr9.
//
// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
// and without notice.
package bidi // import "golang.org/x/text/unicode/bidi"
// TODO:
// The following functionality would not be hard to implement, but hinges on
// the definition of a Segmenter interface. For now this is up to the user.
// - Iterate over paragraphs
// - Segmenter to iterate over runs directly from a given text.
// Also:
// - Transformer for reordering?
// - Transformer (validator, really) for Bidi Rule.
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to which extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
// A Direction indicates the overall flow of text.
type Direction int
// The Direction values are numbered in declaration order, starting at 0 for
// LeftToRight.
const (
// LeftToRight indicates the text contains no right-to-left characters and
// that either there are some left-to-right characters or the option
// DefaultDirection(LeftToRight) was passed.
LeftToRight Direction = iota
// RightToLeft indicates the text contains no left-to-right characters and
// that either there are some right-to-left characters or the option
// DefaultDirection(RightToLeft) was passed.
RightToLeft
// Mixed indicates text contains both left-to-right and right-to-left
// characters.
Mixed
// Neutral means that text contains neither left-to-right nor right-to-left
// characters and that no default direction has been set.
Neutral
)
// options is the value that Option functions operate on. It currently has no
// fields.
type options struct{}
// An Option is an option for Bidi processing. It is a function that receives
// a pointer to the options value.
type Option func(*options)
// ICU allows the user to define embedding levels. This may be used, for example,
// to use hierarchical structure of markup languages to define embeddings.
// The following option may be a way to expose this functionality in this API.
// // LevelFunc sets a function that associates nesting levels with the given text.
// // The levels function will be called with monotonically increasing values for p.
// func LevelFunc(levels func(p int) int) Option {
// panic("unimplemented")
// }
// DefaultDirection sets the default direction for a Paragraph. The direction is
// overridden if the text contains directional characters.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func DefaultDirection(d Direction) Option {
panic("unimplemented")
}
// A Paragraph holds a single Paragraph for Bidi processing.
//
// The struct has no fields yet; the comment inside marks where buffers are
// meant to go.
type Paragraph struct {
// buffers
}
// SetBytes configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If b contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from b including this separator. Error may be non-nil if options are
// given.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
panic("unimplemented")
}
// SetString configures p for the given paragraph text. It replaces text
// previously set by SetBytes or SetString. If s contains a paragraph separator
// it will only process the first paragraph and report the number of bytes
// consumed from s including this separator. Error may be non-nil if options are
// given.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
panic("unimplemented")
}
// IsLeftToRight reports whether the principal direction of rendering for this
// paragraph is left-to-right. If this returns false, the principal direction
// of rendering is right-to-left.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) IsLeftToRight() bool {
panic("unimplemented")
}
// Direction returns the direction of the text of this paragraph.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) Direction() Direction {
panic("unimplemented")
}
// RunAt reports the Run at the given position of the input text.
//
// This method can be used for computing line breaks on paragraphs.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) RunAt(pos int) Run {
panic("unimplemented")
}
// Order computes the visual ordering of all the runs in a Paragraph.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) Order() (Ordering, error) {
panic("unimplemented")
}
// Line computes the visual ordering of runs for a single line starting and
// ending at the given positions in the original text.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (p *Paragraph) Line(start, end int) (Ordering, error) {
panic("unimplemented")
}
// An Ordering holds the computed visual order of runs of a Paragraph. Calling
// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
// The methods of an Ordering should only be called by one goroutine at a time.
//
// The struct has no fields yet.
type Ordering struct{}
// Direction reports the directionality of the runs.
//
// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (o *Ordering) Direction() Direction {
panic("unimplemented")
}
// NumRuns returns the number of runs.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (o *Ordering) NumRuns() int {
panic("unimplemented")
}
// Run returns the ith run within the ordering.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (o *Ordering) Run(i int) Run {
panic("unimplemented")
}
// TODO: perhaps with options.
// // Reorder creates a reader that reads the runes in visual order per character.
// // Modifiers remain after the runes they modify.
// func (l *Runs) Reorder() io.Reader {
// panic("unimplemented")
// }
// A Run is a continuous sequence of characters of a single direction.
//
// The struct has no fields yet.
type Run struct {
}
// String returns the text of the run in its original order.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (r *Run) String() string {
panic("unimplemented")
}
// Bytes returns the text of the run in its original order.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (r *Run) Bytes() []byte {
panic("unimplemented")
}
// TODO: methods for
// - Display order
// - headers and footers
// - bracket replacement.
// Direction reports the direction of the run.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (r *Run) Direction() Direction {
panic("unimplemented")
}
// Pos returns the position of the Run within the text passed to SetBytes or
// SetString of the originating Paragraph value.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func (r *Run) Pos() (start, end int) {
panic("unimplemented")
}
// AppendReverse reverses the order of characters of in, appends them to out,
// and returns the result. Modifiers will still follow the runes they modify.
// Brackets are replaced with their counterparts.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func AppendReverse(out, in []byte) []byte {
panic("unimplemented")
}
// ReverseString reverses the order of characters in s and returns a new string.
// Modifiers will still follow the runes they modify. Brackets are replaced with
// their counterparts.
//
// NOTE: not implemented yet; calling it panics with "unimplemented".
func ReverseString(s string) string {
panic("unimplemented")
}

View file

@ -1,335 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"container/list"
"fmt"
"sort"
)
// This file contains a port of the reference implementation of the
// Bidi Parentheses Algorithm:
// http://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java
//
// The implementation in this file covers definitions BD14-BD16 and rule N0
// of UAX#9.
//
// Some preprocessing is done for each rune before data is passed to this
// algorithm:
// - opening and closing brackets are identified
// - a bracket pair type, like '(' and ')', is assigned a unique identifier that
// is identical for the opening and closing bracket. It is left to the caller
// to do these mappings.
// - The BPA algorithm requires that bracket characters that are canonical
// equivalents of each other be able to be substituted for each other.
// It is the responsibility of the caller to do this canonicalization.
//
// In implementing BD16, this implementation departs slightly from the "logical"
// algorithm defined in UAX#9. In particular, the stack referenced there
// supports operations that go beyond a "basic" stack. An equivalent
// implementation based on a linked list is used here.
// bracketType is the Bidi_Paired_Bracket_Type of a character.
//
// BD14. An opening paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Open.
//
// BD15. A closing paired bracket is a character whose
// Bidi_Paired_Bracket_Type property value is Close.
type bracketType byte
const (
// bpNone marks a character that is not a paired bracket. It is the zero
// value of bracketType.
bpNone bracketType = iota
// bpOpen marks an opening paired bracket.
bpOpen
// bpClose marks a closing paired bracket.
bpClose
)
// bracketPair records where the two halves of a matched bracket pair are
// located: opener is the position of the opening bracket and closer the
// position of the closing bracket.
type bracketPair struct {
	opener int
	closer int
}

// String renders the pair as "(opener, closer)".
func (b *bracketPair) String() string {
	return fmt.Sprintf("(%d, %d)", b.opener, b.closer)
}

// bracketPairs is a list of bracket pairs that implements sort.Interface,
// ordering the pairs by ascending opener position.
type bracketPairs []bracketPair

// Len returns the number of pairs in the list.
func (b bracketPairs) Len() int {
	return len(b)
}

// Swap exchanges the pairs stored at i and j.
func (b bracketPairs) Swap(i, j int) {
	b[i], b[j] = b[j], b[i]
}

// Less reports whether the pair at i opens before the pair at j.
func (b bracketPairs) Less(i, j int) bool {
	left, right := b[i], b[j]
	return left.opener < right.opener
}
// resolvePairedBrackets runs the paired bracket part of the UBA algorithm on
// the isolating run sequence s.
//
// It builds a bracketPairer from the sequence's start-of-sequence direction
// (s.sos), its classes (s.types) and its indexes into the original text
// (s.indexes). Brackets are then located using the paragraph's pairTypes and
// pairValues, and resolved against the embedding direction and the
// paragraph's initialTypes.
//
// The identifiers for bracket types are the rune of the canonicalized opening
// bracket for brackets (open or close) or 0 for runes that are not brackets.
func resolvePairedBrackets(s *isolatingRunSequence) {
	pairer := bracketPairer{
		sos:              s.sos,
		openers:          list.New(),
		codesIsolatedRun: s.types,
		indexes:          s.indexes,
	}

	// An odd level selects R as the embedding direction, an even level L.
	embedding := R
	if s.level&1 == 0 {
		embedding = L
	}

	pairer.locateBrackets(s.p.pairTypes, s.p.pairValues)
	pairer.resolveBrackets(embedding, s.p.initialTypes)
}
// bracketPairer holds the working state for locating bracket pairs (BD16) and
// resolving their types (rule N0) within one isolating run sequence.
type bracketPairer struct {
sos Class // direction corresponding to start of sequence
// The following is a restatement of BD 16 using non-algorithmic language.
//
// A bracket pair is a pair of characters consisting of an opening
// paired bracket and a closing paired bracket such that the
// Bidi_Paired_Bracket property value of the former equals the latter,
// subject to the following constraints.
// - both characters of a pair occur in the same isolating run sequence
// - the closing character of a pair follows the opening character
// - any bracket character can belong at most to one pair, the earliest possible one
// - any bracket character not part of a pair is treated like an ordinary character
// - pairs may nest properly, but their spans may not overlap otherwise
// Bracket characters with canonical decompositions are supposed to be
// treated as if they had been normalized, to allow normalized and non-
// normalized text to give the same result. In this implementation that step
// is pushed out to the caller. The caller has to ensure that the pairValue
// slices contain the rune of the opening bracket after normalization for
// any opening or closing bracket.
openers *list.List // list of positions for opening brackets
// bracket pair positions sorted by location of opening bracket
pairPositions bracketPairs
codesIsolatedRun []Class // directional bidi codes for an isolated run
indexes []int // array of index values into the original string
}
// matchOpener reports whether the characters at run positions opener and
// closer carry the same bracket identifier in pairValues, i.e. whether they
// form a matching bracket pair. Both positions are translated through
// p.indexes before pairValues is consulted.
func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool {
	openValue := pairValues[p.indexes[opener]]
	closeValue := pairValues[p.indexes[closer]]
	return openValue == closeValue
}
// maxPairingDepth is the largest number of unmatched opening brackets that
// locateBrackets keeps track of at once. When another opener is seen while
// that many are pending, pairing stops for the rest of the run.
const maxPairingDepth = 63

// locateBrackets locates matching bracket pairs according to BD16 and stores
// them in p.pairPositions, sorted by the position of the opening bracket.
//
// pairTypes and pairValues are indexed through p.indexes. Only characters
// whose bracket type is not bpNone and whose current class is ON take part.
//
// This implementation uses a linked list instead of a stack, because, while
// elements are added at the front (like a push) they are not generally removed
// in atomic 'pop' operations, reducing the benefit of the stack archetype.
func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) {
	// Traverse the run with an explicit position so that it can be recorded.
scan:
	for i, index := range p.indexes {
		// Skip characters that are not brackets or are no longer neutral.
		if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON {
			continue
		}
		switch pairTypes[index] {
		case bpOpen:
			// Maximum pairing depth reached: drop the pending openers and
			// stop looking for pairs. Pairs found so far are kept.
			if p.openers.Len() == maxPairingDepth {
				p.openers.Init()
				break scan
			}
			// Remember the opener location, most recent first.
			p.openers.PushFront(i)
		case bpClose:
			// Search the pending openers, most recent first, for a match.
			count := 0
			for elem := p.openers.Front(); elem != nil; elem = elem.Next() {
				count++
				opener := elem.Value.(int)
				if !p.matchOpener(pairValues, opener, i) {
					continue
				}
				// The opener matches: record the pair.
				p.pairPositions = append(p.pairPositions, bracketPair{opener, i})
				// Remove everything up to and including the matched opener;
				// the skipped openers can no longer be paired.
				for ; count > 0; count-- {
					p.openers.Remove(p.openers.Front())
				}
				break
			}
			// A closing bracket that matched no opener is simply ignored.
		}
	}

	// Pairs are appended in the order their closers are found. Sort them once,
	// on every exit path, instead of after each closing bracket. Each opener
	// belongs to at most one pair, so the resulting order is the same.
	sort.Sort(p.pairPositions)
}
// Bracket pairs within an isolating run sequence are processed as units so
// that both the opening and the closing paired bracket in a pair resolve to
// the same direction.
//
// N0. Process bracket pairs in an isolating run sequence sequentially in
// the logical order of the text positions of the opening paired brackets
// using the logic given below. Within this scope, bidirectional types EN
// and AN are treated as R.
//
// Identify the bracket pairs in the current isolating run sequence
// according to BD16. For each bracket-pair element in the list of pairs of
// text positions:
//
// a Inspect the bidirectional types of the characters enclosed within the
// bracket pair.
//
// b If any strong type (either L or R) matching the embedding direction is
// found, set the type for both brackets in the pair to match the embedding
// direction.
//
// o [ e ] o -> o e e e o
//
// o [ o e ] -> o e o e e
//
// o [ NI e ] -> o e NI e e
//
// c Otherwise, if a strong type (opposite the embedding direction) is
// found, test for adjacent strong types as follows: 1 First, check
// backwards before the opening paired bracket until the first strong type
// (L, R, or sos) is found. If that first preceding strong type is opposite
// the embedding direction, then set the type for both brackets in the pair
// to that type. 2 Otherwise, set the type for both brackets in the pair to
// the embedding direction.
//
// o [ o ] e -> o o o o e
//
// o [ o NI ] o -> o o o NI o o
//
// e [ o ] o -> e e o e o
//
// e [ o ] e -> e e o e e
//
// e ( o [ o ] NI ) e -> e e o o o o NI e e
//
// d Otherwise, do not set the type for the current bracket pair. Note that
// if the enclosed text contains no strong types the paired brackets will
// both resolve to the same level when resolved individually using rules N1
// and N2.
//
// e ( NI ) o -> e ( NI ) o
// getStrongTypeN0 maps the directional code at the given run position to the
// strong type required by rule N0: L stays L; R, AL, EN and AN all count as R
// (number types are treated as R in the scope of N0); anything else is ON.
//
// TODO: have separate type for "strong" directionality.
func (p *bracketPairer) getStrongTypeN0(index int) Class {
	switch code := p.codesIsolatedRun[index]; code {
	case L:
		return L
	case R, AL, EN, AN:
		return R
	}
	return ON
}
// classifyPairContent reports the strong types contained inside a bracket
// pair, assuming the given embedding direction.
//
// It returns dirEmbed as soon as a strong type matching the embedding
// direction is found between the brackets. Otherwise it returns the last
// strong type seen, or ON if the pair encloses no strong type at all.
//
// TODO: use separate type for "strong" directionality.
func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class {
	found := ON
	for pos := loc.opener + 1; pos < loc.closer; pos++ {
		strong := p.getStrongTypeN0(pos)
		if strong == ON {
			continue
		}
		if strong == dirEmbed {
			// A type matching the embedding direction settles the question.
			return strong
		}
		found = strong
	}
	// Either ON (nothing strong inside) or a class other than dirEmbed.
	return found
}
// classBeforePair returns the first strong type (L or R, as mapped by
// getStrongTypeN0) found when scanning backwards from the position just before
// the pair's opening bracket. If there is none, it returns p.sos.
func (p *bracketPairer) classBeforePair(loc bracketPair) Class {
	for pos := loc.opener; pos > 0; pos-- {
		if strong := p.getStrongTypeN0(pos - 1); strong != ON {
			return strong
		}
	}
	// Nothing strong precedes the pair: fall back to the start-of-sequence
	// direction.
	return p.sos
}
// assignBracketType implements rule N0 for a single bracket pair.
//
// The cases are tested in a different order than the rule states them, with
// the same outcome:
//   - d: no strong type inside the pair, so the brackets are left untouched;
//   - b: a strong type matching dirEmbed is inside, so the brackets take
//     dirEmbed;
//   - c: only another strong type is inside, so the brackets take the strong
//     type preceding the pair when that is neither dirEmbed nor ON (c.1), and
//     dirEmbed otherwise (c.2).
func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) {
	// Rule "N0, a": inspect the contents of the pair.
	content := p.classifyPairContent(loc, dirEmbed)
	if content == ON {
		return // case "d" - nothing to do
	}

	// Case "b" and case "c.2" both resolve to the embedding direction.
	resolved := dirEmbed
	if content != dirEmbed {
		// Case "c": look at what precedes the pair (c.1).
		if before := p.classBeforePair(loc); before != ON {
			resolved = before
		}
	}

	p.setBracketsToType(loc, resolved, initialTypes)
}
// setBracketsToType assigns dirPair to both brackets of loc. It also assigns
// dirPair to the run of characters directly following each bracket for as long
// as their entry in initialTypes (looked up through p.indexes) is NSM. The run
// after the opener stops at the closer; the run after the closer stops at the
// end of the sequence.
func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) {
	p.codesIsolatedRun[loc.opener] = dirPair
	p.codesIsolatedRun[loc.closer] = dirPair

	// spread assigns dirPair to positions [from, limit) until the first one
	// that was not initially NSM.
	spread := func(from, limit int) {
		for pos := from; pos < limit && initialTypes[p.indexes[pos]] == NSM; pos++ {
			p.codesIsolatedRun[pos] = dirPair
		}
	}
	spread(loc.opener+1, loc.closer)
	spread(loc.closer+1, len(p.indexes))
}
// resolveBrackets applies rule N0 to every pair in p.pairPositions, in the
// order in which they are stored.
func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) {
	pairs := p.pairPositions
	for i := range pairs {
		p.assignBracketType(pairs[i], dirEmbed, initialTypes)
	}
}

File diff suppressed because it is too large Load diff

View file

@ -1,224 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"flag"
"fmt"
"log"
"strconv"
"strings"
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/norm"
)
// testLevels is the -levels command-line flag. Level comparisons in the tests
// of this file are only performed when it is set; it defaults to false.
var testLevels = flag.Bool("levels", false, "enable testing of levels")
// TestBidiCore performs the tests in BidiTest.txt.
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt.
//
// The "Levels" and "Reorder" parts of the file set the expectations for the
// records that follow them. For every record, each of the three bits of field
// 1 that is set selects a paragraph level (bit index minus one) to test with.
// Levels are only compared when the -levels flag is given, and then only in
// their lowest bit; the reordering is always compared.
func TestBidiCore(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUCDFile("BidiTest.txt")
	defer r.Close()

	var wantLevels, wantOrder []string
	p := ucd.New(r, ucd.Part(func(p *ucd.Parser) {
		s := strings.Split(p.String(0), ":")
		switch s[0] {
		case "Levels":
			wantLevels = strings.Fields(s[1])
		case "Reorder":
			wantOrder = strings.Fields(s[1])
		default:
			log.Fatalf("Unknown part %q.", s[0])
		}
	}))

	for p.Next() {
		types := []Class{}
		for _, s := range p.Strings(0) {
			types = append(types, bidiClass[s])
		}
		// We ignore the bracketing part of the algorithm.
		pairTypes := make([]bracketType, len(types))
		pairValues := make([]rune, len(types))

		for i := uint(0); i < 3; i++ {
			if p.Uint(1)&(1<<i) == 0 {
				continue
			}
			lev := level(int(i) - 1)
			par := newParagraph(types, pairTypes, pairValues, lev)

			if *testLevels {
				levels := par.getLevels([]int{len(types)})
				for i, s := range wantLevels {
					// "x" marks a position whose level is not checked.
					if s == "x" {
						continue
					}
					l, err := strconv.ParseUint(s, 10, 8)
					if err != nil {
						// A malformed expectation must not be silently
						// compared as level 0.
						t.Fatalf("%s:%d:levels: invalid expected level %q: %v", p.String(0), lev, s, err)
					}
					if level(l)&1 != levels[i]&1 {
						t.Errorf("%s:%d:levels: got %v; want %v", p.String(0), lev, levels, wantLevels)
						break
					}
				}
			}

			order := par.getReordering([]int{len(types)})
			gotOrder := filterOrder(types, order)
			if got, want := fmt.Sprint(gotOrder), fmt.Sprint(wantOrder); got != want {
				t.Errorf("%s:%d:order: got %v; want %v\noriginal %v", p.String(0), lev, got, want, order)
			}
		}
	}
	if err := p.Err(); err != nil {
		log.Fatal(err)
	}
}
// removeClasses is the set of classes that the tests leave out of their
// results: filterOrder drops positions with these classes and getLevelStrings
// reports them as "x".
var removeClasses = map[Class]bool{
LRO: true,
RLO: true,
RLE: true,
LRE: true,
PDF: true,
BN: true,
}
// TestBidiCharacters performs the tests in BidiCharacterTest.txt.
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
//
// For every record it reads the runes (field 0), the paragraph level selector
// (field 1), the expected paragraph level (field 2), the expected levels
// (field 3) and the expected visual order (field 4). It derives class and
// bracket information for each rune and compares the results of newParagraph.
// Levels are only compared when the -levels flag is given.
func TestBidiCharacters(t *testing.T) {
testtext.SkipIfNotLong(t)
ucd.Parse(gen.OpenUCDFile("BidiCharacterTest.txt"), func(p *ucd.Parser) {
var (
types []Class
pairTypes []bracketType
pairValues []rune
parLevel level
wantLevel = level(p.Int(2))
wantLevels = p.Strings(3)
wantVisualOrder = p.Strings(4)
)
// Field 1 selects the paragraph level: 0 or 1 is used as is, 2 means
// implicitLevel. Any other value leaves parLevel at its zero value.
switch l := p.Int(1); l {
case 0, 1:
parLevel = level(l)
case 2:
parLevel = implicitLevel
default:
// Spec says to ignore unknown parts.
}
runes := p.Runes(0)
for _, r := range runes {
// Assign the bracket type. If the rune has an NFKD decomposition, the
// first rune of that decomposition is used for the lookup instead.
if d := norm.NFKD.PropertiesString(string(r)).Decomposition(); d != nil {
r = []rune(string(d))[0]
}
// Note: this p shadows the parser p for the rest of the loop body.
p, _ := LookupRune(r)
// Assign the class for this rune.
types = append(types, p.Class())
// pairValues holds the opening bracket for both halves of a pair, and 0
// for runes that are not brackets.
switch {
case !p.IsBracket():
pairTypes = append(pairTypes, bpNone)
pairValues = append(pairValues, 0)
case p.IsOpeningBracket():
pairTypes = append(pairTypes, bpOpen)
pairValues = append(pairValues, r)
default:
pairTypes = append(pairTypes, bpClose)
pairValues = append(pairValues, p.reverseBracket(r))
}
}
par := newParagraph(types, pairTypes, pairValues, parLevel)
// Test results:
if got := par.embeddingLevel; got != wantLevel {
t.Errorf("%v:level: got %d; want %d", string(runes), got, wantLevel)
}
if *testLevels {
gotLevels := getLevelStrings(types, par.getLevels([]int{len(types)}))
if got, want := fmt.Sprint(gotLevels), fmt.Sprint(wantLevels); got != want {
t.Errorf("%04X %q:%d: got %v; want %v\nval: %x\npair: %v", runes, string(runes), parLevel, got, want, pairValues, pairTypes)
}
}
// The visual order is compared after dropping positions whose class is in
// removeClasses.
order := par.getReordering([]int{len(types)})
order = filterOrder(types, order)
if got, want := fmt.Sprint(order), fmt.Sprint(wantVisualOrder); got != want {
t.Errorf("%04X %q:%d: got %v; want %v\ngot order: %s", runes, string(runes), parLevel, got, want, reorder(runes, order))
}
})
}
// getLevelStrings formats levels for comparison with the expected test data.
// Position i becomes "x" if its class cl[i] is in removeClasses, and the
// fmt.Sprint form of the level otherwise. The result is nil when levels is
// empty.
func getLevelStrings(cl []Class, levels []level) []string {
	var formatted []string
	for i, lvl := range levels {
		text := "x"
		if !removeClasses[cl[i]] {
			text = fmt.Sprint(lvl)
		}
		formatted = append(formatted, text)
	}
	return formatted
}
// filterOrder returns the entries of order, in their original sequence, whose
// class cl[o] is not in removeClasses. The result is never nil.
func filterOrder(cl []Class, order []int) []int {
	kept := []int{}
	for _, pos := range order {
		if removeClasses[cl[pos]] {
			continue
		}
		kept = append(kept, pos)
	}
	return kept
}
// reorder returns the string made of the runes of r picked in the sequence
// given by order: the i-th rune of the result is r[order[i]].
func reorder(r []rune, order []int) string {
	picked := make([]rune, 0, len(order))
	for _, idx := range order {
		picked = append(picked, r[idx])
	}
	return string(picked)
}
// bidiClass maps the short Bidi class names used in the test data to Class
// values. The names and codes are taken from class "bc" in
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
var bidiClass = map[string]Class{
"AL": AL, // classArabicLetter,
"AN": AN, // classArabicNumber,
"B": B, // classParagraphSeparator,
"BN": BN, // classBoundaryNeutral,
"CS": CS, // classCommonSeparator,
"EN": EN, // classEuropeanNumber,
"ES": ES, // classEuropeanSeparator,
"ET": ET, // classEuropeanTerminator,
"L": L, // classLeftToRight,
"NSM": NSM, // classNonspacingMark,
"ON": ON, // classOtherNeutral,
"R": R, // classRightToLeft,
"S": S, // classSegmentSeparator,
"WS": WS, // classWhiteSpace,
"LRO": LRO, // classLeftToRightOverride,
"RLO": RLO, // classRightToLeftOverride,
"LRE": LRE, // classLeftToRightEmbedding,
"RLE": RLE, // classRightToLeftEmbedding,
"PDF": PDF, // classPopDirectionalFormat,
"LRI": LRI, // classLeftToRightIsolate,
"RLI": RLI, // classRightToLeftIsolate,
"FSI": FSI, // classFirstStrongIsolate,
"PDI": PDI, // classPopDirectionalIsolate,
}

View file

@ -1,133 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"flag"
"log"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
)
// outputFile is the -out flag: the file that genTables writes the generated
// tables to. It defaults to "tables.go".
var outputFile = flag.String("out", "tables.go", "output file")
// main initializes the gen package, repackages gen_trieval.go and
// gen_ranges.go into package bidi as trieval.go and ranges_test.go, and then
// generates the tables.
func main() {
gen.Init()
gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
genTables()
}
// bidiClass maps the short Bidi class names found in the UCD data to the Class
// values stored in the generated trie. The names and codes are taken from
// class "bc" in
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
//
// All explicit formatting classes (FSI through RLO below) are stored as the
// single class Control.
var bidiClass = map[string]Class{
"AL": AL, // ArabicLetter
"AN": AN, // ArabicNumber
"B": B, // ParagraphSeparator
"BN": BN, // BoundaryNeutral
"CS": CS, // CommonSeparator
"EN": EN, // EuropeanNumber
"ES": ES, // EuropeanSeparator
"ET": ET, // EuropeanTerminator
"L": L, // LeftToRight
"NSM": NSM, // NonspacingMark
"ON": ON, // OtherNeutral
"R": R, // RightToLeft
"S": S, // SegmentSeparator
"WS": WS, // WhiteSpace
"FSI": Control,
"PDF": Control,
"PDI": Control,
"LRE": Control,
"LRI": Control,
"LRO": Control,
"RLE": Control,
"RLI": Control,
"RLO": Control,
}
// genTables generates the Bidi tables and writes them to *outputFile as part
// of package bidi.
//
// Each trie value combines the Class of a rune with its bracket information
// from BidiBrackets.txt: an index into xorMasks shifted by xorMaskShift, plus
// openMask for opening brackets. Classes come from
// extracted/DerivedBidiClass.txt first and from visitDefaults afterwards; only
// the first class inserted for a rune is kept.
func genTables() {
// The class must fit in the low four bits of a trie value.
if numClass > 0x0F {
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile(*outputFile, "bidi")
gen.WriteUnicodeVersion(w)
t := triegen.NewTrie("bidi")
// Build data about bracket mapping. These bits need to be or-ed with
// any other bits.
orMask := map[rune]uint64{}
// xorMap maps an XOR mask to its index in xorMasks.
xorMap := map[rune]int{}
xorMasks := []rune{0} // First value is no-op.
ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
r1 := p.Rune(0)
r2 := p.Rune(1)
// XOR-ing a bracket with this mask yields the other bracket of the pair.
xor := r1 ^ r2
if _, ok := xorMap[xor]; !ok {
xorMap[xor] = len(xorMasks)
xorMasks = append(xorMasks, xor)
}
entry := uint64(xorMap[xor]) << xorMaskShift
switch p.String(2) {
case "o":
entry |= openMask
case "c", "n":
default:
log.Fatalf("Unknown bracket class %q.", p.String(2))
}
orMask[r1] = entry
})
w.WriteComment(`
xorMasks contains masks to be xor-ed with brackets to get the reverse
version.`)
w.WriteVar("xorMasks", xorMasks)
// done records the runes that already have a trie value, so that later
// inserts for the same rune are ignored.
done := map[rune]bool{}
insert := func(r rune, c Class) {
if !done[r] {
t.Insert(r, orMask[r]|uint64(c))
done[r] = true
}
}
// Insert the derived BiDi properties.
ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
class, ok := bidiClass[p.String(1)]
if !ok {
log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
}
insert(r, class)
})
// Fill in defaults for runes that did not get a class above.
visitDefaults(insert)
// TODO: use sparse blocks. This would reduce table size considerably
// from the looks of it.
sz, err := t.Gen(w)
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
// Dummy values to make methods in gen_common compile. The real versions
// will be generated by this file to tables.go.
var (
// xorMasks is a placeholder for the generated table of the same name.
xorMasks []rune
)

View file

@ -1,57 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/rangetable"
)
// visitDefaults calls fn with the default Bidi class of the runes in a number
// of fixed ranges (AL, R and ET), then with BN for every noncharacter code
// point and for every rune listed as Default_Ignorable_Code_Point in
// DerivedCoreProperties.txt.
//
// These tables are hand-extracted from:
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt
func visitDefaults(fn func(r rune, c Class)) {
// first write default values for ranges listed above.
// Each pair of values is an inclusive range [lo, hi].
visitRunes(fn, AL, []rune{
0x0600, 0x07BF, // Arabic
0x08A0, 0x08FF, // Arabic Extended-A
0xFB50, 0xFDCF, // Arabic Presentation Forms
0xFDF0, 0xFDFF,
0xFE70, 0xFEFF,
0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
})
visitRunes(fn, R, []rune{
0x0590, 0x05FF, // Hebrew
0x07C0, 0x089F, // Nko et al.
0xFB1D, 0xFB4F,
0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
0x0001E800, 0x0001EDFF,
0x0001EF00, 0x0001EFFF,
})
visitRunes(fn, ET, []rune{ // European Terminator
0x20A0, 0x20Cf, // Currency symbols
})
rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
fn(r, BN) // Boundary Neutral
})
ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
if p.String(1) == "Default_Ignorable_Code_Point" {
fn(p.Rune(0), BN) // Boundary Neutral
}
})
}
// visitRunes calls fn(r, c) for every rune r in the ranges described by runes.
// runes is read as consecutive (lo, hi) pairs, each an inclusive range, so it
// must hold an even number of values.
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
	for rest := runes; len(rest) > 0; rest = rest[2:] {
		first, last := rest[0], rest[1]
		for r := first; r <= last; r++ {
			fn(r, c)
		}
	}
}

View file

@ -1,64 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint
const (
L Class = iota // LeftToRight
R // RightToLeft
EN // EuropeanNumber
ES // EuropeanSeparator
ET // EuropeanTerminator
AN // ArabicNumber
CS // CommonSeparator
B // ParagraphSeparator
S // SegmentSeparator
WS // WhiteSpace
ON // OtherNeutral
BN // BoundaryNeutral
NSM // NonspacingMark
AL // ArabicLetter
Control // Control LRO - PDI
// numClass is the number of classes declared above it. The classes that
// follow are numbered after it.
numClass
LRO // LeftToRightOverride
RLO // RightToLeftOverride
LRE // LeftToRightEmbedding
RLE // RightToLeftEmbedding
PDF // PopDirectionalFormat
LRI // LeftToRightIsolate
RLI // RightToLeftIsolate
FSI // FirstStrongIsolate
PDI // PopDirectionalIsolate
// unknownClass has all bits set.
unknownClass = ^Class(0)
)
// controlToClass maps each explicit directional formatting character to its
// specific Class.
var controlToClass = map[rune]Class{
0x202D: LRO, // LeftToRightOverride,
0x202E: RLO, // RightToLeftOverride,
0x202A: LRE, // LeftToRightEmbedding,
0x202B: RLE, // RightToLeftEmbedding,
0x202C: PDF, // PopDirectionalFormat,
0x2066: LRI, // LeftToRightIsolate,
0x2067: RLI, // RightToLeftIsolate,
0x2068: FSI, // FirstStrongIsolate,
0x2069: PDI, // PopDirectionalIsolate,
}
// A trie entry has the following bits:
// 7..5 XOR mask for brackets
// 4 1: Bracket open, 0: Bracket close
// 3..0 Class type
const (
// openMask selects bit 4, which is set for opening brackets.
openMask = 0x10
// xorMaskShift is the position of the lowest bit of the XOR mask field
// (bits 7..5).
xorMaskShift = 5
)

View file

@ -1,206 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import "unicode/utf8"
// Properties provides access to BiDi properties of runes.
type Properties struct {
// entry is the value looked up for the rune: the Class in the low four
// bits and the bracket information in the high four bits.
entry uint8
// last is the last byte of the rune's UTF-8 encoding, as recorded by Lookup
// for three-byte encodings. Class uses its low four bits to tell the
// Control classes apart.
last uint8
}
// trie is the lookup structure used by the Lookup functions for non-ASCII
// runes.
var trie = newBidiTrie(0)
// TODO: using this for bidirule reduces the running time by about 5%. Consider
// if this is worth exposing or if we can find a way to speed up the Class
// method.
//
// // CompactClass is like Class, but maps all of the BiDi control classes
// // (LRO, RLO, LRE, RLE, PDF, LRI, RLI, FSI, PDI) to the class Control.
// func (p Properties) CompactClass() Class {
// return Class(p.entry & 0x0F)
// }
// Class returns the Bidi class for p.
//
// The class is stored in the low four bits of the entry. When that value is
// Control, the specific control class is looked up in controlByteToClass using
// the low four bits of p.last.
func (p Properties) Class() Class {
	if stored := Class(p.entry & 0x0F); stored != Control {
		return stored
	}
	return controlByteToClass[p.last&0xF]
}
// IsBracket reports whether the rune is a bracket, that is, whether any of the
// four high bits of its entry is set.
func (p Properties) IsBracket() bool {
	bracketBits := p.entry & 0xF0
	return bracketBits != 0
}
// IsOpeningBracket reports whether the rune is an opening bracket, that is,
// whether the openMask bit of its entry is set.
// IsBracket must return true.
func (p Properties) IsOpeningBracket() bool {
	openBit := p.entry & openMask
	return openBit != 0
}
// reverseBracket returns r XOR-ed with the mask that the entry of p selects
// from xorMasks.
//
// TODO: find a better API and expose.
func (p Properties) reverseBracket(r rune) rune {
	mask := xorMasks[p.entry>>xorMaskShift]
	return mask ^ r
}
// controlByteToClass maps the low four bits of the last UTF-8 byte of an
// explicit directional formatting character to its Class. Entries that are not
// listed hold the zero Class.
var controlByteToClass = [16]Class{
0xD: LRO, // U+202D LeftToRightOverride,
0xE: RLO, // U+202E RightToLeftOverride,
0xA: LRE, // U+202A LeftToRightEmbedding,
0xB: RLE, // U+202B RightToLeftEmbedding,
0xC: PDF, // U+202C PopDirectionalFormat,
0x6: LRI, // U+2066 LeftToRightIsolate,
0x7: RLI, // U+2067 RightToLeftIsolate,
0x8: FSI, // U+2068 FirstStrongIsolate,
0x9: PDI, // U+2069 PopDirectionalIsolate,
}
// LookupRune returns properties for r, together with the size reported by
// Lookup for the UTF-8 encoding of r.
func LookupRune(r rune) (p Properties, size int) {
	var encoded [utf8.UTFMax]byte
	width := utf8.EncodeRune(encoded[:], r)
	return Lookup(encoded[:width])
}
// TODO: these lookup methods are based on the generated trie code. The returned
// sizes have slightly different semantics from the generated code, in that it
// always returns size==1 for an illegal UTF-8 byte (instead of the length
// of the maximum invalid subsequence). Most Transformers, like unicode/norm,
// leave invalid UTF-8 untouched, in which case it has performance benefits to
// do so (without changing the semantics). Bidi requires the semantics used here
// for the bidirule implementation to be compatible with the Go semantics.
// They ultimately should perhaps be adopted by all trie implementations, for
// convenience's sake.
// This unrolled code also boosts performance of the secure/bidirule package by
// about 30%.
// So, to remove this code:
// - add option to trie generator to define return type.
// - always return 1 byte size for ill-formed UTF-8 runes.
// Lookup returns properties for the first rune in s and the width in bytes of
// its encoding. The size will be 0 if s does not hold enough bytes to complete
// the encoding; this includes an empty s. For an illegal or ill-formed leading
// sequence it returns zero Properties and a size of 1.
func Lookup(s []byte) (p Properties, sz int) {
	if len(s) == 0 {
		// Nothing to decode: report "not enough bytes" rather than
		// panicking on s[0] below.
		return Properties{}, 0
	}
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return Properties{entry: bidiValues[c0]}, 1
	case c0 < 0xC2:
		// A continuation byte or an overlong two-byte lead cannot start a rune.
		return Properties{}, 1
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		// The last byte is recorded so that Class can tell the individual
		// control characters apart.
		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		o = uint32(i)<<6 + uint32(c2)
		i = bidiIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
	}
	// Illegal rune
	return Properties{}, 1
}
// LookupString returns properties for the first rune in s and the width in
// bytes of its encoding. The size will be 0 if s does not hold enough bytes to
// complete the encoding; this includes an empty s. For an illegal or
// ill-formed leading sequence it returns zero Properties and a size of 1.
func LookupString(s string) (p Properties, sz int) {
	if len(s) == 0 {
		// Nothing to decode: report "not enough bytes" rather than
		// panicking on s[0] below.
		return Properties{}, 0
	}
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return Properties{entry: bidiValues[c0]}, 1
	case c0 < 0xC2:
		// A continuation byte or an overlong two-byte lead cannot start a rune.
		return Properties{}, 1
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c1)}, 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		// The last byte is recorded so that Class can tell the individual
		// control characters apart.
		return Properties{entry: trie.lookupValue(uint32(i), c2), last: c2}, 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return Properties{}, 0
		}
		i := bidiIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return Properties{}, 1
		}
		o := uint32(i)<<6 + uint32(c1)
		i = bidiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return Properties{}, 1
		}
		o = uint32(i)<<6 + uint32(c2)
		i = bidiIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return Properties{}, 1
		}
		return Properties{entry: trie.lookupValue(uint32(i), c3)}, 4
	}
	// Illegal rune
	return Properties{}, 1
}

View file

@ -1,53 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package bidi
import (
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/rangetable"
)
// These tables are hand-extracted from:
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt

// visitDefaults calls fn with the default Bidi class for the code points in
// the hand-extracted ranges below, then with BN for every noncharacter code
// point and for every code point listed as Default_Ignorable_Code_Point in
// DerivedCoreProperties.txt. The groups are visited in the order written.
func visitDefaults(fn func(r rune, c Class)) {
	// Each entry holds inclusive lo, hi pairs that share one default class.
	defaults := []struct {
		class Class
		pairs []rune
	}{
		{AL, []rune{
			0x0600, 0x07BF, // Arabic
			0x08A0, 0x08FF, // Arabic Extended-A
			0xFB50, 0xFDCF, // Arabic Presentation Forms
			0xFDF0, 0xFDFF,
			0xFE70, 0xFEFF,
			0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
		}},
		{R, []rune{
			0x0590, 0x05FF, // Hebrew
			0x07C0, 0x089F, // Nko et al.
			0xFB1D, 0xFB4F,
			0x00010800, 0x00010FFF, // Cypriot Syllabary et al.
			0x0001E800, 0x0001EDFF,
			0x0001EF00, 0x0001EFFF,
		}},
		{ET, []rune{ // European Terminator
			0x20A0, 0x20CF, // Currency symbols
		}},
	}
	// First write the default values for the ranges listed above.
	for _, d := range defaults {
		visitRunes(fn, d.class, d.pairs)
	}

	boundaryNeutral := func(r rune) {
		fn(r, BN) // Boundary Neutral
	}
	rangetable.Visit(unicode.Noncharacter_Code_Point, boundaryNeutral)
	ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
		if p.String(1) != "Default_Ignorable_Code_Point" {
			return
		}
		boundaryNeutral(p.Rune(0))
	})
}
// visitRunes calls fn(r, c) for every rune r in each inclusive range given by
// runes, which holds the ranges as consecutive lo, hi pairs.
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
	for k := 0; k < len(runes); k += 2 {
		first := runes[k]
		last := runes[k+1]
		for r := first; r <= last; r++ {
			fn(r, c)
		}
	}
}

File diff suppressed because it is too large Load diff

View file

@ -1,82 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// labels maps each Class value to its abbreviated name. TestTables compares
// these names with the class strings read from the UCD data files.
var labels = []string{
	AL:  "AL",
	AN:  "AN",
	B:   "B",
	BN:  "BN",
	CS:  "CS",
	EN:  "EN",
	ES:  "ES",
	ET:  "ET",
	L:   "L",
	NSM: "NSM",
	ON:  "ON",
	R:   "R",
	S:   "S",
	WS:  "WS",

	// Explicit formatting classes, which Class resolves from Control.
	LRO: "LRO",
	RLO: "RLO",
	LRE: "LRE",
	RLE: "RLE",
	PDF: "PDF",
	LRI: "LRI",
	RLI: "RLI",
	FSI: "FSI",
	PDI: "PDI",
}
// TestTables checks the Bidi tables against the UCD data files. It verifies
// the bracket pairing from BidiBrackets.txt, the class of every code point
// listed in extracted/DerivedBidiClass.txt, and the defaults produced by
// visitDefaults for code points not covered by that file. For each tested
// rune it also checks that LookupString, LookupRune and Lookup agree.
func TestTables(t *testing.T) {
	testtext.SkipIfNotLong(t)

	ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
		r1 := p.Rune(0)
		want := p.Rune(1)

		e, _ := LookupRune(r1)
		if got := e.reverseBracket(r1); got != want {
			t.Errorf("Reverse(%U) = %U; want %U", r1, got, want)
		}
	})

	// done records the runes already tested so that the defaults pass only
	// covers runes absent from the derived data.
	done := map[rune]bool{}
	test := func(name string, r rune, want string) {
		str := string(r)
		e, _ := LookupString(str)
		if got := labels[e.Class()]; got != want {
			t.Errorf("%s:%U: got %s; want %s", name, r, got, want)
		}
		// The arguments follow the order of the verbs: got properties, got
		// size, wanted properties, wanted size.
		if e2, sz := LookupRune(r); e != e2 || sz != len(str) {
			t.Errorf("LookupRune(%U) = %v, %d; want %v, %d", r, e2, sz, e, len(str))
		}
		if e2, sz := Lookup([]byte(str)); e != e2 || sz != len(str) {
			t.Errorf("Lookup(%U) = %v, %d; want %v, %d", r, e2, sz, e, len(str))
		}
		done[r] = true
	}

	// Insert the derived BiDi properties.
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)
		test("derived", r, p.String(1))
	})
	visitDefaults(func(r rune, c Class) {
		if !done[r] {
			test("default", r, labels[c])
		}
	})
}

View file

@ -1,60 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package bidi
// Class is the Unicode BiDi class. Each rune has a single class.
type Class uint

// The values are assigned by iota, so the declaration order below defines the
// numeric value of every class and must not be changed.
const (
	L       Class = iota // LeftToRight
	R                    // RightToLeft
	EN                   // EuropeanNumber
	ES                   // EuropeanSeparator
	ET                   // EuropeanTerminator
	AN                   // ArabicNumber
	CS                   // CommonSeparator
	B                    // ParagraphSeparator
	S                    // SegmentSeparator
	WS                   // WhiteSpace
	ON                   // OtherNeutral
	BN                   // BoundaryNeutral
	NSM                  // NonspacingMark
	AL                   // ArabicLetter
	Control              // Control LRO - PDI

	// numClass follows the last class above; its value equals the number of
	// classes declared before it.
	numClass

	LRO // LeftToRightOverride
	RLO // RightToLeftOverride
	LRE // LeftToRightEmbedding
	RLE // RightToLeftEmbedding
	PDF // PopDirectionalFormat
	LRI // LeftToRightIsolate
	RLI // RightToLeftIsolate
	FSI // FirstStrongIsolate
	PDI // PopDirectionalIsolate

	// unknownClass has all bits set, so it cannot collide with a class above.
	unknownClass = ^Class(0)
)
// controlToClass maps each BiDi control character to its specific class. The
// entries are listed in code point order.
var controlToClass = map[rune]Class{
	0x202A: LRE, // LeftToRightEmbedding,
	0x202B: RLE, // RightToLeftEmbedding,
	0x202C: PDF, // PopDirectionalFormat,
	0x202D: LRO, // LeftToRightOverride,
	0x202E: RLO, // RightToLeftOverride,
	0x2066: LRI, // LeftToRightIsolate,
	0x2067: RLI, // RightToLeftIsolate,
	0x2068: FSI, // FirstStrongIsolate,
	0x2069: PDI, // PopDirectionalIsolate,
}
// A trie entry has the following bits:
// 7..5  XOR mask for brackets
// 4     1: Bracket open, 0: Bracket close
// 3..0  Class type
const (
	// openMask selects bit 4, the "bracket open" flag.
	openMask = 0x10
	// xorMaskShift is the right shift that brings bits 7..5, the XOR mask
	// selector for brackets, down to the low bits.
	xorMaskShift = 5
)

View file

@ -1,115 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bytes"
"flag"
"fmt"
"io"
"log"
"reflect"
"strings"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
// versionList is the value of the --versions flag: a comma-separated list of
// Unicode versions for which to generate RangeTables.
var versionList = flag.String("versions", "",
	"list of versions for which to generate RangeTables")

// bootstrapMessage is the fatal message logged by getVersions when no
// versions were passed on the command line.
const bootstrapMessage = `No versions specified.
To bootstrap the code generation, run:
go run gen.go --versions=4.1.0,5.0.0,6.0.0,6.1.0,6.2.0,6.3.0,7.0.0
and ensure that the latest versions are included by checking:
http://www.unicode.org/Public/`
// getVersions returns the versions requested through the --versions flag in
// numeric collation order, adding gen.UnicodeVersion() when the list does not
// already contain it. It exits through log.Fatal if the flag is empty.
func getVersions() []string {
	if *versionList == "" {
		log.Fatal(bootstrapMessage)
	}

	col := collate.New(language.Und, collate.Numeric)
	list := strings.Split(*versionList, ",")
	col.SortStrings(list)

	// Ensure that at least the current version is included.
	hasCurrent := false
	for _, v := range list {
		if v == gen.UnicodeVersion() {
			hasCurrent = true
			break
		}
	}
	if !hasCurrent {
		list = append(list, gen.UnicodeVersion())
		col.SortStrings(list)
	}
	return list
}
// main generates tables.go for package rangetable. For every requested
// Unicode version it reads that version's UnicodeData.txt, builds a
// RangeTable of the listed code points and writes it out as Go source,
// together with an index map and size statistics.
func main() {
	gen.Init()

	versions := getVersions()

	// ident turns a version such as "7.0.0" into an identifier suffix.
	ident := func(version string) string {
		return strings.Replace(version, ".", "_", -1)
	}

	out := &bytes.Buffer{}
	fmt.Fprintf(out, "//go:generate go run gen.go --versions=%s\n\n", strings.Join(versions, ","))
	fmt.Fprintf(out, "import \"unicode\"\n\n")

	fmt.Fprintf(out, "var assigned = map[string]*unicode.RangeTable{\n")
	for _, version := range versions {
		fmt.Fprintf(out, "\t%q: assigned%s,\n", version, ident(version))
	}
	fmt.Fprintf(out, "}\n\n")

	total := 0
	for _, version := range versions {
		codePoints := []rune{}

		src := gen.Open("http://www.unicode.org/Public/", "", version+"/ucd/UnicodeData.txt")
		ucd.Parse(src, func(p *ucd.Parser) {
			codePoints = append(codePoints, p.Rune(0))
		})

		table := rangetable.New(codePoints...)

		// Size of the table header plus all of its range entries.
		bytesUsed := int(reflect.TypeOf(unicode.RangeTable{}).Size())
		bytesUsed += int(reflect.TypeOf(unicode.Range16{}).Size()) * len(table.R16)
		bytesUsed += int(reflect.TypeOf(unicode.Range32{}).Size()) * len(table.R32)

		fmt.Fprintf(out, "// size %d bytes (%d KiB)\n", bytesUsed, bytesUsed/1024)
		fmt.Fprintf(out, "var assigned%s = ", ident(version))
		print(out, table)

		total += bytesUsed
	}

	fmt.Fprintf(out, "// Total size %d bytes (%d KiB)\n", total, total/1024)

	gen.WriteGoFile("tables.go", "rangetable", out.Bytes())
}
// print writes rt to w as a Go composite literal of type *unicode.RangeTable,
// followed by a blank line. R16 bounds are written as 4-digit and R32 bounds
// as 8-digit hexadecimal numbers.
func print(w io.Writer, rt *unicode.RangeTable) {
	fmt.Fprint(w, "&unicode.RangeTable{\n")

	fmt.Fprint(w, "\tR16: []unicode.Range16{\n")
	for i := range rt.R16 {
		e := rt.R16[i]
		fmt.Fprintf(w, "\t\t{%#04x, %#04x, %d},\n", e.Lo, e.Hi, e.Stride)
	}
	fmt.Fprint(w, "\t},\n")

	fmt.Fprint(w, "\tR32: []unicode.Range32{\n")
	for i := range rt.R32 {
		e := rt.R32[i]
		fmt.Fprintf(w, "\t\t{%#08x, %#08x, %d},\n", e.Lo, e.Hi, e.Stride)
	}
	fmt.Fprint(w, "\t},\n")

	fmt.Fprintf(w, "\tLatinOffset: %d,\n", rt.LatinOffset)
	fmt.Fprint(w, "}\n\n")
}

View file

@ -1,260 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rangetable
import (
"unicode"
)
// atEnd is used to mark a completed iteration. It is one past the largest
// valid rune, so it compares greater than the start of any real range.
const atEnd = unicode.MaxRune + 1
// Merge returns a new RangeTable that is the union of the given tables.
// It can also be used to compact user-created RangeTables. The entries in
// R16 and R32 for any given RangeTable should be sorted and non-overlapping.
//
// A lookup in the resulting table can be several times faster than using In
// directly on the ranges. Merge is an expensive operation, however, and only
// makes sense if one intends to use the result for more than a couple of
// hundred lookups.
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable {
	rt := &unicode.RangeTable{}
	if len(ranges) == 0 {
		return rt
	}

	// One cursor per input table, positioned at the table's first R16 range,
	// or at atEnd when the table has no R16 entries.
	iter := tablesIter(make([]tableIndex, len(ranges)))

	for i, t := range ranges {
		iter[i] = tableIndex{t, 0, atEnd}
		if len(t.R16) > 0 {
			iter[i].next = rune(t.R16[0].Lo)
		}
	}

	// r0 is the pending output range and r1 the next candidate. A range with
	// a zero Stride signals that the iterator is exhausted.
	if r0 := iter.next16(); r0.Stride != 0 {
		for {
			r1 := iter.next16()
			if r1.Stride == 0 {
				rt.R16 = append(rt.R16, r0)
				break
			}
			// Distance from the end of r0 to the start of r1.
			stride := r1.Lo - r0.Hi
			if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
				// Fully merge the next range into the previous one.
				r0.Hi, r0.Stride = r1.Hi, stride
				continue
			} else if stride == r0.Stride {
				// Move the first element of r1 to r0. This may eliminate an
				// entry.
				r0.Hi = r1.Lo
				r0.Stride = stride
				r1.Lo = r1.Lo + r1.Stride
				if r1.Lo > r1.Hi {
					continue
				}
			}
			rt.R16 = append(rt.R16, r0)
			r0 = r1
		}
	}

	// Reset the cursors and repeat the same procedure for the R32 ranges.
	for i, t := range ranges {
		iter[i] = tableIndex{t, 0, atEnd}
		if len(t.R32) > 0 {
			iter[i].next = rune(t.R32[0].Lo)
		}
	}

	if r0 := iter.next32(); r0.Stride != 0 {
		for {
			r1 := iter.next32()
			if r1.Stride == 0 {
				rt.R32 = append(rt.R32, r0)
				break
			}
			stride := r1.Lo - r0.Hi
			if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) {
				// Fully merge the next range into the previous one.
				r0.Hi, r0.Stride = r1.Hi, stride
				continue
			} else if stride == r0.Stride {
				// Move the first element of r1 to r0. This may eliminate an
				// entry.
				r0.Hi = r1.Lo
				r1.Lo = r1.Lo + r1.Stride
				if r1.Lo > r1.Hi {
					continue
				}
			}
			rt.R32 = append(rt.R32, r0)
			r0 = r1
		}
	}

	// LatinOffset is the number of leading R16 entries with Hi <= MaxLatin1.
	for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ {
		rt.LatinOffset = i + 1
	}

	return rt
}
// tableIndex is a cursor into one of the tables being merged. Merge
// initializes it positionally, so the field order must not change.
type tableIndex struct {
	t    *unicode.RangeTable // table being iterated
	p    uint32              // index of the current range in t.R16 or t.R32
	next rune                // next rune to emit from t, or atEnd when exhausted
}

// tablesIter holds one cursor for each table passed to Merge.
type tablesIter []tableIndex
// sortIter orders t by ascending next value using insertion sort: each
// element in turn is swapped towards the front while its predecessor has a
// strictly larger next value.
func sortIter(t []tableIndex) {
	for placed := 1; placed < len(t); placed++ {
		for k := placed; k > 0; k-- {
			if t[k-1].next <= t[k].next {
				break
			}
			t[k-1], t[k] = t[k], t[k-1]
		}
	}
}
// next16 finds the range to be added to the table. If ranges overlap between
// multiple tables it clips the result to a non-overlapping range if the
// elements are not fully subsumed. It returns a zero range if there are no more
// ranges.
func (ti tablesIter) next16() unicode.Range16 {
	sortIter(ti)

	// After sorting, ti[0] is the cursor with the smallest next rune.
	t0 := ti[0]
	if t0.next == atEnd {
		return unicode.Range16{}
	}
	// Start from the current range of t0, beginning at its cursor position.
	r0 := t0.t.R16[t0.p]
	r0.Lo = uint16(t0.next)

	// We restrict the Hi of the current range if it overlaps with another range.
	for i := range ti {
		tn := ti[i]
		// Since our tableIndices are sorted by next, we can break if there
		// is no overlap. The first value of a next range can always be merged
		// into the current one, so we can break in case of equality as well.
		if rune(r0.Hi) <= tn.next {
			break
		}
		rn := tn.t.R16[tn.p]
		rn.Lo = uint16(tn.next)

		// Limit r0.Hi based on next ranges in list, but allow it to overlap
		// with ranges as long as it subsumes it.
		m := (rn.Lo - r0.Lo) % r0.Stride
		if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
			// Overlap, take the min of the two Hi values: for simplicity's sake
			// we only process one range at a time.
			if r0.Hi > rn.Hi {
				r0.Hi = rn.Hi
			}
		} else {
			// Not a compatible stride. Set to the last possible value before
			// rn.Lo, but ensure there is at least one value.
			if x := rn.Lo - m; r0.Lo <= x {
				r0.Hi = x
			}
			break
		}
	}

	// Update the next values for each table.
	for i := range ti {
		tn := &ti[i]
		if rune(r0.Hi) < tn.next {
			break
		}
		rn := tn.t.R16[tn.p]
		stride := rune(rn.Stride)
		// Advance to the first element of this range that lies beyond r0.Hi.
		tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
		if rune(rn.Hi) < tn.next {
			// The current range is used up: move to the following one, or
			// mark the table as exhausted.
			if tn.p++; int(tn.p) == len(tn.t.R16) {
				tn.next = atEnd
			} else {
				tn.next = rune(tn.t.R16[tn.p].Lo)
			}
		}
	}

	// A single-element range is given stride 1.
	if r0.Lo == r0.Hi {
		r0.Stride = 1
	}

	return r0
}
// next32 finds the range to be added to the table. If ranges overlap between
// multiple tables it clips the result to a non-overlapping range if the
// elements are not fully subsumed. It returns a zero range if there are no more
// ranges.
func (ti tablesIter) next32() unicode.Range32 {
	sortIter(ti)

	// After sorting, ti[0] is the cursor with the smallest next rune.
	t0 := ti[0]
	if t0.next == atEnd {
		return unicode.Range32{}
	}
	// Start from the current range of t0, beginning at its cursor position.
	r0 := t0.t.R32[t0.p]
	r0.Lo = uint32(t0.next)

	// We restrict the Hi of the current range if it overlaps with another range.
	for i := range ti {
		tn := ti[i]
		// Since our tableIndices are sorted by next, we can break if there
		// is no overlap. The first value of a next range can always be merged
		// into the current one, so we can break in case of equality as well.
		if rune(r0.Hi) <= tn.next {
			break
		}
		rn := tn.t.R32[tn.p]
		rn.Lo = uint32(tn.next)

		// Limit r0.Hi based on next ranges in list, but allow it to overlap
		// with ranges as long as it subsumes it.
		m := (rn.Lo - r0.Lo) % r0.Stride
		if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) {
			// Overlap, take the min of the two Hi values: for simplicity's sake
			// we only process one range at a time.
			if r0.Hi > rn.Hi {
				r0.Hi = rn.Hi
			}
		} else {
			// Not a compatible stride. Set to the last possible value before
			// rn.Lo, but ensure there is at least one value.
			if x := rn.Lo - m; r0.Lo <= x {
				r0.Hi = x
			}
			break
		}
	}

	// Update the next values for each table.
	for i := range ti {
		tn := &ti[i]
		if rune(r0.Hi) < tn.next {
			break
		}
		rn := tn.t.R32[tn.p]
		stride := rune(rn.Stride)
		// Advance to the first element of this range that lies beyond r0.Hi.
		tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride))
		if rune(rn.Hi) < tn.next {
			// The current range is used up: move to the following one, or
			// mark the table as exhausted.
			if tn.p++; int(tn.p) == len(tn.t.R32) {
				tn.next = atEnd
			} else {
				tn.next = rune(tn.t.R32[tn.p].Lo)
			}
		}
	}

	// A single-element range is given stride 1.
	if r0.Lo == r0.Hi {
		r0.Stride = 1
	}

	return r0
}

View file

@ -1,184 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rangetable
import (
"testing"
"unicode"
)
// Hand-built tables used as inputs by the tests and benchmarks.
var (
	// maxRuneTable contains only unicode.MaxRune.
	maxRuneTable = &unicode.RangeTable{
		R32: []unicode.Range32{
			{unicode.MaxRune, unicode.MaxRune, 1},
		},
	}

	// overlap1 and overlap2 cover interleaved, overlapping spans; their R32
	// ranges also use different strides.
	overlap1 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{0x100, 0xfffc, 4},
		},
		R32: []unicode.Range32{
			{0x100000, 0x10fffc, 4},
		},
	}

	overlap2 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{0x101, 0xfffd, 4},
		},
		R32: []unicode.Range32{
			{0x100001, 0x10fffd, 3},
		},
	}

	// The following table should be compacted into two entries for R16 and R32.
	optimize = &unicode.RangeTable{
		R16: []unicode.Range16{
			{0x1, 0x1, 1},
			{0x2, 0x2, 1},
			{0x3, 0x3, 1},
			{0x5, 0x5, 1},
			{0x7, 0x7, 1},
			{0x9, 0x9, 1},
			{0xb, 0xf, 2},
		},
		R32: []unicode.Range32{
			{0x10001, 0x10001, 1},
			{0x10002, 0x10002, 1},
			{0x10003, 0x10003, 1},
			{0x10005, 0x10005, 1},
			{0x10007, 0x10007, 1},
			{0x10009, 0x10009, 1},
			{0x1000b, 0x1000f, 2},
		},
	}
)
// TestMerge merges several groups of tables and checks, for every rune, that
// membership in the merged table matches membership in any of the inputs. It
// then checks that neighbouring output ranges are well formed, strictly
// increasing, and not trivially mergeable.
func TestMerge(t *testing.T) {
	groups := [][]*unicode.RangeTable{
		{unicode.Cc, unicode.Cf},
		{unicode.L, unicode.Ll},
		{unicode.L, unicode.Ll, unicode.Lu},
		{unicode.Ll, unicode.Lu},
		{unicode.M},
		unicode.GraphicRanges,
		cased,

		// Merge R16 only and R32 only and vice versa.
		{unicode.Khmer, unicode.Khudawadi},
		{unicode.Imperial_Aramaic, unicode.Radical},

		// Merge with empty.
		{&unicode.RangeTable{}},
		{&unicode.RangeTable{}, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, unicode.Hiragana},
		{unicode.Inherited, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},

		// Hypothetical tables.
		{maxRuneTable},
		{overlap1, overlap2},

		// Optimization
		{optimize},
	}

	for i, tables := range groups {
		merged := Merge(tables...)

		for r := rune(0); r <= unicode.MaxRune; r++ {
			got := unicode.Is(merged, r)
			want := unicode.In(r, tables...)
			if got != want {
				t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
			}
		}

		// Test optimization and correctness for R16.
		r16 := merged.R16
		for k := 0; k+1 < len(r16); k++ {
			cur, following := r16[k], r16[k+1]
			if cur.Lo > cur.Hi {
				t.Errorf("%d: Lo (%x) > Hi (%x)", i, cur.Lo, cur.Hi)
			}
			if cur.Hi >= following.Lo {
				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, cur.Hi, following.Lo)
			}
			if cur.Hi+cur.Stride == following.Lo {
				t.Errorf("%d: missed optimization for R16 at %d between %X and %x",
					i, k, cur, following)
			}
		}

		// Test optimization and correctness for R32.
		r32 := merged.R32
		for k := 0; k+1 < len(r32); k++ {
			cur, following := r32[k], r32[k+1]
			if cur.Lo > cur.Hi {
				t.Errorf("%d: Lo (%x) > Hi (%x)", i, cur.Lo, cur.Hi)
			}
			if cur.Hi >= following.Lo {
				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, cur.Hi, following.Lo)
			}
			if cur.Hi+cur.Stride == following.Lo {
				t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
					i, k, cur, following)
			}
		}
	}
}
// runes is the sample text looked up by the benchmarks; it mixes ASCII
// characters with U+10FFFD.
const runes = "Hello World in 2015!,\U0010fffd"
// BenchmarkNotMerged measures looking up the sample runes directly in
// unicode.GraphicRanges, without merging the tables first.
func BenchmarkNotMerged(b *testing.B) {
	for n := b.N; n > 0; n-- {
		for _, r := range runes {
			unicode.In(r, unicode.GraphicRanges...)
		}
	}
}
// BenchmarkMerged measures looking up the sample runes in a single table
// obtained by merging unicode.GraphicRanges once, before the loop.
func BenchmarkMerged(b *testing.B) {
	merged := Merge(unicode.GraphicRanges...)

	for n := b.N; n > 0; n-- {
		for _, r := range runes {
			unicode.Is(merged, r)
		}
	}
}
// cased lists the tables that are merged by TestMerge and by the "cased"
// benchmarks.
var cased = []*unicode.RangeTable{
	unicode.Lower,
	unicode.Upper,
	unicode.Title,
	unicode.Other_Lowercase,
	unicode.Other_Uppercase,
}
// BenchmarkNotMergedCased measures looking up the sample runes directly in
// the cased tables, without merging them first.
func BenchmarkNotMergedCased(b *testing.B) {
	for n := b.N; n > 0; n-- {
		for _, r := range runes {
			unicode.In(r, cased...)
		}
	}
}
// BenchmarkMergedCased measures looking up the sample runes in a single table
// obtained by merging the cased tables once, before the loop.
func BenchmarkMergedCased(b *testing.B) {
	// This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
	// Unicode 7.0.0.
	merged := Merge(cased...)

	for n := b.N; n > 0; n-- {
		for _, r := range runes {
			unicode.Is(merged, r)
		}
	}
}
// BenchmarkInit measures the cost of the merges themselves, for the cased
// tables and for unicode.GraphicRanges.
func BenchmarkInit(b *testing.B) {
	for n := b.N; n > 0; n-- {
		Merge(cased...)
		Merge(unicode.GraphicRanges...)
	}
}
// BenchmarkInit2 measures merging the two overlapping tables.
func BenchmarkInit2(b *testing.B) {
	// Hypothetical near-worst-case performance.
	for n := b.N; n > 0; n-- {
		Merge(overlap1, overlap2)
	}
}

View file

@ -1,70 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package rangetable provides utilities for creating and inspecting
// unicode.RangeTables.
package rangetable
import (
"sort"
"unicode"
)
// New creates a RangeTable from the given runes, which may contain duplicates.
// The runes may be given in any order. The argument is not modified, even
// when an existing slice is passed as New(s...).
func New(r ...rune) *unicode.RangeTable {
	if len(r) == 0 {
		return &unicode.RangeTable{}
	}

	// Sort and compact a private copy. When the caller passes a slice with
	// "...", r aliases that slice, and working in place would reorder and
	// overwrite the caller's data.
	sorted := append([]rune(nil), r...)
	sort.Sort(byRune(sorted))

	// Remove duplicates.
	k := 1
	for i := 1; i < len(sorted); i++ {
		if sorted[k-1] != sorted[i] {
			sorted[k] = sorted[i]
			k++
		}
	}

	// Start with one single-rune range per rune and let Merge compact them.
	var rt unicode.RangeTable
	for _, c := range sorted[:k] {
		if c <= 0xFFFF {
			rt.R16 = append(rt.R16, unicode.Range16{Lo: uint16(c), Hi: uint16(c), Stride: 1})
		} else {
			rt.R32 = append(rt.R32, unicode.Range32{Lo: uint32(c), Hi: uint32(c), Stride: 1})
		}
	}

	// Optimize RangeTable.
	return Merge(&rt)
}
// byRune implements sort.Interface, ordering runes by ascending value.
type byRune []rune

// Len returns the number of runes.
func (s byRune) Len() int {
	return len(s)
}

// Swap exchanges the runes at positions a and b.
func (s byRune) Swap(a, b int) {
	s[b], s[a] = s[a], s[b]
}

// Less reports whether the rune at position a is smaller than the one at b.
func (s byRune) Less(a, b int) bool {
	return s[b] > s[a]
}
// Visit visits all runes in the given RangeTable in order, calling fn for each.
// The R16 ranges are walked first, then the R32 ranges; within a range the
// runes from Lo to Hi are visited in steps of Stride.
func Visit(rt *unicode.RangeTable, fn func(rune)) {
	for _, e := range rt.R16 {
		lo, hi, step := rune(e.Lo), rune(e.Hi), rune(e.Stride)
		for r := lo; r <= hi; r += step {
			fn(r)
		}
	}
	for _, e := range rt.R32 {
		lo, hi, step := rune(e.Lo), rune(e.Hi), rune(e.Stride)
		for r := lo; r <= hi; r += step {
			fn(r)
		}
	}
}
// Assigned returns a RangeTable with all assigned code points for a given
// Unicode version. This includes graphic, format, control, and private-use
// characters. It returns nil if the data for the given version is not
// available.
func Assigned(version string) *unicode.RangeTable {
	if table, ok := assigned[version]; ok {
		return table
	}
	return nil
}

View file

@ -1,55 +0,0 @@
package rangetable
import (
"reflect"
"testing"
"unicode"
)
// Fixtures shared by TestVisit and TestNew.
var (
	// empty has no ranges at all.
	empty = &unicode.RangeTable{}
	// many holds every fifth code point from 0 to 0xffff in R16, followed by
	// 0x10004 and 0x10009 in R32.
	many = &unicode.RangeTable{
		R16:         []unicode.Range16{{0, 0xffff, 5}},
		R32:         []unicode.Range32{{0x10004, 0x10009, 5}},
		LatinOffset: 0,
	}
)
// TestVisit checks that Visit never calls back for an empty table, and that
// for the many table it yields every fifth code point from 0, ending at
// U+10009.
func TestVisit(t *testing.T) {
	Visit(empty, func(rune) {
		t.Error("call from empty RangeTable")
	})

	expected := rune(0)
	Visit(many, func(got rune) {
		if got != expected {
			t.Errorf("got %U; want %U", got, expected)
		}
		expected += 5
	})
	// expected is one step past the last rune that was visited.
	if last := expected - 5; last != 0x10009 {
		t.Errorf("last run was %U; want U+10009", last)
	}
}
// TestNew checks that a table rebuilt by New from the runes of an existing
// table yields exactly the same runes, in the same order.
func TestNew(t *testing.T) {
	tables := []*unicode.RangeTable{
		empty,
		unicode.Co,
		unicode.Letter,
		unicode.ASCII_Hex_Digit,
		many,
		maxRuneTable,
	}

	// collect returns a visitor that appends every rune it sees to *dst.
	collect := func(dst *[]rune) func(rune) {
		return func(r rune) {
			*dst = append(*dst, r)
		}
	}

	for i, rt := range tables {
		var got, want []rune

		Visit(rt, collect(&want))
		Visit(New(want...), collect(&got))

		if !reflect.DeepEqual(got, want) {
			t.Errorf("%d:\ngot  %v;\nwant %v", i, got, want)
		}
	}
}

File diff suppressed because it is too large Load diff

View file

@ -1,59 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package runenames
// This file contains code common to gen.go and the package code.
// The mapping from rune to string (i.e. offset and length in the data string)
// is encoded as a two level table. The first level maps from contiguous rune
// ranges [runeOffset, runeOffset+runeLength) to entries. Entries are either
// direct (for repeated names such as "<CJK Ideograph>") or indirect (for runs
// of unique names such as "SPACE", "EXCLAMATION MARK", "QUOTATION MARK", ...).
//
// Each first level table element is 64 bits. The runeOffset (21 bits) and
// runeLength (16 bits) take the 37 high bits. The entry takes the 27 low bits,
// with directness encoded in the least significant bit.
//
// A direct entry encodes a dataOffset (18 bits) and dataLength (8 bits) in the
// data string. 18 bits is too short to encode the entire data string's length,
// but the data string's contents are arranged so that all of the few direct
// entries' offsets come before all of the many indirect entries' offsets.
//
// An indirect entry encodes a dataBase (10 bits) and a table1Offset (16 bits).
// The table1Offset is the start of a range in the second level table. The
// length of that range is the same as the runeLength.
//
// Each second level table element is 16 bits, an index into data, relative to
// a bias equal to (dataBase << dataBaseUnit). That (bias + index) is the
// (dataOffset + dataLength) in the data string. The dataOffset is implied by
// the previous table element (with the same implicit bias).
const (
	// Widths, in bits, of the fields of a first level table element.
	bitsRuneOffset   = 21
	bitsRuneLength   = 16
	bitsDataOffset   = 18
	bitsDataLength   = 8
	bitsDirect       = 1
	bitsDataBase     = 10
	bitsTable1Offset = 16

	// Positions of the least significant bit of each field. Each shift is the
	// sum of the widths of the fields stored below it.
	shiftRuneOffset   = 0 + bitsDirect + bitsDataLength + bitsDataOffset + bitsRuneLength
	shiftRuneLength   = 0 + bitsDirect + bitsDataLength + bitsDataOffset
	shiftDataOffset   = 0 + bitsDirect + bitsDataLength
	shiftDataLength   = 0 + bitsDirect
	shiftDirect       = 0
	shiftDataBase     = 0 + bitsDirect + bitsTable1Offset
	shiftTable1Offset = 0 + bitsDirect

	// Masks selecting a field once it has been shifted down to bit 0.
	maskRuneLength   = 1<<bitsRuneLength - 1
	maskDataOffset   = 1<<bitsDataOffset - 1
	maskDataLength   = 1<<bitsDataLength - 1
	maskDirect       = 1<<bitsDirect - 1
	maskDataBase     = 1<<bitsDataBase - 1
	maskTable1Offset = 1<<bitsTable1Offset - 1

	// dataBaseUnit is the shift applied to a dataBase to obtain the bias that
	// second level table elements are relative to.
	dataBaseUnit = 10
)

View file

@ -1,118 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runenames_test
import (
"fmt"
"golang.org/x/text/unicode/runenames"
)
// Example prints the name returned by runenames.Name for a selection of
// runes: an invalid rune, control characters, named characters, members of
// named ranges, and code points for which the expected name is empty.
func Example() {
	runes := []rune{
		-1,
		'\U00000000',
		'\U0000001f',
		'\U00000020',
		'\U00000021',
		'\U00000041',
		'\U0000007e',
		'\U0000007f',
		'\U00000080',
		'\U000000e0',

		'\U0000037f',
		'\U00000380',
		'\U00000381',
		'\U00000382',
		'\U00000383',
		'\U00000384',
		'\U00000385',
		'\U00000386',
		'\U000007c0',

		'\U00002603',
		'\U000033ff',
		'\U00003400',
		'\U00003401',
		'\U00003402',
		'\U00004dc0',

		'\U00009fd5',
		'\U00009fd6',
		'\U00009fff',
		'\U0000a000',
		0xdc00, // '\U0000dc00' (Low Surrogate) is an invalid Go literal.
		'\U0000f800',
		'\U0000fffc',
		'\U0000fffd',
		'\U0000fffe',
		'\U0000ffff',

		'\U00010000',
		'\U0001f574',
		'\U0002fa1d',
		'\U0002fa1e',
		'\U000e0100',
		'\U000e01ef',
		'\U000e01f0',
		'\U00100000',
		'\U0010fffd',
		'\U0010fffe',
		'\U0010ffff',
	}

	for _, r := range runes {
		fmt.Printf("%08x %q\n", r, runenames.Name(r))
	}

	// Output:
	// -0000001 ""
	// 00000000 "<control>"
	// 0000001f "<control>"
	// 00000020 "SPACE"
	// 00000021 "EXCLAMATION MARK"
	// 00000041 "LATIN CAPITAL LETTER A"
	// 0000007e "TILDE"
	// 0000007f "<control>"
	// 00000080 "<control>"
	// 000000e0 "LATIN SMALL LETTER A WITH GRAVE"
	// 0000037f "GREEK CAPITAL LETTER YOT"
	// 00000380 ""
	// 00000381 ""
	// 00000382 ""
	// 00000383 ""
	// 00000384 "GREEK TONOS"
	// 00000385 "GREEK DIALYTIKA TONOS"
	// 00000386 "GREEK CAPITAL LETTER ALPHA WITH TONOS"
	// 000007c0 "NKO DIGIT ZERO"
	// 00002603 "SNOWMAN"
	// 000033ff "SQUARE GAL"
	// 00003400 "<CJK Ideograph Extension A>"
	// 00003401 "<CJK Ideograph Extension A>"
	// 00003402 "<CJK Ideograph Extension A>"
	// 00004dc0 "HEXAGRAM FOR THE CREATIVE HEAVEN"
	// 00009fd5 "<CJK Ideograph>"
	// 00009fd6 ""
	// 00009fff ""
	// 0000a000 "YI SYLLABLE IT"
	// 0000dc00 "<Low Surrogate>"
	// 0000f800 "<Private Use>"
	// 0000fffc "OBJECT REPLACEMENT CHARACTER"
	// 0000fffd "REPLACEMENT CHARACTER"
	// 0000fffe ""
	// 0000ffff ""
	// 00010000 "LINEAR B SYLLABLE B008 A"
	// 0001f574 "MAN IN BUSINESS SUIT LEVITATING"
	// 0002fa1d "CJK COMPATIBILITY IDEOGRAPH-2FA1D"
	// 0002fa1e ""
	// 000e0100 "VARIATION SELECTOR-17"
	// 000e01ef "VARIATION SELECTOR-256"
	// 000e01f0 ""
	// 00100000 "<Plane 16 Private Use>"
	// 0010fffd "<Plane 16 Private Use>"
	// 0010fffe ""
	// 0010ffff ""
}

View file

@ -1,195 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"log"
"strings"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
)
// snippet is a slice of data; data is the concatenation of all of the names.
// The zero snippet marks a rune without a name.
type snippet struct {
	offset int    // start of the name within data
	length int    // length of the name in bytes
	s      string // the name itself
}
// makeTable0EntryDirect packs a direct first level table entry: the rune
// range that starts at rOffset and spans rLength runes, all of which share
// the name stored at data[dOffset : dOffset+dLength]. It exits through
// log.Fatalf if a value does not fit in the bit field reserved for it.
func makeTable0EntryDirect(rOffset, rLength, dOffset, dLength int) uint64 {
	if rOffset >= 1<<bitsRuneOffset {
		log.Fatalf("makeTable0EntryDirect: rOffset %d is too large", rOffset)
	}
	if rLength >= 1<<bitsRuneLength {
		log.Fatalf("makeTable0EntryDirect: rLength %d is too large", rLength)
	}
	if dOffset >= 1<<bitsDataOffset {
		log.Fatalf("makeTable0EntryDirect: dOffset %d is too large", dOffset)
	}
	// dLength is stored in bitsDataLength bits. Checking it against a wider
	// field would let an oversized length spill into the dataOffset bits.
	if dLength >= 1<<bitsDataLength {
		log.Fatalf("makeTable0EntryDirect: dLength %d is too large", dLength)
	}
	return uint64(rOffset)<<shiftRuneOffset |
		uint64(rLength)<<shiftRuneLength |
		uint64(dOffset)<<shiftDataOffset |
		uint64(dLength)<<shiftDataLength |
		1 // Direct bit.
}
// makeTable0EntryIndirect packs an indirect first level table entry: the rune
// range that starts at rOffset and spans rLength runes, together with the
// data base dBase and the second level table offset t1Offset. The direct bit
// is left clear. It exits through log.Fatalf if a value does not fit in the
// bit field reserved for it.
func makeTable0EntryIndirect(rOffset, rLength, dBase, t1Offset int) uint64 {
	// log.Fatalf does not return, so only the first violated limit is reported.
	switch {
	case rOffset >= 1<<bitsRuneOffset:
		log.Fatalf("makeTable0EntryIndirect: rOffset %d is too large", rOffset)
	case rLength >= 1<<bitsRuneLength:
		log.Fatalf("makeTable0EntryIndirect: rLength %d is too large", rLength)
	case dBase >= 1<<bitsDataBase:
		log.Fatalf("makeTable0EntryIndirect: dBase %d is too large", dBase)
	case t1Offset >= 1<<bitsTable1Offset:
		log.Fatalf("makeTable0EntryIndirect: t1Offset %d is too large", t1Offset)
	}

	entry := uint64(rOffset) << shiftRuneOffset
	entry |= uint64(rLength) << shiftRuneLength
	entry |= uint64(dBase) << shiftDataBase
	entry |= uint64(t1Offset) << shiftTable1Offset
	return entry
}
// makeTable1Entry converts x to a second level table element. It exits
// through log.Fatalf if x does not fit in 16 bits.
func makeTable1Entry(x int) uint16 {
	if x >= 0 && x <= 0xffff {
		return uint16(x)
	}
	log.Fatalf("makeTable1Entry: entry %d is out of range", x)
	return uint16(x)
}
var (
	// data is the concatenation of all names, grown by appendRepeatNames and
	// appendUniqueNames.
	data []byte
	// snippets is indexed by rune and locates that rune's name within data.
	snippets = make([]snippet, 1+unicode.MaxRune)
)
// main generates the runenames tables: it parses the names, lays them out in
// data, builds the two lookup tables and writes the generated Go files.
func main() {
	gen.Init()

	names, counts := parse()
	// Repeated names are appended before unique names, so the names that can
	// become direct entries sit at the low offsets of data.
	appendRepeatNames(names, counts)
	appendUniqueNames(names, counts)

	table0, table1 := makeTables()

	// NOTE(review): presumably rewrites gen_bits.go as bits.go in package
	// runenames - confirm against gen.Repackage.
	gen.Repackage("gen_bits.go", "bits.go", "runenames")

	w := gen.NewCodeWriter()
	w.WriteVar("table0", table0)
	w.WriteVar("table1", table1)
	w.WriteConst("data", string(data))
	w.WriteGoFile("tables.go", "runenames")
}
// parse reads UnicodeData.txt and returns the name of every listed rune,
// indexed by rune, together with the number of runes that carry each name.
// In a name that starts with '<' and contains ", First>", everything from
// that marker on is replaced by ">". Records with an empty name are skipped.
func parse() (names []string, counts map[string]int) {
	const rangeStart = ", First>"

	counts = make(map[string]int)
	names = make([]string, 1+unicode.MaxRune)

	record := func(p *ucd.Parser) {
		r, name := p.Rune(0), p.String(ucd.Name)
		if name == "" {
			return
		}
		if name[0] == '<' {
			if cut := strings.Index(name, rangeStart); cut >= 0 {
				name = name[:cut] + ">"
			}
		}
		names[r] = name
		counts[name]++
	}
	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), record)

	return names, counts
}
// appendRepeatNames appends every name shared by more than one rune to data,
// once per distinct name, and points the snippet of each such rune at that
// single copy. It exits through log.Fatalf if a repeated name does not start
// with '<'.
func appendRepeatNames(names []string, counts map[string]int) {
	seen := map[string]snippet{}
	for r, name := range names {
		if name == "" || counts[name] == 1 {
			continue
		}
		if name[0] != '<' {
			log.Fatalf("Repeated name %q does not start with a '<'", name)
		}

		z, ok := seen[name]
		if !ok {
			// First occurrence: store the name and remember where it went.
			z = snippet{
				offset: len(data),
				length: len(name),
				s:      name,
			}
			data = append(data, name...)
			seen[name] = z
		}
		snippets[r] = z
	}
}
func appendUniqueNames(names []string, counts map[string]int) {
for r, s := range names {
if s == "" || counts[s] != 1 {
continue
}
if s[0] == '<' {
log.Fatalf("Unique name %q starts with a '<'", s)
}
z := snippet{
offset: len(data),
length: len(s),
s: s,
}
data = append(data, s...)
snippets[r] = z
}
}
// makeTables builds the two lookup tables from snippets. A run of consecutive
// runes with the same snippet becomes one direct table0 entry. Any other run
// of consecutive named runes becomes one indirect table0 entry plus a range
// of table1 entries.
func makeTables() (table0 []uint64, table1 []uint16) {
	for i := 0; i < len(snippets); {
		zi := snippets[i]
		// Runes without a name have the zero snippet and get no entry.
		if zi == (snippet{}) {
			i++
			continue
		}

		// Look for repeat names. If we have one, we only need a table0 entry.
		j := i + 1
		for ; j < len(snippets) && zi == snippets[j]; j++ {
		}
		if j > i+1 {
			table0 = append(table0, makeTable0EntryDirect(i, j-i, zi.offset, zi.length))
			i = j
			continue
		}

		// Otherwise, we have a run of unique names. We need one table0 entry
		// and two or more table1 entries.
		// base is zi.offset rounded down to a multiple of 1<<dataBaseUnit;
		// the table1 entries of this run are stored relative to it.
		base := zi.offset &^ (1<<dataBaseUnit - 1)
		// The entry just before t1Offset holds the start of the first name,
		// and the entry at t1Offset holds its end.
		t1Offset := len(table1) + 1
		table1 = append(table1, makeTable1Entry(zi.offset-base))
		table1 = append(table1, makeTable1Entry(zi.offset+zi.length-base))
		for ; j < len(snippets) && snippets[j] != (snippet{}); j++ {
			zj := snippets[j]
			// A name starting with '<' ends the run.
			if data[zj.offset] == '<' {
				break
			}
			table1 = append(table1, makeTable1Entry(zj.offset+zj.length-base))
		}
		table0 = append(table0, makeTable0EntryIndirect(i, j-i, base>>dataBaseUnit, t1Offset))
		i = j
	}
	return table0, table1
}

View file

@ -1,63 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains code common to gen.go and the package code.
// The mapping from rune to string (i.e. offset and length in the data string)
// is encoded as a two level table. The first level maps from contiguous rune
// ranges [runeOffset, runeOffset+runeLength) to entries. Entries are either
// direct (for repeated names such as "<CJK Ideograph>") or indirect (for runs
// of unique names such as "SPACE", "EXCLAMATION MARK", "QUOTATION MARK", ...).
//
// Each first level table element is 64 bits. The runeOffset (21 bits) and
// runeLength (16 bits) take the 37 high bits. The entry takes the 27 low bits,
// with directness encoded in the least significant bit.
//
// A direct entry encodes a dataOffset (18 bits) and dataLength (8 bits) in the
// data string. 18 bits is too short to encode the entire data string's length,
// but the data string's contents are arranged so that all of the few direct
// entries' offsets come before all of the many indirect entries' offsets.
//
// An indirect entry encodes a dataBase (10 bits) and a table1Offset (16 bits).
// The table1Offset is the start of a range in the second level table. The
// length of that range is the same as the runeLength.
//
// Each second level table element is 16 bits, an index into data, relative to
// a bias equal to (dataBase << dataBaseUnit). That (bias + index) is the
// (dataOffset + dataLength) in the data string. The dataOffset is implied by
// the previous table element (with the same implicit bias).
// Widths, in bits, of the fields packed into a first level table element.
const (
	bitsRuneOffset   = 21
	bitsRuneLength   = 16
	bitsDataOffset   = 18
	bitsDataLength   = 8
	bitsDirect       = 1
	bitsDataBase     = 10
	bitsTable1Offset = 16
)

// Shifts for the fields of a direct element. Starting at the least
// significant bit the order is: direct, dataLength, dataOffset, runeLength,
// runeOffset. Each shift is the previous field's shift plus its width.
const (
	shiftDirect     = 0
	shiftDataLength = shiftDirect + bitsDirect
	shiftDataOffset = shiftDataLength + bitsDataLength
	shiftRuneLength = shiftDataOffset + bitsDataOffset
	shiftRuneOffset = shiftRuneLength + bitsRuneLength
)

// Shifts for the fields that are specific to an indirect element: the
// table1Offset sits directly above the direct bit, the dataBase above that.
const (
	shiftTable1Offset = shiftDirect + bitsDirect
	shiftDataBase     = shiftTable1Offset + bitsTable1Offset
)

// Masks that isolate a field once it has been shifted down to bit 0.
const (
	maskRuneLength   = 1<<bitsRuneLength - 1
	maskDataOffset   = 1<<bitsDataOffset - 1
	maskDataLength   = 1<<bitsDataLength - 1
	maskDirect       = 1<<bitsDirect - 1
	maskDataBase     = 1<<bitsDataBase - 1
	maskTable1Offset = 1<<bitsTable1Offset - 1
)

// dataBaseUnit is the left shift that turns a dataBase into an offset in the
// data string.
const dataBaseUnit = 10

View file

@ -1,48 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_bits.go
// Package runenames provides rune names from the Unicode Character Database.
// For example, the name for '\u0100' is "LATIN CAPITAL LETTER A WITH MACRON".
//
// See http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
package runenames
import (
"sort"
)
// Name returns the name for r.
//
// It returns the empty string when no first level table entry covers r.
func Name(r rune) string {
	// Locate the first entry whose rune range starts beyond r. The entry just
	// before it is the only candidate that can contain r.
	after := sort.Search(len(table0), func(k int) bool {
		return rune(table0[k]>>shiftRuneOffset) > r
	})
	if after == 0 {
		return ""
	}

	entry := table0[after-1]
	first := rune(entry >> shiftRuneOffset)
	count := rune(entry>>shiftRuneLength) & maskRuneLength
	if first+count <= r {
		// r falls in the gap after the candidate's range.
		return ""
	}

	if (entry>>shiftDirect)&maskDirect == 0 {
		// Indirect entry: consecutive second level elements give the start
		// and end of the name, both relative to the entry's bias.
		bias := uint32(entry>>shiftDataBase) & maskDataBase
		bias <<= dataBaseUnit
		idx := rune(entry>>shiftTable1Offset) & maskTable1Offset
		idx += r - first
		lo := bias + uint32(table1[idx-1]) // dataOffset
		hi := bias + uint32(table1[idx])   // dataOffset + dataLength
		return data[lo:hi]
	}

	// Direct entry: the offset and length are stored in the entry itself.
	start := int(entry>>shiftDataOffset) & maskDataOffset
	size := int(entry>>shiftDataLength) & maskDataLength
	return data[start : start+size]
}

View file

@ -1,46 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runenames
import (
"strings"
"testing"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// TestName compares Name against the names listed in UnicodeData.txt for
// every rune from 0 through unicode.MaxRune. Runes absent from the file are
// expected to yield the empty string. The test gives up after 100 mismatches.
func TestName(t *testing.T) {
	testtext.SkipIfNotLong(t)

	// Build the expected names, applying the same ", First>" trimming that is
	// applied to names starting with '<'.
	wants := make([]string, 1+unicode.MaxRune)
	ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)
		name := p.String(ucd.Name)
		if name == "" {
			return
		}
		if strings.HasPrefix(name, "<") {
			const first = ", First>"
			if cut := strings.Index(name, first); cut >= 0 {
				name = name[:cut] + ">"
			}
		}
		wants[r] = name
	})

	const maxErrors = 100
	mismatches := 0
	for r, want := range wants {
		got := Name(rune(r))
		if got == want {
			continue
		}
		t.Errorf("r=%#08x: got %q, want %q", r, got, want)
		mismatches++
		if mismatches == maxErrors {
			t.Fatal("too many errors")
		}
	}
}

File diff suppressed because it is too large Load diff