dep prune

dhax 2017-11-15 18:48:32 +01:00
parent 65441fa5b3
commit 3a2d24baca
1564 changed files with 0 additions and 638818 deletions


@@ -1,366 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package catmsg contains support types for package x/text/message/catalog.
//
// This package contains the low-level implementations of Message used by the
// catalog package and provides primitives for other packages to implement their
// own. For instance, the plural package provides functionality for selecting
// translation strings based on the plural category of substitution arguments.
//
//
// Encoding and Decoding
//
// Catalogs store Messages encoded as a single string. Compiling a message into
// a string both results in a more compact representation and speeds up evaluation.
//
// A Message must implement a Compile method to convert its arbitrary
// representation to a string. The Compile method takes an Encoder which
// facilitates serializing the message. Encoders also provide more context of
// the message's creation (such as the language for which the message is intended),
// which may not be known at the time of the creation of the message.
//
// Each message type must also have an accompanying decoder registered to decode
// the message. This decoder takes a Decoder argument which provides the
// counterparts of the Encoder's encoding methods.
//
//
// Renderers
//
// A Decoder must be initialized with a Renderer implementation. These
// implementations must be provided by packages that use Catalogs, typically
// formatting packages such as x/text/message. A typical user will not need to
// worry about this type; it is only relevant to packages that do string
// formatting and want to use the catalog package to handle localized strings.
//
// A package that uses catalogs for selecting strings receives selection results
// as a sequence of substrings passed to the Renderer. The following snippet
// shows how such a message is defined using the message package.
//
// message.Set(language.English, "You are %d minute(s) late.",
// catalog.Var("minutes", plural.Select(1, "one", "minute")),
// catalog.String("You are %[1]d ${minutes} late."))
//
// p := message.NewPrinter(language.English)
// p.Printf("You are %d minute(s) late.", 5) // always 5 minutes late.
//
// To evaluate the Printf, package message wraps the arguments in a Renderer
// that is passed to the catalog for message decoding. The call sequence that
// results from evaluating the above message, assuming the person is rather
// tardy, is:
//
// Render("You are %[1]d ")
// Arg(1)
// Render("minutes")
// Render(" late.")
//
// The call to Arg is caused by the plural.Select execution, which evaluates
// the argument to determine whether the singular or plural message form should
// be selected. The calls to Render report the partial results to the message
// package for further evaluation.
package catmsg
import (
"errors"
"fmt"
"strconv"
"strings"
"sync"
"golang.org/x/text/language"
)
// A Handle refers to a registered message type.
type Handle int
// First is used as a Handle to EncodeMessageType, followed by a series of calls
// to EncodeMessage, to implement selecting the first matching Message.
//
// TODO: this can be removed once we either can use type aliases or if the
// internals of this package are merged with the catalog package.
var First Handle = msgFirst
// A Handler decodes and evaluates data compiled by a Message and sends the
// result to the Decoder. The output may depend on the value of the substitution
// arguments, accessible by the Decoder's Arg method. The Handler returns false
// if there is no translation for the given substitution arguments.
type Handler func(d *Decoder) bool
// Register records the existence of a message type and returns a Handle that
// can be used in the Encoder's EncodeMessageType method to create such
// messages. The prefix of the name should be the package path followed by
// an optional disambiguating string.
// Register will panic if a handle for the same name was already registered.
func Register(name string, handler Handler) Handle {
mutex.Lock()
defer mutex.Unlock()
if _, ok := names[name]; ok {
panic(fmt.Errorf("catmsg: handler for %q already exists", name))
}
h := Handle(len(handlers))
names[name] = h
handlers = append(handlers, handler)
return h
}
// These handlers require fixed positions in the handlers slice.
const (
msgVars Handle = iota
msgFirst
msgRaw
msgString
numFixed
)
const prefix = "golang.org/x/text/internal/catmsg."
var (
mutex sync.Mutex
names = map[string]Handle{
prefix + "Vars": msgVars,
prefix + "First": msgFirst,
prefix + "Raw": msgRaw,
prefix + "String": msgString,
}
handlers = make([]Handler, numFixed)
)
func init() {
// This handler is a message type wrapper that initializes a decoder
// with a variable block. This message type, if present, is always at the
// start of an encoded message.
handlers[msgVars] = func(d *Decoder) bool {
blockSize := int(d.DecodeUint())
d.vars = d.data[:blockSize]
d.data = d.data[blockSize:]
return d.executeMessage()
}
// First takes the first message in a sequence that results in a match for
// the given substitution arguments.
handlers[msgFirst] = func(d *Decoder) bool {
for !d.Done() {
if d.ExecuteMessage() {
return true
}
}
return false
}
handlers[msgRaw] = func(d *Decoder) bool {
d.Render(d.data)
return true
}
// A String message alternates between a string constant and a variable
// substitution.
handlers[msgString] = func(d *Decoder) bool {
for !d.Done() {
if str := d.DecodeString(); str != "" {
d.Render(str)
}
if d.Done() {
break
}
d.ExecuteSubstitution()
}
return true
}
}
var (
// ErrIncomplete indicates a compiled message does not define translations
// for all possible argument values. If this message is returned, evaluating
// a message may result in the ErrNoMatch error.
ErrIncomplete = errors.New("catmsg: incomplete message; may not give result for all inputs")
// ErrNoMatch indicates no translation message matched the given input
// parameters when evaluating a message.
ErrNoMatch = errors.New("catmsg: no translation for inputs")
)
// A Message holds a collection of translations for the same phrase that may
// vary based on the values of substitution arguments.
type Message interface {
// Compile encodes the format string(s) of the message as a string for later
// evaluation.
//
// The first call Compile makes on the encoder must be EncodeMessageType.
// The handle passed to this call may either be a handle returned by
// Register to encode a single custom message, or HandleFirst followed by
// a sequence of calls to EncodeMessage.
//
// Compile must return ErrIncomplete if it is possible for evaluation to
// not match any translation for a given set of formatting parameters.
// For example, selecting a translation based on plural form may not yield
// a match if the form "Other" is not one of the selectors.
//
// Compile may return any other application-specific error. For backwards
// compatibility with packages like fmt, which often do not do sanity
// checking of format strings ahead of time, Compile should still make an
// effort to have some sensible fallback in case of an error.
Compile(e *Encoder) error
}
// Compile converts a Message to a data string that can be stored in a Catalog.
// The resulting string can subsequently be decoded by passing to the Execute
// method of a Decoder.
func Compile(tag language.Tag, macros Dictionary, m Message) (data string, err error) {
// TODO: pass macros so they can be used for validation.
v := &Encoder{inBody: true} // encoder for variables
v.root = v
e := &Encoder{root: v, parent: v, tag: tag} // encoder for messages
err = m.Compile(e)
// This package serves the message package, which in turn is meant to be a
// drop-in replacement for fmt. With the fmt package, format strings are
// evaluated lazily and errors are handled by substituting strings in the
// result, rather than returning an error. Dealing with multiple languages
// makes it more important to check errors ahead of time. We chose to be
// consistent and compatible and allow graceful degradation in case of
// errors.
buf := e.buf[stripPrefix(e.buf):]
if len(v.buf) > 0 {
// Prepend variable block.
b := make([]byte, 1+maxVarintBytes+len(v.buf)+len(buf))
b[0] = byte(msgVars)
b = b[:1+encodeUint(b[1:], uint64(len(v.buf)))]
b = append(b, v.buf...)
b = append(b, buf...)
buf = b
}
if err == nil {
err = v.err
}
return string(buf), err
}
// Var defines a message that can be substituted for a placeholder of the same
// name. If an expression does not result in a string after evaluation, Name is
// used as the substitution. For example:
// Var{
// Name: "minutes",
// Message: plural.Select(1, "one", "minute"),
// }
// will render "minute" for the singular form and fall back to the variable
// name "minutes" for other plural forms.
type Var struct {
Name string
Message Message
}
var errIsVar = errors.New("catmsg: variable used as message")
// Compile implements Message.
//
// Note that this method merely registers a variable; it does not create an
// encoded message.
func (v *Var) Compile(e *Encoder) error {
if err := e.addVar(v.Name, v.Message); err != nil {
return err
}
// Using a Var by itself is an error. If it is in a sequence followed by
// other messages referring to it, this error will be ignored.
return errIsVar
}
// Raw is a message consisting of a single format string that is passed as is
// to the Renderer.
//
// Note that a Renderer may still do its own variable substitution.
type Raw string
// Compile implements Message.
func (r Raw) Compile(e *Encoder) (err error) {
e.EncodeMessageType(msgRaw)
// Special case: raw strings don't have a size encoding and so don't use
// EncodeString.
e.buf = append(e.buf, r...)
return nil
}
// String is a message consisting of a single format string which contains
// placeholders that may be substituted with variables.
//
// Variable substitutions are marked with placeholders and a variable name of
// the form ${name}. Any other substitutions such as Go templates or
// printf-style substitutions are left to be done by the Renderer.
//
// When evaluating a string interpolation, a Renderer will receive separate
// calls for each placeholder and interstitial string. For example, for the
// message: "%[1]v ${invites} %[2]v to ${their} party." The sequence of calls
// is:
// d.Render("%[1]v ")
// d.Arg(1)
// d.Render(resultOfInvites)
// d.Render(" %[2]v to ")
// d.Arg(2)
// d.Render(resultOfTheir)
// d.Render(" party.")
// where the messages for "invites" and "their" both use a plural.Select
// referring to the first argument.
//
// Strings may also invoke macros. Macros are essentially variables that can be
// reused. Macros may, for instance, be used to make selections between
// different conjugations of a verb. See the catalog package description for an
// overview of macros.
type String string
// Compile implements Message. It parses the placeholder formats and returns
// any error.
func (s String) Compile(e *Encoder) (err error) {
msg := string(s)
const subStart = "${"
hasHeader := false
p := 0
b := []byte{}
for {
i := strings.Index(msg[p:], subStart)
if i == -1 {
break
}
b = append(b, msg[p:p+i]...)
p += i + len(subStart)
if i = strings.IndexByte(msg[p:], '}'); i == -1 {
b = append(b, "$!(MISSINGBRACE)"...)
err = fmt.Errorf("catmsg: missing '}'")
p = len(msg)
break
}
name := strings.TrimSpace(msg[p : p+i])
if q := strings.IndexByte(name, '('); q == -1 {
if !hasHeader {
hasHeader = true
e.EncodeMessageType(msgString)
}
e.EncodeString(string(b))
e.EncodeSubstitution(name)
b = b[:0]
} else if j := strings.IndexByte(name[q:], ')'); j == -1 {
// TODO: what should the error be?
b = append(b, "$!(MISSINGPAREN)"...)
err = fmt.Errorf("catmsg: missing ')'")
} else if x, sErr := strconv.ParseUint(strings.TrimSpace(name[q+1:q+j]), 10, 32); sErr != nil {
// TODO: handle more than one argument
b = append(b, "$!(BADNUM)"...)
err = fmt.Errorf("catmsg: invalid number %q", strings.TrimSpace(name[q+1:q+j]))
} else {
if !hasHeader {
hasHeader = true
e.EncodeMessageType(msgString)
}
e.EncodeString(string(b))
e.EncodeSubstitution(name[:q], int(x))
b = b[:0]
}
p += i + 1
}
b = append(b, msg[p:]...)
if !hasHeader {
// Simplify string to a raw string.
Raw(string(b)).Compile(e)
} else if len(b) > 0 {
e.EncodeString(string(b))
}
return err
}
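// Illustrative sketch, not part of the original x/text source: the
// Compile/Execute round trip described in the package documentation, using a
// minimal Renderer. The names concatRenderer, emptyDict, and exampleRoundTrip
// are hypothetical; only the exported API of this package (Compile, NewDecoder,
// Renderer, Dictionary) is assumed.
type concatRenderer struct {
	args   []interface{}
	result string
}

// Render appends each decoded fragment to the accumulated result.
func (r *concatRenderer) Render(s string) { r.result += s }

// Arg returns the i-th (1-based) substitution argument, or nil if absent.
func (r *concatRenderer) Arg(i int) interface{} {
	if i < 1 || i > len(r.args) {
		return nil
	}
	return r.args[i-1]
}

// emptyDict is a Dictionary with no macros.
type emptyDict struct{}

func (emptyDict) Lookup(key string) (data string, ok bool) { return "", false }

// exampleRoundTrip compiles a String message without ${...} placeholders (it
// is therefore stored as a Raw message) and evaluates it again with Execute.
func exampleRoundTrip() (string, error) {
	data, err := Compile(language.English, emptyDict{}, String("You are %[1]d minute(s) late."))
	if err != nil {
		return "", err
	}
	r := &concatRenderer{args: []interface{}{5}}
	d := NewDecoder(language.English, r, emptyDict{})
	if err := d.Execute(data); err != nil {
		return "", err
	}
	return r.result, nil // "You are %[1]d minute(s) late."
}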


@@ -1,316 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
import (
"errors"
"strings"
"testing"
"golang.org/x/text/language"
)
type renderer struct {
args []int
result string
}
func (r *renderer) Arg(i int) interface{} {
if i >= len(r.args) {
return nil
}
return r.args[i]
}
func (r *renderer) Render(s string) {
if r.result != "" {
r.result += "|"
}
r.result += s
}
func TestCodec(t *testing.T) {
type test struct {
args []int
out string
decErr string
}
single := func(out, err string) []test { return []test{{out: out, decErr: err}} }
testCases := []struct {
desc string
m Message
enc string
encErr string
tests []test
}{{
desc: "unused variable",
m: &Var{"name", String("foo")},
encErr: errIsVar.Error(),
tests: single("", ""),
}, {
desc: "empty",
m: empty{},
tests: single("", ""),
}, {
desc: "sequence with empty",
m: seq{empty{}},
tests: single("", ""),
}, {
desc: "raw string",
m: Raw("foo"),
tests: single("foo", ""),
}, {
desc: "raw string no sub",
m: Raw("${foo}"),
enc: "\x02${foo}",
tests: single("${foo}", ""),
}, {
desc: "simple string",
m: String("foo"),
tests: single("foo", ""),
}, {
desc: "missing var",
m: String("foo${bar}"),
enc: "\x03\x03foo\x02\x03bar",
encErr: `unknown var "bar"`,
tests: single("foo|bar", ""),
}, {
desc: "empty var",
m: seq{
&Var{"bar", seq{}},
String("foo${bar}"),
},
enc: "\x00\x05\x04\x02bar\x03\x03foo\x00\x00",
// TODO: recognize that it is cheaper to substitute bar.
tests: single("foo|bar", ""),
}, {
desc: "var after value",
m: seq{
String("foo${bar}"),
&Var{"bar", String("baz")},
},
encErr: errIsVar.Error(),
tests: single("foo|bar", ""),
}, {
desc: "substitution",
m: seq{
&Var{"bar", String("baz")},
String("foo${bar}"),
},
tests: single("foo|baz", ""),
}, {
desc: "shadowed variable",
m: seq{
&Var{"bar", String("baz")},
seq{
&Var{"bar", String("BAZ")},
String("foo${bar}"),
},
},
tests: single("foo|BAZ", ""),
}, {
desc: "nested value",
m: nestedLang{nestedLang{empty{}}},
tests: single("nl|nl", ""),
}, {
desc: "not shadowed variable",
m: seq{
&Var{"bar", String("baz")},
seq{
String("foo${bar}"),
&Var{"bar", String("BAZ")},
},
},
encErr: errIsVar.Error(),
tests: single("foo|baz", ""),
}, {
desc: "duplicate variable",
m: seq{
&Var{"bar", String("baz")},
&Var{"bar", String("BAZ")},
String("${bar}"),
},
encErr: "catmsg: duplicate variable \"bar\"",
tests: single("baz", ""),
}, {
desc: "complete incomplete variable",
m: seq{
&Var{"bar", incomplete{}},
String("${bar}"),
},
enc: "\x00\t\b\x01\x01\x04\x04\x02bar\x03\x00\x00\x00",
// TODO: recognize that it is cheaper to substitute bar.
tests: single("bar", ""),
}, {
desc: "incomplete sequence",
m: seq{
incomplete{},
incomplete{},
},
encErr: ErrIncomplete.Error(),
tests: single("", ErrNoMatch.Error()),
}, {
desc: "compile error variable",
m: seq{
&Var{"bar", errorCompileMsg{}},
String("${bar}"),
},
encErr: errCompileTest.Error(),
tests: single("bar", ""),
}, {
desc: "compile error message",
m: errorCompileMsg{},
encErr: errCompileTest.Error(),
tests: single("", ""),
}, {
desc: "compile error sequence",
m: seq{
errorCompileMsg{},
errorCompileMsg{},
},
encErr: errCompileTest.Error(),
tests: single("", ""),
}, {
desc: "macro",
m: String("${exists(1)}"),
tests: single("you betya!", ""),
}, {
desc: "macro incomplete",
m: String("${incomplete(1)}"),
enc: "\x03\x00\x01\nincomplete\x01",
tests: single("incomplete", ""),
}, {
desc: "macro undefined at end",
m: String("${undefined(1)}"),
enc: "\x03\x00\x01\tundefined\x01",
tests: single("undefined", "catmsg: undefined macro \"undefined\""),
}, {
desc: "macro undefined with more text following",
m: String("${undefined(1)}."),
enc: "\x03\x00\x01\tundefined\x01\x01.",
tests: single("undefined|.", "catmsg: undefined macro \"undefined\""),
}, {
desc: "macro missing paren",
m: String("${missing(1}"),
encErr: "catmsg: missing ')'",
tests: single("$!(MISSINGPAREN)", ""),
}, {
desc: "macro bad num",
m: String("aa${bad(a)}"),
encErr: "catmsg: invalid number \"a\"",
tests: single("aa$!(BADNUM)", ""),
}, {
desc: "var missing brace",
m: String("a${missing"),
encErr: "catmsg: missing '}'",
tests: single("a$!(MISSINGBRACE)", ""),
}}
r := &renderer{}
dec := NewDecoder(language.Und, r, macros)
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
// Use a language other than Und so that we can test
// passing the language to nested values.
data, err := Compile(language.Dutch, macros, tc.m)
if failErr(err, tc.encErr) {
t.Errorf("encoding error: got %+q; want %+q", err, tc.encErr)
}
if tc.enc != "" && data != tc.enc {
t.Errorf("encoding: got %+q; want %+q", data, tc.enc)
}
for _, st := range tc.tests {
t.Run("", func(t *testing.T) {
*r = renderer{args: st.args}
if err = dec.Execute(data); failErr(err, st.decErr) {
t.Errorf("decoding error: got %+q; want %+q", err, st.decErr)
}
if r.result != st.out {
t.Errorf("decode: got %+q; want %+q", r.result, st.out)
}
})
}
})
}
}
func failErr(got error, want string) bool {
if got == nil {
return want != ""
}
return want == "" || !strings.Contains(got.Error(), want)
}
type seq []Message
func (s seq) Compile(e *Encoder) (err error) {
err = ErrIncomplete
e.EncodeMessageType(First)
for _, m := range s {
// Pass only the last error, but still encode erroneous or complete
// messages here so that different scenarios can be tested.
err = e.EncodeMessage(m)
}
return err
}
type empty struct{}
func (empty) Compile(e *Encoder) (err error) { return nil }
var msgIncomplete = Register(
"golang.org/x/text/internal/catmsg.incomplete",
func(d *Decoder) bool { return false })
type incomplete struct{}
func (incomplete) Compile(e *Encoder) (err error) {
e.EncodeMessageType(msgIncomplete)
return ErrIncomplete
}
var msgNested = Register(
"golang.org/x/text/internal/catmsg.nested",
func(d *Decoder) bool {
d.Render(d.DecodeString())
d.ExecuteMessage()
return true
})
type nestedLang struct{ Message }
func (n nestedLang) Compile(e *Encoder) (err error) {
e.EncodeMessageType(msgNested)
e.EncodeString(e.Language().String())
e.EncodeMessage(n.Message)
return nil
}
type errorCompileMsg struct{}
var errCompileTest = errors.New("catmsg: compile error test")
func (errorCompileMsg) Compile(e *Encoder) (err error) {
return errCompileTest
}
type dictionary struct{}
var (
macros = dictionary{}
dictMessages = map[string]string{
"exists": compile(String("you betya!")),
"incomplete": compile(incomplete{}),
}
)
func (d dictionary) Lookup(key string) (data string, ok bool) {
data, ok = dictMessages[key]
return
}
func compile(m Message) (data string) {
data, _ = Compile(language.Und, macros, m)
return data
}


@@ -1,407 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
import (
"errors"
"fmt"
"golang.org/x/text/language"
)
// A Renderer renders a Message.
type Renderer interface {
// Render renders the given string. The given string may be interpreted as a
// format string, such as the one used by the fmt package or a template.
Render(s string)
// Arg returns the i-th argument passed to format a message. This method
// should return nil if there is no such argument. Messages need access to
// arguments to allow selecting a message based on linguistic features of
// those arguments.
Arg(i int) interface{}
}
// A Dictionary specifies a source of messages, including variables or macros.
type Dictionary interface {
// Lookup returns the message for the given key. It returns false for ok if
// such a message could not be found.
Lookup(key string) (data string, ok bool)
// TODO: consider returning an interface, instead of a string. This will
// allow implementations to do their own message type decoding.
}
// An Encoder serializes a Message to a string.
type Encoder struct {
// The root encoder is used for storing encoded variables.
root *Encoder
// The parent encoder provides the surrounding scopes for resolving variable
// names.
parent *Encoder
tag language.Tag
// buf holds the encoded message so far. After a message completes encoding,
// the contents of buf, prefixed by the encoded length, are flushed to the
// parent buffer.
buf []byte
// vars is the lookup table of variables in the current scope.
vars []keyVal
err error
inBody bool // if false next call must be EncodeMessageType
}
type keyVal struct {
key string
offset int
}
// Language reports the language for which the encoded message will be stored
// in the Catalog.
func (e *Encoder) Language() language.Tag { return e.tag }
func (e *Encoder) setError(err error) {
if e.root.err == nil {
e.root.err = err
}
}
// EncodeUint encodes x.
func (e *Encoder) EncodeUint(x uint64) {
e.checkInBody()
var buf [maxVarintBytes]byte
n := encodeUint(buf[:], x)
e.buf = append(e.buf, buf[:n]...)
}
// EncodeString encodes s.
func (e *Encoder) EncodeString(s string) {
e.checkInBody()
e.EncodeUint(uint64(len(s)))
e.buf = append(e.buf, s...)
}
// EncodeMessageType marks the current message to be of type h.
//
// It must be the first call of a Message's Compile method.
func (e *Encoder) EncodeMessageType(h Handle) {
if e.inBody {
panic("catmsg: EncodeMessageType not the first method called")
}
e.inBody = true
e.EncodeUint(uint64(h))
}
// EncodeMessage serializes the given message inline at the current position.
func (e *Encoder) EncodeMessage(m Message) error {
e = &Encoder{root: e.root, parent: e, tag: e.tag}
err := m.Compile(e)
if _, ok := m.(*Var); !ok {
e.flushTo(e.parent)
}
return err
}
func (e *Encoder) checkInBody() {
if !e.inBody {
panic("catmsg: expected prior call to EncodeMessageType")
}
}
// stripPrefix indicates the number of prefix bytes that must be stripped to
// turn a single-element sequence into a message that is just this single member
// without its size prefix. If the message can be stripped, b[1:n] contains the
// size prefix.
func stripPrefix(b []byte) (n int) {
if len(b) > 0 && Handle(b[0]) == msgFirst {
x, n, _ := decodeUint(b[1:])
if 1+n+int(x) == len(b) {
return 1 + n
}
}
return 0
}
func (e *Encoder) flushTo(dst *Encoder) {
data := e.buf
p := stripPrefix(data)
if p > 0 {
data = data[1:]
} else {
// Prefix the size.
dst.EncodeUint(uint64(len(data)))
}
dst.buf = append(dst.buf, data...)
}
func (e *Encoder) addVar(key string, m Message) error {
for _, v := range e.parent.vars {
if v.key == key {
err := fmt.Errorf("catmsg: duplicate variable %q", key)
e.setError(err)
return err
}
}
scope := e.parent
// If a variable message is Incomplete, and does not evaluate to a message
// during execution, we fall back to the variable name. We encode this by
// appending the variable name if the message reports it's incomplete.
err := m.Compile(e)
if err != ErrIncomplete {
e.setError(err)
}
switch {
case len(e.buf) == 1 && Handle(e.buf[0]) == msgFirst: // empty sequence
e.buf = e.buf[:0]
e.inBody = false
fallthrough
case len(e.buf) == 0:
// Empty message.
if err := String(key).Compile(e); err != nil {
e.setError(err)
}
case err == ErrIncomplete:
if Handle(e.buf[0]) != msgFirst {
seq := &Encoder{root: e.root, parent: e}
seq.EncodeMessageType(First)
e.flushTo(seq)
e = seq
}
// e contains a sequence; append the fallback string.
e.EncodeMessage(String(key))
}
// Flush result to variable heap.
offset := len(e.root.buf)
e.flushTo(e.root)
e.buf = e.buf[:0]
// Record variable offset in current scope.
scope.vars = append(scope.vars, keyVal{key: key, offset: offset})
return err
}
const (
substituteVar = iota
substituteMacro
substituteError
)
// EncodeSubstitution inserts a resolved reference to a variable or macro.
//
// This call must be matched with a call to ExecuteSubstitution at decoding
// time.
func (e *Encoder) EncodeSubstitution(name string, arguments ...int) {
if arity := len(arguments); arity > 0 {
// TODO: also resolve macros.
e.EncodeUint(substituteMacro)
e.EncodeString(name)
for _, a := range arguments {
e.EncodeUint(uint64(a))
}
return
}
for scope := e; scope != nil; scope = scope.parent {
for _, v := range scope.vars {
if v.key != name {
continue
}
e.EncodeUint(substituteVar) // TODO: support arity > 0
e.EncodeUint(uint64(v.offset))
return
}
}
// TODO: refer to dictionary-wide scoped variables.
e.EncodeUint(substituteError)
e.EncodeString(name)
e.setError(fmt.Errorf("catmsg: unknown var %q", name))
}
// A Decoder deserializes and evaluates messages that are encoded by an encoder.
type Decoder struct {
tag language.Tag
dst Renderer
macros Dictionary
err error
vars string
data string
macroArg int // TODO: allow more than one argument
}
// NewDecoder returns a new Decoder.
//
// Decoders are designed to be reused for multiple invocations of Execute.
// Only one goroutine may call Execute at a time.
func NewDecoder(tag language.Tag, r Renderer, macros Dictionary) *Decoder {
return &Decoder{
tag: tag,
dst: r,
macros: macros,
}
}
func (d *Decoder) setError(err error) {
if d.err == nil {
d.err = err
}
}
// Language returns the language in which the message is being rendered.
//
// The destination language may be a child language of the language used for
// encoding. For instance, a decoding language of "pt-PT" is consistent with an
// encoding language of "pt".
func (d *Decoder) Language() language.Tag { return d.tag }
// Done reports whether there are more bytes to process in this message.
func (d *Decoder) Done() bool { return len(d.data) == 0 }
// Render implements Renderer.
func (d *Decoder) Render(s string) { d.dst.Render(s) }
// Arg implements Renderer.
//
// During evaluation of macros, the argument positions may be mapped to
// arguments that differ from the original call.
func (d *Decoder) Arg(i int) interface{} {
if d.macroArg != 0 {
if i != 1 {
panic("catmsg: only macros with single argument supported")
}
i = d.macroArg
}
return d.dst.Arg(i)
}
// DecodeUint decodes a number that was encoded with EncodeUint and advances the
// position.
func (d *Decoder) DecodeUint() uint64 {
x, n, err := decodeUintString(d.data)
d.data = d.data[n:]
if err != nil {
d.setError(err)
}
return x
}
// DecodeString decodes a string that was encoded with EncodeString and advances
// the position.
func (d *Decoder) DecodeString() string {
size := d.DecodeUint()
s := d.data[:size]
d.data = d.data[size:]
return s
}
// SkipMessage skips the message at the current location and advances the
// position.
func (d *Decoder) SkipMessage() {
n := int(d.DecodeUint())
d.data = d.data[n:]
}
// Execute decodes and evaluates msg.
//
// Only one goroutine may call Execute at a time.
func (d *Decoder) Execute(msg string) error {
d.err = nil
if !d.execute(msg) {
return ErrNoMatch
}
return d.err
}
func (d *Decoder) execute(msg string) bool {
saved := d.data
d.data = msg
ok := d.executeMessage()
d.data = saved
return ok
}
// executeMessageFromData is like execute, but also decodes a leading message
// size and clips the given string accordingly.
//
// It reports the number of bytes consumed and whether a message was selected.
func (d *Decoder) executeMessageFromData(s string) (n int, ok bool) {
saved := d.data
d.data = s
size := int(d.DecodeUint())
n = len(s) - len(d.data)
// Sanitize the setting. This allows skipping a size argument for
// RawString and method Done.
d.data = d.data[:size]
ok = d.executeMessage()
n += size - len(d.data)
d.data = saved
return n, ok
}
var errUnknownHandler = errors.New("catmsg: string contains unsupported handler")
// executeMessage reads the handle id, initializes the decoder and executes the
// message. It is assumed that all of d.data[d.p:] is the single message.
func (d *Decoder) executeMessage() bool {
if d.Done() {
// We interpret no data as a valid empty message.
return true
}
handle := d.DecodeUint()
var fn Handler
mutex.Lock()
if int(handle) < len(handlers) {
fn = handlers[handle]
}
mutex.Unlock()
if fn == nil {
d.setError(errUnknownHandler)
d.execute(fmt.Sprintf("\x02$!(UNKNOWNMSGHANDLER=%#x)", handle))
return true
}
return fn(d)
}
// ExecuteMessage decodes and executes the message at the current position.
func (d *Decoder) ExecuteMessage() bool {
n, ok := d.executeMessageFromData(d.data)
d.data = d.data[n:]
return ok
}
// ExecuteSubstitution executes the message corresponding to the substitution
// as encoded by EncodeSubstitution.
func (d *Decoder) ExecuteSubstitution() {
switch x := d.DecodeUint(); x {
case substituteVar:
offset := d.DecodeUint()
d.executeMessageFromData(d.vars[offset:])
case substituteMacro:
name := d.DecodeString()
data, ok := d.macros.Lookup(name)
old := d.macroArg
// TODO: support macros of arity other than 1.
d.macroArg = int(d.DecodeUint())
switch {
case !ok:
// TODO: detect this at creation time.
d.setError(fmt.Errorf("catmsg: undefined macro %q", name))
fallthrough
case !d.execute(data):
d.dst.Render(name) // fall back to macro name.
}
d.macroArg = old
case substituteError:
d.dst.Render(d.DecodeString())
default:
panic("catmsg: unreachable")
}
}
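// Illustrative sketch, not part of the original x/text source: a custom
// message type wired up with Register, showing the Encode*/Decode* pairing
// described above. The registration string, msgRepeat, and the repeat type are
// hypothetical names chosen for this example.
var msgRepeat = Register("example.com/catmsg.repeat", func(d *Decoder) bool {
	n := int(d.DecodeUint())
	s := d.DecodeString()
	for i := 0; i < n; i++ {
		d.Render(s)
	}
	return true
})

// repeat renders its text n times when the compiled message is executed.
type repeat struct {
	text string
	n    int
}

// Compile implements Message. EncodeMessageType must be the first call; the
// payload is then written with EncodeUint and EncodeString, and the handler
// registered above reads it back in the same order.
func (r repeat) Compile(e *Encoder) error {
	e.EncodeMessageType(msgRepeat)
	e.EncodeUint(uint64(r.n))
	e.EncodeString(r.text)
	return nil
}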


@@ -1,62 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
// This file implements varint encoding analogous to the one in encoding/binary.
// We need a string version of this function, so we add that here and then add
// the rest for consistency.
import "errors"
var (
errIllegalVarint = errors.New("catmsg: illegal varint")
errVarintTooLarge = errors.New("catmsg: varint too large for uint64")
)
const maxVarintBytes = 10 // maximum length of a varint
// encodeUint encodes x as a variable-sized integer into buf and returns the
// number of bytes written. buf must be at least maxVarintBytes long.
func encodeUint(buf []byte, x uint64) (n int) {
for ; x > 127; n++ {
buf[n] = 0x80 | uint8(x&0x7F)
x >>= 7
}
buf[n] = uint8(x)
n++
return n
}
func decodeUintString(s string) (x uint64, size int, err error) {
i := 0
for shift := uint(0); shift < 64; shift += 7 {
if i >= len(s) {
return 0, i, errIllegalVarint
}
b := uint64(s[i])
i++
x |= (b & 0x7F) << shift
if b&0x80 == 0 {
return x, i, nil
}
}
return 0, i, errVarintTooLarge
}
func decodeUint(b []byte) (x uint64, size int, err error) {
i := 0
for shift := uint(0); shift < 64; shift += 7 {
if i >= len(b) {
return 0, i, errIllegalVarint
}
c := uint64(b[i])
i++
x |= (c & 0x7F) << shift
if c&0x80 == 0 {
return x, i, nil
}
}
return 0, i, errVarintTooLarge
}
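// Illustrative sketch, not part of the original x/text source: a round trip
// through the varint encoding above. The value 300 encodes to the two bytes
// 0xAC 0x02 (low 7 bits first, high bit set on continuation bytes), and
// decodeUintString recovers (300, 2, nil). varintRoundTrip is a hypothetical
// helper name.
func varintRoundTrip() (x uint64, size int, err error) {
	var buf [maxVarintBytes]byte
	n := encodeUint(buf[:], 300) // buf[:n] == []byte{0xAC, 0x02}
	return decodeUintString(string(buf[:n]))
}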


@@ -1,123 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package catmsg
import (
"fmt"
"testing"
)
func TestEncodeUint(t *testing.T) {
testCases := []struct {
x uint64
enc string
}{
{0, "\x00"},
{1, "\x01"},
{2, "\x02"},
{0x7f, "\x7f"},
{0x80, "\x80\x01"},
{1 << 14, "\x80\x80\x01"},
{0xffffffff, "\xff\xff\xff\xff\x0f"},
{0xffffffffffffffff, "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01"},
}
for _, tc := range testCases {
buf := [maxVarintBytes]byte{}
got := string(buf[:encodeUint(buf[:], tc.x)])
if got != tc.enc {
t.Errorf("EncodeUint(%#x) = %q; want %q", tc.x, got, tc.enc)
}
}
}
func TestDecodeUint(t *testing.T) {
testCases := []struct {
x uint64
size int
enc string
err error
}{{
x: 0,
size: 0,
enc: "",
err: errIllegalVarint,
}, {
x: 0,
size: 1,
enc: "\x80",
err: errIllegalVarint,
}, {
x: 0,
size: 3,
enc: "\x80\x80\x80",
err: errIllegalVarint,
}, {
x: 0,
size: 1,
enc: "\x00",
}, {
x: 1,
size: 1,
enc: "\x01",
}, {
x: 2,
size: 1,
enc: "\x02",
}, {
x: 0x7f,
size: 1,
enc: "\x7f",
}, {
x: 0x80,
size: 2,
enc: "\x80\x01",
}, {
x: 1 << 14,
size: 3,
enc: "\x80\x80\x01",
}, {
x: 0xffffffff,
size: 5,
enc: "\xff\xff\xff\xff\x0f",
}, {
x: 0xffffffffffffffff,
size: 10,
enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01",
}, {
x: 0xffffffffffffffff,
size: 10,
enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01\x00",
}, {
x: 0,
size: 10,
enc: "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01",
err: errVarintTooLarge,
}}
forms := []struct {
name string
decode func(s string) (x uint64, size int, err error)
}{
{"decode", func(s string) (x uint64, size int, err error) {
return decodeUint([]byte(s))
}},
{"decodeString", decodeUintString},
}
for _, f := range forms {
for _, tc := range testCases {
t.Run(fmt.Sprintf("%s:%q", f.name, tc.enc), func(t *testing.T) {
x, size, err := f.decode(tc.enc)
if err != tc.err {
t.Errorf("err = %q; want %q", err, tc.err)
}
if size != tc.size {
t.Errorf("size = %d; want %d", size, tc.size)
}
if x != tc.x {
t.Errorf("decode = %#x; want %#x", x, tc.x)
}
})
}
}
}


@@ -1,121 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab_test
// This file contains tests which need to import package collate, which causes
// an import cycle when done within package colltab itself.
import (
"bytes"
"testing"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
// assigned is used to test only runes that are inside the scope of the Unicode
// version used to generate the collation table.
var assigned = rangetable.Assigned(collate.UnicodeVersion)
func TestNonDigits(t *testing.T) {
c := collate.New(language.English, collate.Loose, collate.Numeric)
// Verify that all non-digit numbers sort outside of the number range.
for r, hi := rune(unicode.N.R16[0].Lo), rune(unicode.N.R32[0].Hi); r <= hi; r++ {
if unicode.In(r, unicode.Nd) || !unicode.In(r, assigned) {
continue
}
if a := string(r); c.CompareString(a, "0") != -1 && c.CompareString(a, "999999") != 1 {
t.Errorf("%+q non-digit number is collated as digit", a)
}
}
}
func TestNumericCompare(t *testing.T) {
c := collate.New(language.English, collate.Loose, collate.Numeric)
// Iterate over all digits.
for _, r16 := range unicode.Nd.R16 {
testDigitCompare(t, c, rune(r16.Lo), rune(r16.Hi))
}
for _, r32 := range unicode.Nd.R32 {
testDigitCompare(t, c, rune(r32.Lo), rune(r32.Hi))
}
}
func testDigitCompare(t *testing.T, c *collate.Collator, zero, nine rune) {
if !unicode.In(zero, assigned) {
return
}
n := int(nine - zero + 1)
if n%10 != 0 {
t.Fatalf("len([%+q, %+q]) = %d; want a multiple of 10", zero, nine, n)
}
for _, tt := range []struct {
prefix string
b [11]string
}{
{
prefix: "",
b: [11]string{
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
},
},
{
prefix: "1",
b: [11]string{
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
},
},
{
prefix: "0",
b: [11]string{
"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
},
},
{
prefix: "00",
b: [11]string{
"000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010",
},
},
{
prefix: "9",
b: [11]string{
"90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
},
},
} {
for k := 0; k <= n; k++ {
i := k % 10
a := tt.prefix + string(zero+rune(i))
for j, b := range tt.b {
want := 0
switch {
case i < j:
want = -1
case i > j:
want = 1
}
got := c.CompareString(a, b)
if got != want {
t.Errorf("Compare(%+q, %+q) = %d; want %d", a, b, got, want)
return
}
}
}
}
}
func BenchmarkNumericWeighter(b *testing.B) {
c := collate.New(language.English, collate.Numeric)
input := bytes.Repeat([]byte("Testing, testing 123..."), 100)
b.SetBytes(int64(2 * len(input)))
for i := 0; i < b.N; i++ {
c.Compare(input, input)
}
}


@@ -1,371 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"unicode"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
NumLevels
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
MaxQuaternary = 0x1FFFFF // 21 bits.
)
// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// than or equal to PrivateUse for their own purposes. However, these should
// never be returned by AppendNext.
type Elem uint32
const (
maxCE Elem = 0xAFFFFFFF
PrivateUse = minContract
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
)
type ceType int
const (
ceNormal ceType = iota // ceNormal includes implicits (ce == 0)
ceContractionIndex // rune can be a start of a contraction
ceExpansionIndex // rune expands into a sequence of collation elements
ceDecompose // rune expands using NFKC decomposition
)
func (ce Elem) ctype() ceType {
if ce <= maxCE {
return ceNormal
}
if ce <= maxContract {
return ceContractionIndex
}
if ce <= maxExpand {
return ceExpansionIndex
}
return ceDecompose
}
// For normal collation elements, we assume that a collation element either has
// a primary or non-default secondary value, not both.
// Collation elements with a primary value are of the form
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* is the offset of the secondary from its default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value
const (
ceTypeMask = 0xC0000000
ceTypeMaskExt = 0xE0000000
ceIgnoreMask = 0xF00FFFFF
ceType1 = 0x40000000
ceType2 = 0x00000000
ceType3or4 = 0x80000000
ceType4 = 0xA0000000
ceTypeQ = 0xC0000000
Ignore = ceType4
firstNonPrimary = 0x80000000
lastSpecialPrimary = 0xA0000000
secondaryMask = 0x80000000
hasTertiaryMask = 0x40000000
primaryValueMask = 0x3FFFFE00
maxPrimaryBits = 21
compactPrimaryBits = 16
maxSecondaryBits = 12
maxTertiaryBits = 8
maxCCCBits = 8
maxSecondaryCompactBits = 8
maxSecondaryDiffBits = 4
maxTertiaryCompactBits = 5
primaryShift = 9
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4
)
func makeImplicitCE(primary int) Elem {
return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
}
// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
if w := primary; w >= 1<<maxPrimaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
if w := secondary; w >= 1<<maxSecondaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits)
}
if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits)
}
ce := Elem(0)
if primary != 0 {
if ccc != 0 {
if primary >= 1<<compactPrimaryBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits)
}
if secondary != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc)
}
ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits))
ce |= Elem(ccc) << compactPrimaryBits
ce |= Elem(primary)
ce |= ceType3or4
} else if tertiary == defaultTertiary {
if secondary >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits)
}
ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary)
ce |= ceType1
} else {
d := secondary - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
if tertiary >= 1<<maxTertiaryCompactBits {
return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits)
}
ce = Elem(primary<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(tertiary)
}
} else {
ce = Elem(secondary<<maxTertiaryBits + tertiary)
ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= ceType4
}
return ce, nil
}
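// Illustrative sketch, not part of the original x/text source: a round trip
// through MakeElem for a normal collation element with a primary weight and
// default secondary and tertiary weights, which takes the compact ceType1 form
// described in the layout comment above. elemRoundTrip is a hypothetical name.
func elemRoundTrip() (primary, secondary int, tertiary uint8, err error) {
	ce, err := MakeElem(100, defaultSecondary, defaultTertiary, 0)
	if err != nil {
		return 0, 0, 0, err
	}
	// ce.Primary() == 100, ce.Secondary() == defaultSecondary (0x20),
	// ce.Tertiary() == defaultTertiary (0x2).
	return ce.Primary(), ce.Secondary(), ce.Tertiary(), nil
}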
// MakeQuaternary returns an Elem with the given quaternary value.
func MakeQuaternary(v int) Elem {
return ceTypeQ | Elem(v<<primaryShift)
}
// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
func (ce Elem) Mask(l Level) uint32 {
return 0
}
// CCC returns the canonical combining class associated with the underlying character,
// if applicable, or 0 otherwise.
func (ce Elem) CCC() uint8 {
if ce&ceType3or4 != 0 {
if ce&ceType4 == ceType3or4 {
return uint8(ce >> 16)
}
return uint8(ce >> 20)
}
return 0
}
// Primary returns the primary collation weight for ce.
func (ce Elem) Primary() int {
if ce >= firstNonPrimary {
if ce > lastSpecialPrimary {
return 0
}
return int(uint16(ce))
}
return int(ce&primaryValueMask) >> primaryShift
}
// Secondary returns the secondary collation weight for ce.
func (ce Elem) Secondary() int {
switch ce & ceTypeMask {
case ceType1:
return int(uint8(ce))
case ceType2:
return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
case ceType3or4:
if ce < ceType4 {
return defaultSecondary
}
return int(ce>>8) & 0xFFF
case ceTypeQ:
return 0
}
panic("should not reach here")
}
// Tertiary returns the tertiary collation weight for ce.
func (ce Elem) Tertiary() uint8 {
if ce&hasTertiaryMask == 0 {
if ce&ceType3or4 == 0 {
return uint8(ce & 0x1F)
}
if ce&ceType4 == ceType4 {
return uint8(ce)
}
return uint8(ce>>24) & 0x1F // type 2
} else if ce&ceTypeMask == ceType1 {
return defaultTertiary
}
// ce is a quaternary value.
return 0
}
func (ce Elem) updateTertiary(t uint8) Elem {
if ce&ceTypeMask == ceType1 {
// convert to type 4
nce := ce & primaryValueMask
nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
ce = nce
} else if ce&ceTypeMaskExt == ceType3or4 {
ce &= ^Elem(maxTertiary << 24)
return ce | (Elem(t) << 24)
} else {
// type 2 or 4
ce &= ^Elem(maxTertiary)
}
return ce | Elem(t)
}
// Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == Ignore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants.
func (ce Elem) Quaternary() int {
if ce&ceTypeMask == ceTypeQ {
return int(ce&primaryValueMask) >> primaryShift
} else if ce&ceIgnoreMask == Ignore {
return 0
}
return MaxQuaternary
}
// Weight returns the collation weight for the given level.
func (ce Elem) Weight(l Level) int {
switch l {
case Primary:
return ce.Primary()
case Secondary:
return ce.Secondary()
case Tertiary:
return int(ce.Tertiary())
case Quaternary:
return ce.Quaternary()
}
return 0 // return 0 (ignore) for undefined levels.
}
// For contractions, collation elements are of the form
// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where
// - n* is the size of the first node in the contraction trie.
// - i* is the index of the first node in the contraction trie.
// - b* is the offset into the contraction collation element table.
// See contract.go for details on the contraction trie.
const (
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
func splitContractIndex(ce Elem) (index, n, offset int) {
n = int(ce & (1<<maxNBits - 1))
ce >>= maxNBits
index = int(ce & (1<<maxTrieIndexBits - 1))
ce >>= maxTrieIndexBits
offset = int(ce & (1<<maxContractOffsetBits - 1))
return
}
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16
func splitExpandIndex(ce Elem) (index int) {
return int(uint16(ce))
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
func splitDecompose(ce Elem) (t1, t2 uint8) {
return uint8(ce), uint8(ce >> 8)
}
const (
// These constants were taken from http://www.unicode.org/versions/Unicode6.0.0/ch12.pdf.
minUnified rune = 0x4E00
maxUnified = 0x9FFF
minCompatibility = 0xF900
maxCompatibility = 0xFAFF
minRare = 0x3400
maxRare = 0x4DBF
)
const (
commonUnifiedOffset = 0x10000
rareUnifiedOffset = 0x20000 // largest rune in common is U+FAFF
otherOffset = 0x50000 // largest rune in rare is U+2FA1D
illegalOffset = otherOffset + int(unicode.MaxRune)
maxPrimary = illegalOffset + 1
)
// implicitPrimary returns the primary weight for a rune for which there is no
// entry in the collation table.
// We take a different approach from the one specified in
// http://unicode.org/reports/tr10/#Implicit_Weights,
// but preserve the resulting relative ordering of the runes.
func implicitPrimary(r rune) int {
if unicode.Is(unicode.Ideographic, r) {
if r >= minUnified && r <= maxUnified {
// The most common case for CJK.
return int(r) + commonUnifiedOffset
}
if r >= minCompatibility && r <= maxCompatibility {
// This will typically not hit. The DUCET explicitly specifies mappings
// for all characters that do not decompose.
return int(r) + commonUnifiedOffset
}
return int(r) + rareUnifiedOffset
}
return int(r) + otherOffset
}
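// Illustrative sketch, not part of the original x/text source: two concrete
// implicit weights. U+4E2D, a common unified CJK ideograph, falls in the
// commonUnifiedOffset range; 'A' is not ideographic and falls in the
// otherOffset range. implicitExamples is a hypothetical name.
func implicitExamples() (cjk, latin int) {
	return implicitPrimary('中'), implicitPrimary('A') // 0x15E2D, 0x50041
}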


@@ -1,183 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"testing"
"unicode"
)
func (e Elem) String() string {
q := ""
if v := e.Quaternary(); v == MaxQuaternary {
q = "max"
} else {
q = fmt.Sprint(v)
}
return fmt.Sprintf("[%d, %d, %d, %s]",
e.Primary(),
e.Secondary(),
e.Tertiary(),
q)
}
type ceTest struct {
f func(inout []int) (Elem, ceType)
arg []int
}
func makeCE(weights []int) Elem {
ce, _ := MakeElem(weights[0], weights[1], weights[2], uint8(weights[3]))
return ce
}
var defaultValues = []int{0, defaultSecondary, defaultTertiary, 0}
func e(w ...int) Elem {
return makeCE(append(w, defaultValues[len(w):]...))
}
func makeContractIndex(index, n, offset int) Elem {
const (
contractID = 0xC0000000
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
ce := Elem(contractID)
ce += Elem(offset << (maxNBits + maxTrieIndexBits))
ce += Elem(index << maxNBits)
ce += Elem(n)
return ce
}
func makeExpandIndex(index int) Elem {
const expandID = 0xE0000000
return expandID + Elem(index)
}
func makeDecompose(t1, t2 int) Elem {
const decompID = 0xF0000000
return Elem(t2<<8+t1) + decompID
}
func normalCE(inout []int) (ce Elem, t ceType) {
ce = makeCE(inout)
inout[0] = ce.Primary()
inout[1] = ce.Secondary()
inout[2] = int(ce.Tertiary())
inout[3] = int(ce.CCC())
return ce, ceNormal
}
func expandCE(inout []int) (ce Elem, t ceType) {
ce = makeExpandIndex(inout[0])
inout[0] = splitExpandIndex(ce)
return ce, ceExpansionIndex
}
func contractCE(inout []int) (ce Elem, t ceType) {
ce = makeContractIndex(inout[0], inout[1], inout[2])
i, n, o := splitContractIndex(ce)
inout[0], inout[1], inout[2] = i, n, o
return ce, ceContractionIndex
}
func decompCE(inout []int) (ce Elem, t ceType) {
ce = makeDecompose(inout[0], inout[1])
t1, t2 := splitDecompose(ce)
inout[0], inout[1] = int(t1), int(t2)
return ce, ceDecompose
}
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0, 0}},
{normalCE, []int{0, 30, 3, 0}},
{normalCE, []int{0, 30, 3, 0xFF}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
{normalCE, []int{100, defaultSecondary, 3, 0}},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},
{contractCE, []int{0, 0, 0}},
{contractCE, []int{1, 1, 1}},
{contractCE, []int{1, (1 << maxNBits) - 1, 1}},
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}},
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}},
{expandCE, []int{0}},
{expandCE, []int{5}},
{expandCE, []int{(1 << maxExpandIndexBits) - 1}},
{decompCE, []int{0, 0}},
{decompCE, []int{1, 1}},
{decompCE, []int{0x1F, 0x1F}},
}
func TestColElem(t *testing.T) {
for i, tt := range ceTests {
inout := make([]int, len(tt.arg))
copy(inout, tt.arg)
ce, typ := tt.f(inout)
if ce.ctype() != typ {
t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, ce.ctype(), typ, ce)
}
for j, a := range tt.arg {
if inout[j] != a {
t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, inout[j], a, ce)
}
}
}
}
type implicitTest struct {
r rune
p int
}
var implicitTests = []implicitTest{
{0x33FF, 0x533FF},
{0x3400, 0x23400},
{0x4DC0, 0x54DC0},
{0x4DFF, 0x54DFF},
{0x4E00, 0x14E00},
{0x9FCB, 0x19FCB},
{0xA000, 0x5A000},
{0xF8FF, 0x5F8FF},
{0xF900, 0x1F900},
{0xFA23, 0x1FA23},
{0xFAD9, 0x1FAD9},
{0xFB00, 0x5FB00},
{0x20000, 0x40000},
{0x2B81C, 0x4B81C},
{unicode.MaxRune, 0x15FFFF}, // maximum primary value
}
func TestImplicit(t *testing.T) {
for _, tt := range implicitTests {
if p := implicitPrimary(tt.r); p != tt.p {
t.Errorf("%U: was %X; want %X", tt.r, p, tt.p)
}
}
}
func TestUpdateTertiary(t *testing.T) {
tests := []struct {
in, out Elem
t uint8
}{
{0x4000FE20, 0x0000FE8A, 0x0A},
{0x4000FE21, 0x0000FEAA, 0x0A},
{0x0000FE8B, 0x0000FE83, 0x03},
{0x82FF0188, 0x9BFF0188, 0x1B},
{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
}
for i, tt := range tests {
if out := tt.in.updateTertiary(tt.t); out != tt.out {
t.Errorf("%d: was %X; want %X", i, out, tt.out)
}
}
}


@@ -1,105 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package colltab contains functionality related to collation tables.
// It is only to be used by the collate and search packages.
package colltab // import "golang.org/x/text/internal/colltab"
import (
"sort"
"golang.org/x/text/language"
)
// MatchLang finds the index of t in tags, using a matching algorithm used for
// collation and search. tags[0] must be language.Und; the remaining tags should
// be sorted alphabetically.
//
// Language matching for collation and search is different from the matching
// defined by language.Matcher: the (inferred) base language must be an exact
// match for the relevant fields. For example, "gsw" should not match "de".
// Also the parent relation is different, as a parent may have a different
// script. For example, the usual (CLDR) parent of zh-Hant is und, whereas for
// MatchLang it is zh.
func MatchLang(t language.Tag, tags []language.Tag) int {
// Canonicalize the values, including collapsing macro languages.
t, _ = language.All.Canonicalize(t)
base, conf := t.Base()
// Estimate the base language, but only use high-confidence values.
if conf < language.High {
// The root locale supports "search" and "standard". We assume that any
// implementation will use only one of the two.
return 0
}
// Maximize base and script and normalize the tag.
if _, s, r := t.Raw(); (r != language.Region{}) {
p, _ := language.Raw.Compose(base, s, r)
// Taking the parent forces the script to be maximized.
p = p.Parent()
// Add back region and extensions.
t, _ = language.Raw.Compose(p, r, t.Extensions())
} else {
// Set the maximized base language.
t, _ = language.Raw.Compose(base, s, t.Extensions())
}
// Find start index of the language tag.
start := 1 + sort.Search(len(tags)-1, func(i int) bool {
b, _, _ := tags[i+1].Raw()
return base.String() <= b.String()
})
if start < len(tags) {
if b, _, _ := tags[start].Raw(); b != base {
return 0
}
}
// Besides the base language, script and region, only the collation type and
// the custom variant defined in the 'u' extension are used to distinguish a
// locale.
// Strip all variants and extensions and add back the custom variant.
tdef, _ := language.Raw.Compose(t.Raw())
tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va"))
// First search for a specialized collation type, if present.
try := []language.Tag{tdef}
if co := t.TypeForKey("co"); co != "" {
tco, _ := tdef.SetTypeForKey("co", co)
try = []language.Tag{tco, tdef}
}
for _, tx := range try {
for ; tx != language.Und; tx = parent(tx) {
for i, t := range tags[start:] {
if b, _, _ := t.Raw(); b != base {
break
}
if tx == t {
return start + i
}
}
}
}
return 0
}
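// Illustrative sketch, not part of the original x/text source: a typical
// MatchLang lookup. The supported slice starts with language.Und and is
// otherwise sorted by base language, as required above; the tag values and the
// name matchLangExample are chosen only for illustration.
func matchLangExample() int {
	supported := []language.Tag{
		language.Und,
		language.English,
		language.MustParse("zh-Hant-u-co-pinyin"),
	}
	// language.BritishEnglish ("en-GB") is expected to match the "en" entry at
	// index 1; a language with no supported base would fall back to index 0
	// (language.Und).
	return MatchLang(language.BritishEnglish, supported)
}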
// parent computes the structural parent. This means inheritance may change
// script. So, unlike the CLDR parent, parent(zh-Hant) == zh.
func parent(t language.Tag) language.Tag {
if t.TypeForKey("va") != "" {
t, _ = t.SetTypeForKey("va", "")
return t
}
result := language.Und
if b, s, r := t.Raw(); (r != language.Region{}) {
result, _ = language.Raw.Compose(b, s, t.Extensions())
} else if (s != language.Script{}) {
result, _ = language.Raw.Compose(b, t.Extensions())
} else if (b != language.Base{}) {
result, _ = language.Raw.Compose(t.Extensions())
}
return result
}


@@ -1,64 +0,0 @@
package colltab
import (
"testing"
"golang.org/x/text/language"
)
func TestMatchLang(t *testing.T) {
tags := []language.Tag{
0: language.Und,
1: language.MustParse("bs"),
2: language.German,
3: language.English,
4: language.AmericanEnglish,
5: language.MustParse("en-US-u-va-posix"),
6: language.Portuguese,
7: language.Serbian,
8: language.MustParse("sr-Latn"),
9: language.Chinese,
10: language.MustParse("zh-u-co-stroke"),
11: language.MustParse("zh-Hant-u-co-pinyin"),
12: language.TraditionalChinese,
}
for i, tc := range []struct {
x int
t language.Tag
}{
{0, language.Und},
{0, language.Persian}, // Default to first element when no match.
{3, language.English},
{4, language.AmericanEnglish},
{5, language.MustParse("en-US-u-va-posix")}, // Ext. variant match.
{4, language.MustParse("en-US-u-va-noposix")}, // Ext. variant mismatch.
{3, language.MustParse("en-UK-u-va-noposix")}, // Ext. variant mismatch.
{7, language.Serbian},
{0, language.Croatian}, // Don't match to close language!
{0, language.MustParse("gsw")}, // Don't match to close language!
{1, language.MustParse("bs-Cyrl")}, // Odd, but correct.
{1, language.MustParse("bs-Latn")}, // Estimated script drops.
{8, language.MustParse("sr-Latn")},
{9, language.Chinese},
{9, language.SimplifiedChinese},
{12, language.TraditionalChinese},
{11, language.MustParse("zh-Hant-u-co-pinyin")},
// TODO: should this be 12? Either inherited value (10) or default is
// fine in this case, though. Other locales are not affected.
{10, language.MustParse("zh-Hant-u-co-stroke")},
// There is no "phonebk" sorting order for zh-Hant, so use default.
{12, language.MustParse("zh-Hant-u-co-phonebk")},
{10, language.MustParse("zh-u-co-stroke")},
{12, language.MustParse("und-TW")}, // Infer script and language.
{12, language.MustParse("und-HK")}, // Infer script and language.
{6, language.MustParse("und-BR")}, // Infer script and language.
{6, language.MustParse("und-PT")}, // Infer script and language.
{2, language.MustParse("und-Latn-DE")}, // Infer language.
{0, language.MustParse("und-Jpan-BR")}, // Infers "ja", so no match.
{0, language.MustParse("zu")}, // No match past index.
} {
if x := MatchLang(tc.t, tags); x != tc.x {
t.Errorf("%d: MatchLang(%q, tags) = %d; want %d", i, tc.t, x, tc.x)
}
}
}


@@ -1,145 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import "unicode/utf8"
// For a description of ContractTrieSet, see text/collate/build/contract.go.
type ContractTrieSet []struct{ L, H, N, I uint8 }
// ctScanner is used to match a trie to an input sequence.
// A contraction may match a non-contiguous sequence of bytes in an input string.
// For example, if there is a contraction for <a, combining_ring>, it should match
// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does
// not block combining_ring.
// ctScanner does not automatically skip over non-blocking non-starters, but rather
// retains the state of the last match and leaves it up to the user to continue
// the match at the appropriate points.
type ctScanner struct {
states ContractTrieSet
s []byte
n int
index int
pindex int
done bool
}
type ctScannerString struct {
states ContractTrieSet
s string
n int
index int
pindex int
done bool
}
func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner {
return ctScanner{s: b, states: t[index:], n: n}
}
func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString {
return ctScannerString{s: str, states: t[index:], n: n}
}
// result returns the offset i and bytes consumed p so far. If no suffix
// matched, i and p will be 0.
func (s *ctScanner) result() (i, p int) {
return s.index, s.pindex
}
func (s *ctScannerString) result() (i, p int) {
return s.index, s.pindex
}
const (
final = 0
noIndex = 0xFF
)
// scan matches the longest suffix at the current location in the input
// and returns the number of bytes consumed.
func (s *ctScanner) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
i, states, n = 0, states[int(e.H)+n:], int(e.N)
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}
// scan is a verbatim copy of ctScanner.scan.
func (s *ctScannerString) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.L {
if e.L == c {
p++
if e.I != noIndex {
s.index = int(e.I)
s.pindex = p
}
if e.N != final {
i, states, n = 0, states[int(e.H)+n:], int(e.N)
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.N == final && c <= e.H {
p++
s.done = true
s.index = int(c-e.L) + int(e.I)
s.pindex = p
return p
}
}
i++
}
return pr
}
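// A minimal sketch (illustrative only, not part of the package API) of how a
// scanner is typically driven. The trie literal is the first fixture from the
// tests that follow; for the input "abc" the scanner reports suffix offset 1
// after consuming 3 bytes.
func scanExample() (offset, consumed int) {
cts := ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'c', 0, 1},
}
sc := cts.scanner(0, 1, []byte("abc"))
sc.scan(0)
return sc.result() // 1, 3
}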

View file

@ -1,131 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
type lookupStrings struct {
str string
offset int
n int // bytes consumed from input
}
type LookupTest struct {
lookup []lookupStrings
n int
tries ContractTrieSet
}
var lookupTests = []LookupTest{{
[]lookupStrings{
{"abc", 1, 3},
{"a", 0, 0},
{"b", 0, 0},
{"c", 0, 0},
{"d", 0, 0},
},
1,
ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'c', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"abd", 2, 3},
{"abe", 3, 3},
{"a", 0, 0},
{"ab", 0, 0},
{"d", 0, 0},
{"f", 0, 0},
},
1,
ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'e', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"ab", 2, 2},
{"a", 3, 1},
{"abcd", 1, 3},
{"abe", 2, 2},
},
1,
ContractTrieSet{
{'a', 0, 1, 3},
{'b', 0, 1, 2},
{'c', 'c', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"abd", 2, 3},
{"ab", 3, 2},
{"ac", 4, 2},
{"a", 5, 1},
{"b", 6, 1},
{"ba", 6, 1},
},
2,
ContractTrieSet{
{'b', 'b', 0, 6},
{'a', 0, 2, 5},
{'c', 'c', 0, 4},
{'b', 0, 1, 3},
{'c', 'd', 0, 1},
},
}, {
[]lookupStrings{
{"bcde", 2, 4},
{"bc", 7, 2},
{"ab", 6, 2},
{"bcd", 5, 3},
{"abcd", 1, 4},
{"abc", 4, 3},
{"bcdf", 3, 4},
},
2,
ContractTrieSet{
{'b', 3, 1, 0xFF},
{'a', 0, 1, 0xFF},
{'b', 0, 1, 6},
{'c', 0, 1, 4},
{'d', 'd', 0, 1},
{'c', 0, 1, 7},
{'d', 0, 1, 5},
{'e', 'f', 0, 2},
},
}}
func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) {
scan := c.scanner(0, nnode, s)
scan.scan(0)
return scan.result()
}
func TestLookupContraction(t *testing.T) {
for i, tt := range lookupTests {
cts := ContractTrieSet(tt.tries)
for j, lu := range tt.lookup {
str := lu.str
for _, s := range []string{str, str + "X"} {
const msg = `%d:%d: %s of "%s" %v; want %v`
offset, n := lookup(&cts, tt.n, []byte(s))
if offset != lu.offset {
t.Errorf(msg, i, j, "offset", s, offset, lu.offset)
}
if n != lu.n {
t.Errorf(msg, i, j, "bytes consumed", s, n, len(str))
}
}
}
}
}

View file

@ -1,178 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// An Iter incrementally converts chunks of the input text to collation
// elements, while ensuring that the collation elements are in normalized order
// (that is, they are in the order as if the input text were normalized first).
type Iter struct {
Weighter Weighter
Elems []Elem
// N is the number of elements in Elems that will not be reordered on
// subsequent iterations, N <= len(Elems).
N int
bytes []byte
str string
// Because the Elems buffer may contain collation elements that are needed
// for look-ahead, we need two positions in the text (bytes or str): one for
// the end position in the text for the current iteration and one for the
// start of the next call to appendNext.
pEnd int // end position in text corresponding to N.
pNext int // pEnd <= pNext.
}
// Reset sets the position in the current input text to p and discards any
// results obtained so far.
func (i *Iter) Reset(p int) {
i.Elems = i.Elems[:0]
i.N = 0
i.pEnd = p
i.pNext = p
}
// Len returns the length of the input text.
func (i *Iter) Len() int {
if i.bytes != nil {
return len(i.bytes)
}
return len(i.str)
}
// Discard removes the collation elements up to N.
func (i *Iter) Discard() {
// TODO: change this such that only modifiers following starters will have
// to be copied.
i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])]
i.N = 0
}
// End returns the end position of the input text for which Next has returned
// results.
func (i *Iter) End() int {
return i.pEnd
}
// SetInput resets i to input s.
func (i *Iter) SetInput(s []byte) {
i.bytes = s
i.str = ""
i.Reset(0)
}
// SetInputString resets i to input s.
func (i *Iter) SetInputString(s string) {
i.str = s
i.bytes = nil
i.Reset(0)
}
func (i *Iter) done() bool {
return i.pNext >= len(i.str) && i.pNext >= len(i.bytes)
}
func (i *Iter) appendNext() bool {
if i.done() {
return false
}
var sz int
if i.bytes == nil {
i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:])
} else {
i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:])
}
if sz == 0 {
sz = 1
}
i.pNext += sz
return true
}
// Next appends Elems to the internal array. On each iteration, it will either
// add starters or modifiers. In the majority of cases, an Elem with a primary
// value > 0 will have a CCC of 0. The CCC values of collation elements are also
// used to detect if the input string was not normalized and to adjust the
// result accordingly.
func (i *Iter) Next() bool {
if i.N == len(i.Elems) && !i.appendNext() {
return false
}
// Check if the current segment starts with a starter.
prevCCC := i.Elems[len(i.Elems)-1].CCC()
if prevCCC == 0 {
i.N = len(i.Elems)
i.pEnd = i.pNext
return true
} else if i.Elems[i.N].CCC() == 0 {
// Set i.N to cover only the prefix of i.Elems with CCC == 0 and use the
// rest for the next call to Next.
for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ {
}
i.pEnd = i.pNext
return true
}
// The current (partial) segment starts with modifiers. We need to collect
// all successive modifiers to ensure that they are normalized.
for {
p := len(i.Elems)
i.pEnd = i.pNext
if !i.appendNext() {
break
}
if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters {
// Leave the starter for the next iteration. This ensures that we
// do not return sequences of collation elements that cross two
// segments.
//
// TODO: handle large number of combining characters by fully
// normalizing the input segment before iteration. This ensures
// results are consistent across the text repo.
i.N = p
return true
} else if ccc < prevCCC {
i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC.
} else {
prevCCC = ccc
}
}
done := len(i.Elems) != i.N
i.N = len(i.Elems)
return done
}
// nextNoNorm is the same as Next, but does not "normalize" the collation
// elements.
func (i *Iter) nextNoNorm() bool {
// TODO: remove this function. Using this instead of next does not seem
// to improve performance in any significant way. We retain this until
// later for evaluation purposes.
if i.done() {
return false
}
i.appendNext()
i.N = len(i.Elems)
return true
}
const maxCombiningCharacters = 30
// doNorm reorders the collation elements in i.Elems.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
func (i *Iter) doNorm(p int, ccc uint8) {
n := len(i.Elems)
k := p
for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- {
}
i.Elems = append(i.Elems, i.Elems[p:k]...)
copy(i.Elems[p:], i.Elems[k:])
i.Elems = i.Elems[:n]
}
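// A minimal sketch (illustrative only, not part of the package API) of the
// typical consumption loop for an Iter; w may be any Weighter implementation,
// such as a *Table.
func iterElems(w Weighter, s string) []Elem {
it := Iter{Weighter: w}
it.SetInputString(s)
// Next keeps appending to it.Elems until the input is consumed; it.N marks
// how many elements are final and will not be reordered.
for it.Next() {
}
return it.Elems[:it.N]
}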

View file

@ -1,63 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
func TestDoNorm(t *testing.T) {
const div = -1 // The insertion point of the next block.
tests := []struct {
in, out []int
}{{
in: []int{4, div, 3},
out: []int{3, 4},
}, {
in: []int{4, div, 3, 3, 3},
out: []int{3, 3, 3, 4},
}, {
in: []int{0, 4, div, 3},
out: []int{0, 3, 4},
}, {
in: []int{0, 0, 4, 5, div, 3, 3},
out: []int{0, 0, 3, 3, 4, 5},
}, {
in: []int{0, 0, 1, 4, 5, div, 3, 3},
out: []int{0, 0, 1, 3, 3, 4, 5},
}, {
in: []int{0, 0, 1, 4, 5, div, 4, 4},
out: []int{0, 0, 1, 4, 4, 4, 5},
},
}
for j, tt := range tests {
i := Iter{}
var w, p int
for k, cc := range tt.in {
if cc == div {
w = 100
p = k
continue
}
i.Elems = append(i.Elems, makeCE([]int{w, defaultSecondary, 2, cc}))
}
i.doNorm(p, i.Elems[p].CCC())
if len(i.Elems) != len(tt.out) {
t.Errorf("%d: length was %d; want %d", j, len(i.Elems), len(tt.out))
}
prevCCC := uint8(0)
for k, ce := range i.Elems {
if int(ce.CCC()) != tt.out[k] {
t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
}
if k > 0 && ce.CCC() == prevCCC && i.Elems[k-1].Primary() > ce.Primary() {
t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
}
}
}
// Combining rune overflow is tested in search/pattern_test.go.
}

View file

@ -1,236 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode"
"unicode/utf8"
)
// NewNumericWeighter wraps w so that runs of digits are replaced by weights
// that sort according to their numeric value.
//
// Weighter w must have a free primary weight after the primary weight for 9.
// If this is not the case, numeric value will sort at the same primary level
// as the first primary sorting after 9.
func NewNumericWeighter(w Weighter) Weighter {
getElem := func(s string) Elem {
elems, _ := w.AppendNextString(nil, s)
return elems[0]
}
nine := getElem("9")
// Numbers should order before zero, but the DUCET has no room for this.
// TODO: move before zero once we use fractional collation elements.
ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0)
return &numericWeighter{
Weighter: w,
// We assume that w sorts digits of different kinds in order of numeric
// value and that the tertiary weight order is preserved.
//
// TODO: evaluate whether it is worth basing the ranges on the Elem
// encoding itself once the move to fractional weights is complete.
zero: getElem("0"),
zeroSpecialLo: getElem(""), // U+FF10 FULLWIDTH DIGIT ZERO
zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO
nine: nine,
nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE
numberStart: ns,
}
}
// A numericWeighter translates a stream of digits into a stream of weights
// representing the numeric value.
type numericWeighter struct {
Weighter
// The Elems below all demarcate boundaries of specific ranges. With the
// current element encoding digits are in two ranges: normal (default
// tertiary value) and special. For most languages, digits have collation
// elements in the normal range.
//
// Note: the range tests are very specific for the element encoding used by
// this implementation. The tests in collate_test.go are designed to fail
// if this code is not updated when an encoding has changed.
zero Elem // normal digit zero
zeroSpecialLo Elem // special digit zero, low tertiary value
zeroSpecialHi Elem // special digit zero, high tertiary value
nine Elem // normal digit nine
nineSpecialHi Elem // special digit nine
numberStart Elem
}
// AppendNext calls the namesake of the underlying weighter, but replaces single
// digits with weights representing their value.
func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) {
ce, n = nw.Weighter.AppendNext(buf, s)
nc := numberConverter{
elems: buf,
w: nw,
b: s,
}
isZero, ok := nc.checkNextDigit(ce)
if !ok {
return ce, n
}
// ce might have been grown already, so take it instead of buf.
nc.init(ce, len(buf), isZero)
for n < len(s) {
ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:])
nc.b = s
n += sz
if !nc.update(ce) {
break
}
}
return nc.result(), n
}
// AppendNextString calls the namesake of the underlying weighter, but replaces
// single digits with weights representing their value.
func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) {
ce, n = nw.Weighter.AppendNextString(buf, s)
nc := numberConverter{
elems: buf,
w: nw,
s: s,
}
isZero, ok := nc.checkNextDigit(ce)
if !ok {
return ce, n
}
nc.init(ce, len(buf), isZero)
for n < len(s) {
ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:])
nc.s = s
n += sz
if !nc.update(ce) {
break
}
}
return nc.result(), n
}
type numberConverter struct {
w *numericWeighter
elems []Elem
nDigits int
lenIndex int
s string // set if the input was of type string
b []byte // set if the input was of type []byte
}
// init completes initialization of a numberConverter and prepares it for adding
// more digits. elems is assumed to have a digit starting at oldLen.
func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) {
// Insert a marker indicating the start of a number and a placeholder
// for the number of digits.
if isZero {
elems = append(elems[:oldLen], nc.w.numberStart, 0)
} else {
elems = append(elems, 0, 0)
copy(elems[oldLen+2:], elems[oldLen:])
elems[oldLen] = nc.w.numberStart
elems[oldLen+1] = 0
nc.nDigits = 1
}
nc.elems = elems
nc.lenIndex = oldLen + 1
}
// checkNextDigit reports whether bufNew adds a single digit relative to the old
// buffer. If it does, it also reports whether this digit is zero.
func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) {
if len(nc.elems) >= len(bufNew) {
return false, false
}
e := bufNew[len(nc.elems)]
if e < nc.w.zeroSpecialLo || nc.w.nine < e {
// Not a number.
return false, false
}
if e < nc.w.zero {
if e > nc.w.nineSpecialHi {
// Not a number.
return false, false
}
if !nc.isDigit() {
return false, false
}
isZero = e <= nc.w.zeroSpecialHi
} else {
// This is the common case if we encounter a digit.
isZero = e == nc.w.zero
}
// Check that the remaining added collation elements have a zero primary value.
if n := len(bufNew) - len(nc.elems); n > 1 {
for i := len(nc.elems) + 1; i < len(bufNew); i++ {
if bufNew[i].Primary() != 0 {
return false, false
}
}
// In some rare cases, collation elements will encode runes in
// unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371)
// are not in Nd. Also some digits that clearly belong in unicode.No,
// like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have
// collation elements indistinguishable from normal digits.
// Unfortunately, this means we need to make this check for nearly all
// non-Latin digits.
//
// TODO: check the performance impact and find something better if it is
// an issue.
if !nc.isDigit() {
return false, false
}
}
return isZero, true
}
func (nc *numberConverter) isDigit() bool {
if nc.b != nil {
r, _ := utf8.DecodeRune(nc.b)
return unicode.In(r, unicode.Nd)
}
r, _ := utf8.DecodeRuneInString(nc.s)
return unicode.In(r, unicode.Nd)
}
// We currently support a maximum of about 2M digits (the number of primary
// values). Such numbers will compare correctly against small numbers, but their
// comparison against other large numbers is undefined.
//
// TODO: define a proper fallback, such as comparing large numbers textually or
// actually allowing numbers of unlimited length.
//
// TODO: cap this to a lower number (like 100) and maybe allow a larger number
// in an option?
const maxDigits = 1<<maxPrimaryBits - 1
func (nc *numberConverter) update(elems []Elem) bool {
isZero, ok := nc.checkNextDigit(elems)
if nc.nDigits == 0 && isZero {
return true
}
nc.elems = elems
if !ok {
return false
}
nc.nDigits++
return nc.nDigits < maxDigits
}
// result fills in the length element for the digit sequence and returns the
// completed collation elements.
func (nc *numberConverter) result() []Elem {
e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0)
nc.elems[nc.lenIndex] = e
return nc.elems
}
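// A minimal sketch (illustrative only, not part of the package API) of using
// the numeric weighter. With the numWeighter fixture from the tests that
// follow, the input "10" yields p(120, 2, 101, 100): the number-start
// element, a length element for two digits, and the digit weights themselves.
func numericElems(w Weighter, s string) []Elem {
nw := NewNumericWeighter(w)
var elems []Elem
for n, sz := 0, 0; n < len(s); n += sz {
elems, sz = nw.AppendNextString(elems, s[n:])
}
return elems
}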

View file

@ -1,159 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"reflect"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
)
const (
digSec = defaultSecondary
digTert = defaultTertiary
)
var tPlus3 = e(0, 50, digTert+3)
// numWeighter is a testWeighter used for testing numericWeighter.
var numWeighter = testWeighter{
"0": p(100),
"": []Elem{e(100, digSec, digTert+1)}, // U+FF10 FULLWIDTH DIGIT ZERO
"₀": []Elem{e(100, digSec, digTert+5)}, // U+2080 SUBSCRIPT ZERO
"1": p(101),
// Allow non-primary collation elements to be inserted.
"١": append(p(101), tPlus3), // U+0661 ARABIC-INDIC DIGIT ONE
// Allow varying tertiary weight if the number is Nd.
"": []Elem{e(101, digSec, digTert+1)}, // U+FF11 FULLWIDTH DIGIT ONE
"2": p(102),
// Allow non-primary collation elements to be inserted.
"٢": append(p(102), tPlus3), // U+0662 ARABIC-INDIC DIGIT TWO
// Varying tertiary weights should be ignored.
"": []Elem{e(102, digSec, digTert+3)}, // U+FF12 FULLWIDTH DIGIT TWO
"3": p(103),
"4": p(104),
"5": p(105),
"6": p(106),
"7": p(107),
// Weights must be strictly monotonically increasing, but do not need to be
// consecutive.
"8": p(118),
"9": p(119),
// Allow non-primary collation elements to be inserted.
"٩": append(p(119), tPlus3), // U+0669 ARABIC-INDIC DIGIT NINE
// Varying tertiary weights should be ignored.
"": []Elem{e(119, digSec, digTert+1)}, // U+FF19 FULLWIDTH DIGIT NINE
"₉": []Elem{e(119, digSec, digTert+5)}, // U+2089 SUBSCRIPT NINE
"a": p(5),
"b": p(6),
"c": p(8, 2),
"klm": p(99),
"nop": p(121),
"x": p(200),
"y": p(201),
}
func p(w ...int) (elems []Elem) {
for _, x := range w {
e, _ := MakeElem(x, digSec, digTert, 0)
elems = append(elems, e)
}
return elems
}
func TestNumericAppendNext(t *testing.T) {
for _, tt := range []struct {
in string
w []Elem
}{
{"a", p(5)},
{"klm", p(99)},
{"aa", p(5, 5)},
{"1", p(120, 1, 101)},
{"0", p(120, 0)},
{"01", p(120, 1, 101)},
{"0001", p(120, 1, 101)},
{"10", p(120, 2, 101, 100)},
{"99", p(120, 2, 119, 119)},
{"9999", p(120, 4, 119, 119, 119, 119)},
{"1a", p(120, 1, 101, 5)},
{"0b", p(120, 0, 6)},
{"01c", p(120, 1, 101, 8, 2)},
{"10x", p(120, 2, 101, 100, 200)},
{"99y", p(120, 2, 119, 119, 201)},
{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},
// Allow follow-up collation elements if they have a zero primary value.
{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
{
"",
[]Elem{
e(120), e(3),
e(101, digSec, digTert+1),
e(102, digSec, digTert+3),
e(119, digSec, digTert+1),
},
},
// Ensure AppendNext* adds to the given buffer.
{"a10", p(5, 120, 2, 101, 100)},
} {
nw := NewNumericWeighter(numWeighter)
b := []byte(tt.in)
got := []Elem(nil)
for n, sz := 0, 0; n < len(b); {
got, sz = nw.AppendNext(got, b[n:])
n += sz
}
if !reflect.DeepEqual(got, tt.w) {
t.Errorf("AppendNext(%q) =\n%v; want\n%v", tt.in, got, tt.w)
}
got = nil
for n, sz := 0, 0; n < len(tt.in); {
got, sz = nw.AppendNextString(got, tt.in[n:])
n += sz
}
if !reflect.DeepEqual(got, tt.w) {
t.Errorf("AppendNextString(%q) =\n%v; want\n%v", tt.in, got, tt.w)
}
}
}
func TestNumericOverflow(t *testing.T) {
manyDigits := strings.Repeat("9", maxDigits+1) + "a"
nw := NewNumericWeighter(numWeighter)
got, n := nw.AppendNextString(nil, manyDigits)
if n != maxDigits {
t.Errorf("n: got %d; want %d", n, maxDigits)
}
if got[1].Primary() != maxDigits {
t.Errorf("primary(e[1]): got %d; want %d", n, maxDigits)
}
}
func TestNumericWeighterAlloc(t *testing.T) {
buf := make([]Elem, 100)
w := NewNumericWeighter(numWeighter)
s := "1234567890a"
nNormal := testtext.AllocsPerRun(3, func() { numWeighter.AppendNextString(buf, s) })
nNumeric := testtext.AllocsPerRun(3, func() { w.AppendNextString(buf, s) })
if n := nNumeric - nNormal; n > 0 {
t.Errorf("got %f; want 0", n)
}
}

View file

@ -1,275 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// Table holds all collation data for a given collation ordering.
type Table struct {
Index Trie // main trie
// expansion info
ExpandElem []uint32
// contraction info
ContractTries ContractTrieSet
ContractElem []uint32
MaxContractLen int
VariableTop uint32
}
func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
return t.appendNext(w, source{bytes: b})
}
func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
return t.appendNext(w, source{str: s})
}
func (t *Table) Start(p int, b []byte) int {
// TODO: implement
panic("not implemented")
}
func (t *Table) StartString(p int, s string) int {
// TODO: implement
panic("not implemented")
}
func (t *Table) Domain() []string {
// TODO: implement
panic("not implemented")
}
func (t *Table) Top() uint32 {
return t.VariableTop
}
type source struct {
str string
bytes []byte
}
func (src *source) lookup(t *Table) (ce Elem, sz int) {
if src.bytes == nil {
return t.Index.lookupString(src.str)
}
return t.Index.lookup(src.bytes)
}
func (src *source) tail(sz int) {
if src.bytes == nil {
src.str = src.str[sz:]
} else {
src.bytes = src.bytes[sz:]
}
}
func (src *source) nfd(buf []byte, end int) []byte {
if src.bytes == nil {
return norm.NFD.AppendString(buf[:0], src.str[:end])
}
return norm.NFD.Append(buf[:0], src.bytes[:end]...)
}
func (src *source) rune() (r rune, sz int) {
if src.bytes == nil {
return utf8.DecodeRuneInString(src.str)
}
return utf8.DecodeRune(src.bytes)
}
func (src *source) properties(f norm.Form) norm.Properties {
if src.bytes == nil {
return f.PropertiesString(src.str)
}
return f.Properties(src.bytes)
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) {
ce, sz := src.lookup(t)
tp := ce.ctype()
if tp == ceNormal {
if ce == 0 {
r, _ := src.rune()
const (
hangulSize = 3
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r >= firstHangul && r <= lastHangul {
// TODO: performance can be considerably improved here.
n = sz
var buf [16]byte // Used for decomposing Hangul.
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.Index.lookup(b)
w = append(w, ce)
}
return w, n
}
ce = makeImplicitCE(implicitPrimary(r))
}
w = append(w, ce)
} else if tp == ceExpansionIndex {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
n := 0
src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
w, n = t.matchContraction(w, ce, src.bytes)
}
sz += n
} else if tp == ceDecompose {
// Decompose using NFKD and replace tertiary weights.
t1, t2 := splitDecompose(ce)
i := len(w)
nfkd := src.properties(norm.NFKD).Decomposition()
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
w, p = t.appendNext(w, source{bytes: nfkd})
}
w[i] = w[i].updateTertiary(t1)
if i++; i < len(w) {
w[i] = w[i].updateTertiary(t2)
for i++; i < len(w); i++ {
w[i] = w[i].updateTertiary(maxTertiary)
}
}
}
return w, sz
}
func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem {
i := splitExpandIndex(ce)
n := int(t.ExpandElem[i])
i++
for _, ce := range t.ExpandElem[i : i+n] {
w = append(w, Elem(ce))
}
return w
}
func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scanner(index, n, suffix)
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.Properties(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.Properties(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.
func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.ContractTries.scannerString(index, n, suffix)
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.PropertiesString(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.PropertiesString(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.ContractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
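// Table is meant to be usable wherever a Weighter (defined in weighter.go) is
// expected, for example as the Weighter of an Iter. A compile-time assertion
// of that intent, added here for illustration:
var _ Weighter = (*Table)(nil)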

View file

@ -1,159 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The trie in this file is used to associate the first full character in a
// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte
// sequence are used to look up offsets in the index table to be used for the
// next byte. The last byte is used to index into a table of collation elements.
// For a full description, see go.text/collate/build/trie.go.
package colltab
const blockSize = 64
type Trie struct {
Index0 []uint16 // index for first byte (0xC0-0xFF)
Values0 []uint32 // values for first byte (0x00-0x7F)
Index []uint16
Values []uint32
}
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
)
func (t *Trie) lookupValue(n uint16, b byte) Elem {
return Elem(t.Values[int(n)<<6+int(b)])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *Trie) lookup(s []byte) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return Elem(t.Values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// The body of lookupString is a verbatim copy of that of lookup.
func (t *Trie) lookupString(s string) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return Elem(t.Values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.Index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.Index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.Index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
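// A minimal sketch (illustrative only, not part of the package API) of
// decoding a string into trie values rune by rune, advancing by the size
// returned from lookupString.
func trieValues(t *Trie, s string) []Elem {
var vs []Elem
for len(s) > 0 {
v, sz := t.lookupString(s)
if sz == 0 {
// Not enough bytes left to complete the UTF-8 encoding.
break
}
vs = append(vs, v)
s = s[sz:]
}
return vs
}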

View file

@ -1,106 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
// Test cases for illegal runes.
type trietest struct {
size int
bytes []byte
}
var tests = []trietest{
// illegal runes
{1, []byte{0x80}},
{1, []byte{0xFF}},
{1, []byte{t2, tx - 1}},
{1, []byte{t2, t2}},
{2, []byte{t3, tx, tx - 1}},
{2, []byte{t3, tx, t2}},
{1, []byte{t3, tx - 1, tx}},
{3, []byte{t4, tx, tx, tx - 1}},
{3, []byte{t4, tx, tx, t2}},
{1, []byte{t4, t2, tx, tx - 1}},
{2, []byte{t4, tx, t2, tx - 1}},
// short runes
{0, []byte{t2}},
{0, []byte{t3, tx}},
{0, []byte{t4, tx, tx}},
// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
{1, []byte{t5, tx, tx, tx, tx}},
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
func TestLookupTrie(t *testing.T) {
for i, r := range testRunes {
b := []byte(string(r))
v, sz := testTrie.lookup(b)
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", r, v, i)
}
if sz != len(b) {
t.Errorf("lookup(%U): found size %d, expected %d", r, sz, len(b))
}
}
for i, tt := range tests {
v, sz := testTrie.lookup(tt.bytes)
if int(v) != 0 {
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
}
if sz != tt.size {
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
}
}
}
// test data is taken from exp/collate/locale/build/trie_test.go
var testValues = [832]uint32{
0x000c: 0x00000001,
0x007f: 0x00000002,
0x00c0: 0x00000003,
0x0100: 0x00000004,
0x0140: 0x0000000c, 0x0141: 0x0000000d, 0x0142: 0x0000000e,
0x0150: 0x0000000f,
0x0155: 0x00000010,
0x01bf: 0x00000005,
0x01c0: 0x00000006,
0x0219: 0x00000007,
0x027f: 0x00000008,
0x0280: 0x00000009,
0x02c1: 0x0000000a,
0x033f: 0x0000000b,
}
var testLookup = [640]uint16{
0x0e0: 0x05, 0x0e6: 0x06,
0x13f: 0x07,
0x140: 0x08, 0x144: 0x09,
0x190: 0x03,
0x1ff: 0x0a,
0x20f: 0x05,
0x242: 0x01, 0x244: 0x02,
0x248: 0x03,
0x25f: 0x04,
0x260: 0x01,
0x26f: 0x02,
0x270: 0x04, 0x274: 0x06,
}
var testTrie = Trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]}

View file

@ -1,31 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab // import "golang.org/x/text/internal/colltab"
// A Weighter can be used as a source for Collator and Searcher.
type Weighter interface {
// Start finds the start of the segment that includes position p.
Start(p int, b []byte) int
// StartString finds the start of the segment that includes position p.
StartString(p int, s string) int
// AppendNext appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
// AppendNextString appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
// Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table.
Domain() []string
// Top returns the highest variable primary value.
Top() uint32
}

View file

@ -1,42 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// testWeighter is a simple Weighter that returns weights from a user-defined map.
type testWeighter map[string][]Elem
func (t testWeighter) Start(int, []byte) int { return 0 }
func (t testWeighter) StartString(int, string) int { return 0 }
func (t testWeighter) Domain() []string { return nil }
func (t testWeighter) Top() uint32 { return 0 }
// maxContractBytes is the maximum length of any key in the map.
const maxContractBytes = 10
func (t testWeighter) AppendNext(buf []Elem, s []byte) ([]Elem, int) {
n := len(s)
if n > maxContractBytes {
n = maxContractBytes
}
for i := n; i > 0; i-- {
if e, ok := t[string(s[:i])]; ok {
return append(buf, e...), i
}
}
panic("incomplete testWeighter: could not find " + string(s))
}
func (t testWeighter) AppendNextString(buf []Elem, s string) ([]Elem, int) {
n := len(s)
if n > maxContractBytes {
n = maxContractBytes
}
for i := n; i > 0; i-- {
if e, ok := t[s[:i]]; ok {
return append(buf, e...), i
}
}
panic("incomplete testWeighter: could not find " + s)
}

View file

@ -1,4 +0,0 @@
The export directory contains packages that are generated using the x/text
infrastructure, but live elsewhere.
At some point we can expose some of the infrastructure, but for now this
is not done.

View file

@ -1,55 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package idna
// This file contains code that is common between the generation code and the
// package's test code.
import (
"log"
"golang.org/x/text/internal/ucd"
)
func catFromEntry(p *ucd.Parser) (cat category) {
r := p.Rune(0)
switch s := p.String(1); s {
case "valid":
cat = valid
case "disallowed":
cat = disallowed
case "disallowed_STD3_valid":
cat = disallowedSTD3Valid
case "disallowed_STD3_mapped":
cat = disallowedSTD3Mapped
case "mapped":
cat = mapped
case "deviation":
cat = deviation
case "ignored":
cat = ignored
default:
log.Fatalf("%U: Unknown category %q", r, s)
}
if s := p.String(3); s != "" {
if cat != valid {
log.Fatalf(`%U: %s defined for %q; want "valid"`, r, s, p.String(1))
}
switch s {
case "NV8":
cat = validNV8
case "XV8":
cat = validXV8
default:
log.Fatalf("%U: Unexpected exception %q", r, s)
}
}
return cat
}
var joinType = map[string]info{
"L": joiningL,
"D": joiningD,
"T": joiningT,
"R": joiningR,
}

View file

@ -1,68 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna_test
import (
"fmt"
"golang.org/x/text/internal/export/idna"
)
func ExampleProfile() {
// Raw Punycode has no restrictions and does no mappings.
fmt.Println(idna.ToASCII(""))
fmt.Println(idna.ToASCII("*.faß.com"))
fmt.Println(idna.Punycode.ToASCII("*.faß.com"))
// Rewrite IDN for lookup. This (currently) uses transitional mappings to
// find a balance between IDNA2003 and IDNA2008 compatibility.
fmt.Println(idna.Lookup.ToASCII(""))
fmt.Println(idna.Lookup.ToASCII("www.faß.com"))
// Convert an IDN to ASCII for registration purposes. This changes the
// encoding, but reports an error if the input was ill-formed.
fmt.Println(idna.Registration.ToASCII(""))
fmt.Println(idna.Registration.ToASCII("www.faß.com"))
// Output:
// <nil>
// *.xn--fa-hia.com <nil>
// *.xn--fa-hia.com <nil>
// <nil>
// www.fass.com <nil>
// idna: invalid label ""
// www.xn--fa-hia.com <nil>
}
func ExampleNew() {
var p *idna.Profile
// Raw Punycode has no restrictions and does no mappings.
p = idna.New()
fmt.Println(p.ToASCII("*.faß.com"))
// Do mappings. Note that star is not allowed in a DNS lookup.
p = idna.New(
idna.MapForLookup(),
idna.Transitional(true)) // Map ß -> ss
fmt.Println(p.ToASCII("*.faß.com"))
// Lookup for registration. Also does not allow '*'.
p = idna.New(idna.ValidateForRegistration())
fmt.Println(p.ToUnicode("*.faß.com"))
// Set up a profile that maps for lookup, but allows wildcards.
p = idna.New(
idna.MapForLookup(),
idna.Transitional(true), // Map ß -> ss
idna.StrictDomainName(false)) // Set more permissive ASCII rules.
fmt.Println(p.ToASCII("*.faß.com"))
// Output:
// *.xn--fa-hia.com <nil>
// *.fass.com idna: disallowed rune U+002A
// *.faß.com idna: disallowed rune U+002A
// *.fass.com <nil>
}

View file

@ -1,276 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program generates the trie for idna operations. The IDNA mapping
// algorithm requires the lookup of various properties and mappings for each
// rune. The table generated by this generator combines several of the most
// frequently used of these into a single trie so that they can be accessed
// with a single lookup.
package main
import (
"fmt"
"io"
"log"
"unicode"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/triegen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/bidi"
)
func main() {
gen.Init()
genTables()
gen.Repackage("gen_trieval.go", "trieval.go", "idna")
gen.Repackage("gen_common.go", "common_test.go", "idna")
}
var runes = map[rune]info{}
func genTables() {
t := triegen.NewTrie("idna")
ucd.Parse(gen.OpenUCDFile("DerivedNormalizationProps.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
if p.String(1) == "NFC_QC" { // p.String(2) is "N" or "M"
runes[r] = mayNeedNorm
}
})
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
const cccVirama = 9
if p.Int(ucd.CanonicalCombiningClass) == cccVirama {
runes[p.Rune(0)] = viramaModifier
}
switch {
case unicode.In(r, unicode.Mark):
runes[r] |= modifier | mayNeedNorm
}
// TODO: by using UnicodeData.txt we don't mark undefined codepoints
// that are earmarked as RTL properly. However, an undefined cp will
// always fail, so there is no need to store this info.
switch p, _ := bidi.LookupRune(r); p.Class() {
case bidi.R, bidi.AL, bidi.AN:
if x := runes[r]; x != 0 && x != mayNeedNorm {
log.Fatalf("%U: rune both modifier and RTL letter/number", r)
}
runes[r] = rtl
}
})
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
switch v := p.String(1); v {
case "L", "D", "T", "R":
runes[p.Rune(0)] |= joinType[v] << joinShift
}
})
ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
// The mappings table explicitly defines surrogates as invalid.
if !utf8.ValidRune(r) {
return
}
cat := catFromEntry(p)
isMapped := cat == mapped || cat == disallowedSTD3Mapped || cat == deviation
if !isMapped {
// Only include additional category information for non-mapped
// runes. The additional information is only used after mapping and
// the bits would clash with mapping information.
// TODO: it would be possible to inline this data and avoid
// additional lookups. This is quite tedious, though, so let's first
// see if we need this.
cat |= category(runes[r])
}
s := string(p.Runes(2))
if s != "" && !isMapped {
log.Fatalf("%U: Mapping with non-mapping category %d", r, cat)
}
t.Insert(r, uint64(makeEntry(r, s))+uint64(cat))
})
w := gen.NewCodeWriter()
defer w.WriteGoFile("tables.go", "idna")
gen.WriteUnicodeVersion(w)
w.WriteVar("mappings", string(mappings))
w.WriteVar("xorData", string(xorData))
sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
if err != nil {
log.Fatal(err)
}
w.Size += sz
}
var (
// mappings contains replacement strings for mapped runes, each prefixed
// with a byte containing the length of the following string.
mappings = []byte{}
mapCache = map[string]int{}
// xorData is like mappings, except that it contains XOR data.
// We split these two tables so that we don't get an overflow.
xorData = []byte{}
xorCache = map[string]int{}
)
// makeEntry creates a trie entry.
func makeEntry(r rune, mapped string) info {
orig := string(r)
if len(orig) != len(mapped) {
// Store the mapped value as is in the mappings table.
index := len(mappings)
if x, ok := mapCache[mapped]; ok {
index = x
} else {
mapCache[mapped] = index
mappings = append(mappings, byte(len(mapped)))
mappings = append(mappings, mapped...)
}
return info(index) << indexShift
}
// Create per-byte XOR mask.
var b []byte
for i := 0; i < len(orig); i++ {
b = append(b, orig[i]^mapped[i])
}
// Remove leading 0 bytes, but keep at least one byte.
for ; len(b) > 1 && b[0] == 0; b = b[1:] {
}
if len(b) == 1 {
return xorBit | inlineXOR | info(b[0])<<indexShift
}
mapped = string(b)
// Store the mapped value as is in the mappings table.
index := len(xorData)
if x, ok := xorCache[mapped]; ok {
index = x
} else {
xorCache[mapped] = index
xorData = append(xorData, byte(len(mapped)))
xorData = append(xorData, mapped...)
}
return xorBit | info(index)<<indexShift
}
// The following code implements a triegen.Compacter that was originally
// designed for normalization. The IDNA table has some similarities with the
// norm table. Using this compacter, together with the XOR pattern approach,
// reduces the table size by roughly 100K. It can probably be compressed further
// by also including elements of the compacter used by cases, but for now it is
// good enough.
const maxSparseEntries = 16
type normCompacter struct {
sparseBlocks [][]uint64
sparseOffset []uint16
sparseCount int
}
func mostFrequentStride(a []uint64) int {
counts := make(map[int]int)
var v int
for _, x := range a {
if stride := int(x) - v; v != 0 && stride >= 0 {
counts[stride]++
}
v = int(x)
}
var maxs, maxc int
for stride, cnt := range counts {
if cnt > maxc || (cnt == maxc && stride < maxs) {
maxs, maxc = stride, cnt
}
}
return maxs
}
func countSparseEntries(a []uint64) int {
stride := mostFrequentStride(a)
var v, count int
for _, tv := range a {
if int(tv)-v != stride {
if tv != 0 {
count++
}
}
v = int(tv)
}
return count
}
func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
if n := countSparseEntries(v); n <= maxSparseEntries {
return (n+1)*4 + 2, true
}
return 0, false
}
func (c *normCompacter) Store(v []uint64) uint32 {
h := uint32(len(c.sparseOffset))
c.sparseBlocks = append(c.sparseBlocks, v)
c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
c.sparseCount += countSparseEntries(v) + 1
return h
}
func (c *normCompacter) Handler() string {
return "idnaSparse.lookup"
}
func (c *normCompacter) Print(w io.Writer) (retErr error) {
p := func(f string, x ...interface{}) {
if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
retErr = err
}
}
ls := len(c.sparseBlocks)
p("// idnaSparseOffset: %d entries, %d bytes\n", ls, ls*2)
p("var idnaSparseOffset = %#v\n\n", c.sparseOffset)
ns := c.sparseCount
p("// idnaSparseValues: %d entries, %d bytes\n", ns, ns*4)
p("var idnaSparseValues = [%d]valueRange {", ns)
for i, b := range c.sparseBlocks {
p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
var v int
stride := mostFrequentStride(b)
n := countSparseEntries(b)
p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
for i, nv := range b {
if int(nv)-v != stride {
if v != 0 {
p(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
}
}
v = int(nv)
}
if v != 0 {
p(",hi:%#02x},", 0x80+len(b)-1)
}
}
p("\n}\n\n")
return
}

View file

@ -1,59 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains code that is common between the generation code and the
// package's test code.
import (
"log"
"golang.org/x/text/internal/ucd"
)
func catFromEntry(p *ucd.Parser) (cat category) {
r := p.Rune(0)
switch s := p.String(1); s {
case "valid":
cat = valid
case "disallowed":
cat = disallowed
case "disallowed_STD3_valid":
cat = disallowedSTD3Valid
case "disallowed_STD3_mapped":
cat = disallowedSTD3Mapped
case "mapped":
cat = mapped
case "deviation":
cat = deviation
case "ignored":
cat = ignored
default:
log.Fatalf("%U: Unknown category %q", r, s)
}
if s := p.String(3); s != "" {
if cat != valid {
log.Fatalf(`%U: %s defined for %q; want "valid"`, r, s, p.String(1))
}
switch s {
case "NV8":
cat = validNV8
case "XV8":
cat = validXV8
default:
log.Fatalf("%U: Unexpected exception %q", r, s)
}
}
return cat
}
var joinType = map[string]info{
"L": joiningL,
"D": joiningD,
"T": joiningT,
"R": joiningR,
}

View file

@ -1,91 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
import (
"testing"
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
func TestTables(t *testing.T) {
testtext.SkipIfNotLong(t)
lookup := func(r rune) info {
v, _ := trie.lookupString(string(r))
return info(v)
}
ucd.Parse(gen.OpenUnicodeFile("idna", "", "IdnaMappingTable.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
if got, want := x.category(), catFromEntry(p); got != want {
t.Errorf("%U:category: got %x; want %x", r, got, want)
}
mapped := false
switch p.String(1) {
case "mapped", "disallowed_STD3_mapped", "deviation":
mapped = true
}
if x.isMapped() != mapped {
t.Errorf("%U:isMapped: got %v; want %v", r, x.isMapped(), mapped)
}
if !mapped {
return
}
want := string(p.Runes(2))
got := string(x.appendMapping(nil, string(r)))
if got != want {
t.Errorf("%U:mapping: got %+q; want %+q", r, got, want)
}
if x.isMapped() {
return
}
wantMark := unicode.In(r, unicode.Mark)
gotMark := x.isModifier()
if gotMark != wantMark {
t.Errorf("IsMark(%U) = %v; want %v", r, gotMark, wantMark)
}
})
ucd.Parse(gen.OpenUCDFile("UnicodeData.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
got := x.isViramaModifier()
const cccVirama = 9
want := p.Int(ucd.CanonicalCombiningClass) == cccVirama
if got != want {
t.Errorf("IsVirama(%U) = %v; want %v", r, got, want)
}
rtl := false
switch p.String(ucd.BidiClass) {
case "R", "AL", "AN":
rtl = true
}
if got := x.isBidi("A"); got != rtl && !x.isMapped() {
t.Errorf("IsBidi(%U) = %v; want %v", r, got, rtl)
}
})
ucd.Parse(gen.OpenUCDFile("extracted/DerivedJoiningType.txt"), func(p *ucd.Parser) {
r := p.Rune(0)
x := lookup(r)
if x.isMapped() {
return
}
got := x.joinType()
want := joinType[p.String(1)]
if got != want {
t.Errorf("JoinType(%U) = %x; want %x", r, got, want)
}
})
}

View file

@ -1,123 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This file contains definitions for interpreting the trie value of the idna
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds information from the IDNA mapping table for a single rune. It is
// the value returned by a trie lookup. In most cases, all information fits in
// a 16-bit value. For mappings, this value may contain an index into a slice
// with the mapped string. Such mappings can consist of the actual mapped value
// or an XOR pattern to be applied to the bytes of the UTF-8 encoding of the
// input rune. This technique is used by the cases packages and reduces the
// table size significantly.
//
// The per-rune values have the following format:
//
// if mapped {
// if inlinedXOR {
// 15..13 inline XOR marker
// 12..11 unused
// 10..3 inline XOR mask
// } else {
// 15..3 index into xor or mapping table
// }
// } else {
// 15..14 unused
// 13 mayNeedNorm
// 12..11 attributes
// 10..8 joining type
// 7..3 category type
// }
// 2 use xor pattern
// 1..0 mapped category
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
catSmallMask = 0x3
catBigMask = 0xF8
indexShift = 3
xorBit = 0x4 // interpret the index as an xor pattern
inlineXOR = 0xE000 // These bits are set if the XOR pattern is inlined.
joinShift = 8
joinMask = 0x07
// Attributes
attributesMask = 0x1800
viramaModifier = 0x1800
modifier = 0x1000
rtl = 0x0800
mayNeedNorm = 0x2000
)
// A category corresponds to a category defined in the IDNA mapping table.
type category uint16
const (
unknown category = 0 // not currently defined in unicode.
mapped category = 1
disallowedSTD3Mapped category = 2
deviation category = 3
)
const (
valid category = 0x08
validNV8 category = 0x18
validXV8 category = 0x28
disallowed category = 0x40
disallowedSTD3Valid category = 0x80
ignored category = 0xC0
)
// join types and additional rune information
const (
joiningL = (iota + 1)
joiningD
joiningT
joiningR
// The following types are derived during processing.
joinZWJ
joinZWNJ
joinVirama
numJoinTypes
)
func (c info) isMapped() bool {
return c&0x3 != 0
}
func (c info) category() category {
small := c & catSmallMask
if small != 0 {
return category(small)
}
return category(c & catBigMask)
}
func (c info) joinType() info {
if c.isMapped() {
return 0
}
return (c >> joinShift) & joinMask
}
func (c info) isModifier() bool {
return c&(modifier|catSmallMask) == modifier
}
func (c info) isViramaModifier() bool {
return c&(attributesMask|catSmallMask) == viramaModifier
}
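// A minimal sketch (illustrative only) of how the accessors above decode the
// documented bit layout for a non-mapped rune tagged as a virama modifier
// with joining type T.
func exampleInfoBits() (mapped bool, cat category, join info, virama bool) {
c := info(viramaModifier) | info(joiningT)<<joinShift | info(valid)
// Reports false, valid, joiningT, true.
return c.isMapped(), c.category(), c.joinType(), c.isViramaModifier()
}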

View file

@ -1,717 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_trieval.go gen_common.go
// Package idna implements IDNA2008 using the compatibility processing
// defined by UTS (Unicode Technical Standard) #46, which defines a standard to
// deal with the transition from IDNA2003.
//
// IDNA2008 (Internationalized Domain Names for Applications) is defined in RFC
// 5890, RFC 5891, RFC 5892, RFC 5893 and RFC 5894.
// UTS #46 is defined in http://www.unicode.org/reports/tr46.
// See http://unicode.org/cldr/utility/idna.jsp for a visualization of the
// differences between these two standards.
package idna // import "golang.org/x/text/internal/export/idna"
import (
"fmt"
"strings"
"unicode/utf8"
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/unicode/bidi"
"golang.org/x/text/unicode/norm"
)
// NOTE: Unlike common practice in Go APIs, the functions will return a
// sanitized domain name in case of errors. Browsers sometimes use a partially
// evaluated string for lookup.
// TODO: the current error handling is, in my opinion, the least opinionated.
// Other strategies are also viable, though:
// Option 1) Return an empty string in case of error, but allow the user to
// specify explicitly which errors to ignore.
// Option 2) Return the partially evaluated string if it is itself a valid
// string, otherwise return the empty string in case of error.
// Option 3) Option 1 and 2.
// Option 4) Always return an empty string for now and implement Option 1 as
// needed, and document that the return string may not be empty in case of
// error in the future.
// I think Option 1 is best, but it is quite opinionated.
// ToASCII is a wrapper for Punycode.ToASCII.
func ToASCII(s string) (string, error) {
return Punycode.process(s, true)
}
// ToUnicode is a wrapper for Punycode.ToUnicode.
func ToUnicode(s string) (string, error) {
return Punycode.process(s, false)
}
// An Option configures a Profile at creation time.
type Option func(*options)
// Transitional sets a Profile to use the Transitional mapping as defined in UTS
// #46. This will cause, for example, "ß" to be mapped to "ss". Using the
// transitional mapping provides a compromise between IDNA2003 and IDNA2008
// compatibility. It is used by most browsers when resolving domain names. This
// option is only meaningful if combined with MapForLookup.
func Transitional(transitional bool) Option {
return func(o *options) { o.transitional = transitional }
}
// VerifyDNSLength sets whether a Profile should fail if any of the IDN parts
// are longer than allowed by the RFC.
func VerifyDNSLength(verify bool) Option {
return func(o *options) { o.verifyDNSLength = verify }
}
// RemoveLeadingDots removes leading label separators. Leading runes that map to
// dots, such as U+3002 IDEOGRAPHIC FULL STOP, are removed as well.
//
// This is the behavior suggested by the UTS #46 and is adopted by some
// browsers.
func RemoveLeadingDots(remove bool) Option {
return func(o *options) { o.removeLeadingDots = remove }
}
// ValidateLabels sets whether to check the mandatory label validation criteria
// as defined in Section 5.4 of RFC 5891. This includes testing for correct use
// of hyphens ('-'), normalization, validity of runes, and the context rules.
func ValidateLabels(enable bool) Option {
return func(o *options) {
// Don't override existing mappings, but set one that at least checks
// normalization if it is not set.
if o.mapping == nil && enable {
o.mapping = normalize
}
o.trie = trie
o.validateLabels = enable
o.fromPuny = validateFromPunycode
}
}
// StrictDomainName limits the set of permissible ASCII characters to those
// allowed in domain names as defined in RFC 1034 (A-Z, a-z, 0-9 and the
// hyphen). This is set by default for MapForLookup and ValidateForRegistration.
//
// This option is useful, for instance, for browsers that allow characters
// outside this range, for example a '_' (U+005F LOW LINE). See
// http://www.rfc-editor.org/std/std3.txt for more details. This option
// corresponds to the UseSTD3ASCIIRules option in UTS #46.
func StrictDomainName(use bool) Option {
return func(o *options) {
o.trie = trie
o.useSTD3Rules = use
o.fromPuny = validateFromPunycode
}
}
// NOTE: the following options pull in tables. The tables should not be linked
// in as long as the options are not used.
// BidiRule enables the Bidi rule as defined in RFC 5893. Any application
// that relies on proper validation of labels should include this rule.
func BidiRule() Option {
return func(o *options) { o.bidirule = bidirule.ValidString }
}
// ValidateForRegistration sets validation options to verify that a given IDN is
// properly formatted for registration as defined by Section 4 of RFC 5891.
func ValidateForRegistration() Option {
return func(o *options) {
o.mapping = validateRegistration
StrictDomainName(true)(o)
ValidateLabels(true)(o)
VerifyDNSLength(true)(o)
BidiRule()(o)
}
}
// MapForLookup sets validation and mapping options such that a given IDN is
// transformed for domain name lookup according to the requirements set out in
// Section 5 of RFC 5891. The mappings follow the recommendations of RFC 5894,
// RFC 5895 and UTS 46. It does not add the Bidi Rule. Use the BidiRule option
// to add this check.
//
// The mappings include normalization as well as case, width, and other
// compatibility mappings.
func MapForLookup() Option {
return func(o *options) {
o.mapping = validateAndMap
StrictDomainName(true)(o)
ValidateLabels(true)(o)
}
}
type options struct {
transitional bool
useSTD3Rules bool
validateLabels bool
verifyDNSLength bool
removeLeadingDots bool
trie *idnaTrie
// fromPuny calls validation rules when converting A-labels to U-labels.
fromPuny func(p *Profile, s string) error
// mapping implements a validation and mapping step as defined in RFC 5895
// or UTS 46, tailored to, for example, domain registration or lookup.
mapping func(p *Profile, s string) (mapped string, isBidi bool, err error)
// bidirule, if specified, checks whether s conforms to the Bidi Rule
// defined in RFC 5893.
bidirule func(s string) bool
}
// A Profile defines the configuration of an IDNA mapper.
type Profile struct {
options
}
func apply(o *options, opts []Option) {
for _, f := range opts {
f(o)
}
}
// New creates a new Profile.
//
// With no options, the returned Profile is the most permissive and equals the
// Punycode Profile. Options can be passed to further restrict the Profile. The
// MapForLookup and ValidateForRegistration options set a collection of options,
// for lookup and registration purposes respectively, which can be tailored by
// adding more fine-grained options, where later options override earlier
// options.
func New(o ...Option) *Profile {
p := &Profile{}
apply(&p.options, o)
return p
}
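// A small composition sketch (illustrative, assuming a client imports this
// package as idna). MapForLookup sets a bundle of options, and later options
// override earlier ones, so the STD3 restriction can be relaxed afterwards,
// as in the STD3 profile used by the tests accompanying this package:
//
//    p := idna.New(idna.MapForLookup(), idna.StrictDomainName(false))
//    a, err := p.ToASCII("*.foo.com") // "*.foo.com"; '*' is allowed without STD3 rules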
// ToASCII converts a domain or domain label to its ASCII form. For example,
// ToASCII("bücher.example.com") is "xn--bcher-kva.example.com", and
// ToASCII("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToASCII(s string) (string, error) {
return p.process(s, true)
}
// ToUnicode converts a domain or domain label to its Unicode form. For example,
// ToUnicode("xn--bcher-kva.example.com") is "bücher.example.com", and
// ToUnicode("golang") is "golang". If an error is encountered it will return
// an error and a (partially) processed result.
func (p *Profile) ToUnicode(s string) (string, error) {
pp := *p
pp.transitional = false
return pp.process(s, false)
}
// String returns a description of the profile for debugging purposes. The
// format of the string may change between versions.
func (p *Profile) String() string {
s := ""
if p.transitional {
s = "Transitional"
} else {
s = "NonTransitional"
}
if p.useSTD3Rules {
s += ":UseSTD3Rules"
}
if p.validateLabels {
s += ":ValidateLabels"
}
if p.verifyDNSLength {
s += ":VerifyDNSLength"
}
return s
}
var (
// Punycode is a Profile that does raw punycode processing with a minimum
// of validation.
Punycode *Profile = punycode
// Lookup is the recommended profile for looking up domain names, according
// to Section 5 of RFC 5891. The exact configuration of this profile may
// change over time.
Lookup *Profile = lookup
// Display is the recommended profile for displaying domain names.
// The configuration of this profile may change over time.
Display *Profile = display
// Registration is the recommended profile for checking whether a given
// IDN is valid for registration, according to Section 4 of RFC 5891.
Registration *Profile = registration
punycode = &Profile{}
lookup = &Profile{options{
transitional: true,
useSTD3Rules: true,
validateLabels: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
display = &Profile{options{
useSTD3Rules: true,
validateLabels: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateAndMap,
bidirule: bidirule.ValidString,
}}
registration = &Profile{options{
useSTD3Rules: true,
validateLabels: true,
verifyDNSLength: true,
trie: trie,
fromPuny: validateFromPunycode,
mapping: validateRegistration,
bidirule: bidirule.ValidString,
}}
// TODO: profiles
// Register: recommended for approving domain names: don't do any mappings
// but rather reject on invalid input. Bundle or block deviation characters.
)
type labelError struct{ label, code_ string }
func (e labelError) code() string { return e.code_ }
func (e labelError) Error() string {
return fmt.Sprintf("idna: invalid label %q", e.label)
}
type runeError rune
func (e runeError) code() string { return "P1" }
func (e runeError) Error() string {
return fmt.Sprintf("idna: disallowed rune %U", e)
}
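// Errors produced by this package carry a short code matching the categories
// used in the conformance data (for example "P1" for a disallowed rune). The
// code method is unexported, so the assertion below only works from within
// this package, as the accompanying tests do:
//
//    if _, err := Lookup.ToASCII("lab⒐be"); err != nil {
//        if coder, ok := err.(interface{ code() string }); ok {
//            _ = coder.code() // "P1": U+2490 is disallowed for lookup
//        }
//    }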
// process implements the algorithm described in section 4 of UTS #46,
// see http://www.unicode.org/reports/tr46.
func (p *Profile) process(s string, toASCII bool) (string, error) {
var err error
var isBidi bool
if p.mapping != nil {
s, isBidi, err = p.mapping(p, s)
}
// Remove leading empty labels.
if p.removeLeadingDots {
for ; len(s) > 0 && s[0] == '.'; s = s[1:] {
}
}
// TODO: allow for a quick check of the tables data.
// It seems like we should only create this error on ToASCII, but the
// UTS 46 conformance tests suggest we should always check this.
if err == nil && p.verifyDNSLength && s == "" {
err = &labelError{s, "A4"}
}
labels := labelIter{orig: s}
for ; !labels.done(); labels.next() {
label := labels.label()
if label == "" {
// Empty labels are not okay. The label iterator skips the last
// label if it is empty.
if err == nil && p.verifyDNSLength {
err = &labelError{s, "A4"}
}
continue
}
if strings.HasPrefix(label, acePrefix) {
u, err2 := decode(label[len(acePrefix):])
if err2 != nil {
if err == nil {
err = err2
}
// Spec says keep the old label.
continue
}
isBidi = isBidi || bidirule.DirectionString(u) != bidi.LeftToRight
labels.set(u)
if err == nil && p.validateLabels {
err = p.fromPuny(p, u)
}
if err == nil {
// This should be called on NonTransitional, according to the
// spec, but that currently does not have any effect. Use the
// original profile to preserve options.
err = p.validateLabel(u)
}
} else if err == nil {
err = p.validateLabel(label)
}
}
if isBidi && p.bidirule != nil && err == nil {
for labels.reset(); !labels.done(); labels.next() {
if !p.bidirule(labels.label()) {
err = &labelError{s, "B"}
break
}
}
}
if toASCII {
for labels.reset(); !labels.done(); labels.next() {
label := labels.label()
if !ascii(label) {
a, err2 := encode(acePrefix, label)
if err == nil {
err = err2
}
label = a
labels.set(a)
}
n := len(label)
if p.verifyDNSLength && err == nil && (n == 0 || n > 63) {
err = &labelError{label, "A4"}
}
}
}
s = labels.result()
if toASCII && p.verifyDNSLength && err == nil {
// Compute the length of the domain name minus the root label and its dot.
n := len(s)
if n > 0 && s[n-1] == '.' {
n--
}
if len(s) < 1 || n > 253 {
err = &labelError{s, "A4"}
}
}
return s, err
}
func normalize(p *Profile, s string) (mapped string, isBidi bool, err error) {
// TODO: consider first doing a quick check to see if any of these checks
// need to be done. This will make it slower in the general case, but
// faster in the common case.
mapped = norm.NFC.String(s)
isBidi = bidirule.DirectionString(mapped) == bidi.RightToLeft
return mapped, isBidi, nil
}
func validateRegistration(p *Profile, s string) (idem string, bidi bool, err error) {
// TODO: filter need for normalization in loop below.
if !norm.NFC.IsNormalString(s) {
return s, false, &labelError{s, "V1"}
}
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
bidi = bidi || info(v).isBidi(s[i:])
// Copy bytes not copied so far.
switch p.simplify(info(v).category()) {
// TODO: handle the NV8 defined in the Unicode idna data set to allow
// for strict conformance to IDNA2008.
case valid, deviation:
case disallowed, mapped, unknown, ignored:
r, _ := utf8.DecodeRuneInString(s[i:])
return s, bidi, runeError(r)
}
i += sz
}
return s, bidi, nil
}
func (c info) isBidi(s string) bool {
if !c.isMapped() {
return c&attributesMask == rtl
}
// TODO: also store bidi info for mapped data. This is possible, but a bit
// cumbersome and not for the common case.
p, _ := bidi.LookupString(s)
switch p.Class() {
case bidi.R, bidi.AL, bidi.AN:
return true
}
return false
}
func validateAndMap(p *Profile, s string) (vm string, bidi bool, err error) {
var (
b []byte
k int
)
// combinedInfoBits contains the or-ed bits of all runes. We use this
// to derive the mayNeedNorm bit later. This may trigger normalization
// overeagerly, but it will not do so in the common case. The end result
// is another 10% saving on BenchmarkProfile for the common case.
var combinedInfoBits info
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
combinedInfoBits |= info(v)
bidi = bidi || info(v).isBidi(s[i:])
start := i
i += sz
// Copy bytes not copied so far.
switch p.simplify(info(v).category()) {
case valid:
continue
case disallowed:
if err == nil {
r, _ := utf8.DecodeRuneInString(s[start:])
err = runeError(r)
}
continue
case mapped, deviation:
b = append(b, s[k:start]...)
b = info(v).appendMapping(b, s[start:i])
case ignored:
b = append(b, s[k:start]...)
// drop the rune
case unknown:
b = append(b, s[k:start]...)
b = append(b, "\ufffd"...)
}
k = i
}
if k == 0 {
// No changes so far.
if combinedInfoBits&mayNeedNorm != 0 {
s = norm.NFC.String(s)
}
} else {
b = append(b, s[k:]...)
if norm.NFC.QuickSpan(b) != len(b) {
b = norm.NFC.Bytes(b)
}
// TODO: the punycode converters require strings as input.
s = string(b)
}
return s, bidi, err
}
// A labelIter allows iterating over domain name labels.
type labelIter struct {
orig string
slice []string
curStart int
curEnd int
i int
}
func (l *labelIter) reset() {
l.curStart = 0
l.curEnd = 0
l.i = 0
}
func (l *labelIter) done() bool {
return l.curStart >= len(l.orig)
}
func (l *labelIter) result() string {
if l.slice != nil {
return strings.Join(l.slice, ".")
}
return l.orig
}
func (l *labelIter) label() string {
if l.slice != nil {
return l.slice[l.i]
}
p := strings.IndexByte(l.orig[l.curStart:], '.')
l.curEnd = l.curStart + p
if p == -1 {
l.curEnd = len(l.orig)
}
return l.orig[l.curStart:l.curEnd]
}
// next sets the value to the next label. It skips the last label if it is empty.
func (l *labelIter) next() {
l.i++
if l.slice != nil {
if l.i >= len(l.slice) || l.i == len(l.slice)-1 && l.slice[l.i] == "" {
l.curStart = len(l.orig)
}
} else {
l.curStart = l.curEnd + 1
if l.curStart == len(l.orig)-1 && l.orig[l.curStart] == '.' {
l.curStart = len(l.orig)
}
}
}
func (l *labelIter) set(s string) {
if l.slice == nil {
l.slice = strings.Split(l.orig, ".")
}
l.slice[l.i] = s
}
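// An illustrative sketch of how the iterator above is used (mirroring the
// loops in process, not part of the original source). Labels are visited in
// order and set only materializes the backing slice when a label changes:
//
//    labels := labelIter{orig: "a.xn--bcher-kva.c"}
//    for ; !labels.done(); labels.next() {
//        if strings.HasPrefix(labels.label(), acePrefix) {
//            if u, err := decode(labels.label()[len(acePrefix):]); err == nil {
//                labels.set(u) // replaces "xn--bcher-kva" with "bücher"
//            }
//        }
//    }
//    _ = labels.result() // "a.bücher.c"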
// acePrefix is the ASCII Compatible Encoding prefix.
const acePrefix = "xn--"
func (p *Profile) simplify(cat category) category {
switch cat {
case disallowedSTD3Mapped:
if p.useSTD3Rules {
cat = disallowed
} else {
cat = mapped
}
case disallowedSTD3Valid:
if p.useSTD3Rules {
cat = disallowed
} else {
cat = valid
}
case deviation:
if !p.transitional {
cat = valid
}
case validNV8, validXV8:
// TODO: handle V2008
cat = valid
}
return cat
}
func validateFromPunycode(p *Profile, s string) error {
if !norm.NFC.IsNormalString(s) {
return &labelError{s, "V1"}
}
// TODO: detect whether string may have to be normalized in the following
// loop.
for i := 0; i < len(s); {
v, sz := trie.lookupString(s[i:])
if c := p.simplify(info(v).category()); c != valid && c != deviation {
return &labelError{s, "V6"}
}
i += sz
}
return nil
}
const (
zwnj = "\u200c"
zwj = "\u200d"
)
type joinState int8
const (
stateStart joinState = iota
stateVirama
stateBefore
stateBeforeVirama
stateAfter
stateFAIL
)
var joinStates = [][numJoinTypes]joinState{
stateStart: {
joiningL: stateBefore,
joiningD: stateBefore,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateVirama,
},
stateVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
},
stateBefore: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
joinZWNJ: stateAfter,
joinZWJ: stateFAIL,
joinVirama: stateBeforeVirama,
},
stateBeforeVirama: {
joiningL: stateBefore,
joiningD: stateBefore,
joiningT: stateBefore,
},
stateAfter: {
joiningL: stateFAIL,
joiningD: stateBefore,
joiningT: stateAfter,
joiningR: stateStart,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateAfter, // no-op as we can't accept joiners here
},
stateFAIL: {
0: stateFAIL,
joiningL: stateFAIL,
joiningD: stateFAIL,
joiningT: stateFAIL,
joiningR: stateFAIL,
joinZWNJ: stateFAIL,
joinZWJ: stateFAIL,
joinVirama: stateFAIL,
},
}
// validateLabel validates the criteria from Section 4.1. Items 1, 4, and 6 are
// already implicitly satisfied by the overall implementation.
func (p *Profile) validateLabel(s string) (err error) {
if s == "" {
if p.verifyDNSLength {
return &labelError{s, "A4"}
}
return nil
}
if !p.validateLabels {
return nil
}
trie := p.trie // p.validateLabels is only set if trie is set.
if len(s) > 4 && s[2] == '-' && s[3] == '-' {
return &labelError{s, "V2"}
}
if s[0] == '-' || s[len(s)-1] == '-' {
return &labelError{s, "V3"}
}
// TODO: merge the use of this in the trie.
v, sz := trie.lookupString(s)
x := info(v)
if x.isModifier() {
return &labelError{s, "V5"}
}
// Quickly return in the absence of zero-width (non) joiners.
if strings.Index(s, zwj) == -1 && strings.Index(s, zwnj) == -1 {
return nil
}
st := stateStart
for i := 0; ; {
jt := x.joinType()
if s[i:i+sz] == zwj {
jt = joinZWJ
} else if s[i:i+sz] == zwnj {
jt = joinZWNJ
}
st = joinStates[st][jt]
if x.isViramaModifier() {
st = joinStates[st][joinVirama]
}
if i += sz; i == len(s) {
break
}
v, sz = trie.lookupString(s[i:])
x = info(v)
}
if st == stateFAIL || st == stateAfter {
return &labelError{s, "C"}
}
return nil
}
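// An illustration of the effect of the joiner rules above (the values are
// taken from the ContextJ cases in the accompanying tests). The Display
// profile keeps the deviation rune U+200C (ZWNJ), so a bare ZWNJ between two
// Latin letters is rejected with a ContextJ ("C") error, while the
// transitional Lookup profile maps the ZWNJ away and succeeds:
//
//    _, err := Display.ToUnicode("a\u200cb") // ContextJ error ("C")
//    a, _ := Lookup.ToASCII("a\u200cb")      // "ab", no error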
func ascii(s string) bool {
for i := 0; i < len(s); i++ {
if s[i] >= utf8.RuneSelf {
return false
}
}
return true
}

View file

@ -1,308 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
import (
"fmt"
"strconv"
"strings"
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
func TestAllocToUnicode(t *testing.T) {
avg := testtext.AllocsPerRun(1000, func() {
ToUnicode("www.golang.org")
})
if avg > 0 {
t.Errorf("got %f; want 0", avg)
}
}
func TestAllocToASCII(t *testing.T) {
avg := testtext.AllocsPerRun(1000, func() {
ToASCII("www.golang.org")
})
if avg > 0 {
t.Errorf("got %f; want 0", avg)
}
}
func TestProfiles(t *testing.T) {
testCases := []struct {
name string
want, got *Profile
}{
{"Punycode", punycode, New()},
{"Registration", registration, New(ValidateForRegistration())},
{"Registration", registration, New(
ValidateForRegistration(),
VerifyDNSLength(true),
BidiRule(),
)},
{"Lookup", lookup, New(MapForLookup(), BidiRule(), Transitional(true))},
{"Display", display, New(MapForLookup(), BidiRule())},
}
for _, tc := range testCases {
// Functions are not comparable, but the printed version will include
// their pointers.
got := fmt.Sprintf("%#v", tc.got)
want := fmt.Sprintf("%#v", tc.want)
if got != want {
t.Errorf("%s: \ngot %#v,\nwant %#v", tc.name, got, want)
}
}
}
// doTest performs a single test f(input) and verifies that the output matches
// out and that the returned error is expected. The errors string contains
// all allowed error codes as categorized in
// http://www.unicode.org/Public/idna/9.0.0/IdnaTest.txt:
// P: Processing
// V: Validity
// A: to ASCII
// B: Bidi
// C: Context J
func doTest(t *testing.T, f func(string) (string, error), name, input, want, errors string) {
errors = strings.Trim(errors, "[]")
test := "ok"
if errors != "" {
test = "err:" + errors
}
// Replace some of the escape sequences to make it easier to single out
// tests by name on the command line.
in := strings.Trim(strconv.QuoteToASCII(input), `"`)
in = strings.Replace(in, `\u`, "#", -1)
in = strings.Replace(in, `\U`, "#", -1)
name = fmt.Sprintf("%s/%s/%s", name, in, test)
testtext.Run(t, name, func(t *testing.T) {
got, err := f(input)
if err != nil {
code := err.(interface {
code() string
}).code()
if strings.Index(errors, code) == -1 {
t.Errorf("error %q not in set of expected errors {%v}", code, errors)
}
} else if errors != "" {
t.Errorf("no errors; want error in {%v}", errors)
}
if want != "" && got != want {
t.Errorf(`string: got %+q; want %+q`, got, want)
}
})
}
// TestLabelErrors tests strings returned in case of error. All results should
// be identical to the reference implementation and can be verified at
// http://unicode.org/cldr/utility/idna.jsp. The reference implementation,
// however, seems to not display Bidi and ContextJ errors.
//
// In some cases the behavior of browsers is added as a comment. In all cases,
// whenever a resolve search returns an error here, Chrome will treat the input
// string as a search string (including those for Bidi and Context J errors),
// unless noted otherwise.
func TestLabelErrors(t *testing.T) {
encode := func(s string) string { s, _ = encode(acePrefix, s); return s }
type kind struct {
name string
f func(string) (string, error)
}
punyA := kind{"PunycodeA", punycode.ToASCII}
resolve := kind{"ResolveA", Lookup.ToASCII}
display := kind{"ToUnicode", Display.ToUnicode}
p := New(VerifyDNSLength(true), MapForLookup(), BidiRule())
lengthU := kind{"CheckLengthU", p.ToUnicode}
lengthA := kind{"CheckLengthA", p.ToASCII}
p = New(MapForLookup(), StrictDomainName(false))
std3 := kind{"STD3", p.ToASCII}
testCases := []struct {
kind
input string
want string
wantErr string
}{
{lengthU, "", "", "A4"}, // From UTS 46 conformance test.
{lengthA, "", "", "A4"},
{lengthU, "xn--", "", "A4"},
{lengthU, "foo.xn--", "foo.", "A4"}, // TODO: is dropping xn-- correct?
{lengthU, "xn--.foo", ".foo", "A4"},
{lengthU, "foo.xn--.bar", "foo..bar", "A4"},
{display, "xn--", "", ""},
{display, "foo.xn--", "foo.", ""}, // TODO: is dropping xn-- correct?
{display, "xn--.foo", ".foo", ""},
{display, "foo.xn--.bar", "foo..bar", ""},
{lengthA, "a..b", "a..b", "A4"},
{punyA, ".b", ".b", ""},
// For backwards compatibility, the Punycode profile does not map runes.
{punyA, "\u3002b", "xn--b-83t", ""},
{punyA, "..b", "..b", ""},
{lengthA, ".b", ".b", "A4"},
{lengthA, "\u3002b", ".b", "A4"},
{lengthA, "..b", "..b", "A4"},
{lengthA, "b..", "b..", ""},
// Sharpened Bidi rules for Unicode 10.0.0: they apply to ALL labels if ANY
// of the labels is RTL.
{lengthA, "\ufe05\u3002\u3002\U0002603e\u1ce0", "..xn--t6f5138v", "A4"},
{lengthA, "FAX\u2a77\U0001d186\u3002\U0001e942\U000e0181\u180c", "", "B6"},
{resolve, "a..b", "a..b", ""},
// Note that leading dots are not stripped. This is to be consistent
// with the Punycode profile as well as the conformance test.
{resolve, ".b", ".b", ""},
{resolve, "\u3002b", ".b", ""},
{resolve, "..b", "..b", ""},
{resolve, "b..", "b..", ""},
// Raw punycode
{punyA, "", "", ""},
{punyA, "*.foo.com", "*.foo.com", ""},
{punyA, "Foo.com", "Foo.com", ""},
// STD3 rules
{display, "*.foo.com", "*.foo.com", "P1"},
{std3, "*.foo.com", "*.foo.com", ""},
// Don't map U+2490 (DIGIT NINE FULL STOP). This is the behavior of
// Chrome, Safari, and IE. Firefox will first map ⒐ to 9. and return
// lab9.be.
{resolve, "lab⒐be", "xn--labbe-zh9b", "P1"}, // encode("lab⒐be")
{display, "lab⒐be", "lab⒐be", "P1"},
{resolve, "plan⒐faß.de", "xn--planfass-c31e.de", "P1"}, // encode("plan⒐fass") + ".de"
{display, "Plan⒐faß.de", "plan⒐faß.de", "P1"},
// Chrome 54.0 recognizes the error and treats this input verbatim as a
// search string.
// Safari 10.0 (not conforming to the spec) decomposes "⒈" and computes
// the punycode on the result using transitional mapping.
// Firefox 49.0.1 goes haywire on this string and prints a bunch of what
// seems to be nested punycode encodings.
{resolve, "日本⒈co.ßßß.de", "xn--co-wuw5954azlb.ssssss.de", "P1"},
{display, "日本⒈co.ßßß.de", "日本⒈co.ßßß.de", "P1"},
{resolve, "a\u200Cb", "ab", ""},
{display, "a\u200Cb", "a\u200Cb", "C"},
{resolve, encode("a\u200Cb"), encode("a\u200Cb"), "C"},
{display, "a\u200Cb", "a\u200Cb", "C"},
{resolve, "grﻋﺮﺑﻲ.de", "xn--gr-gtd9a1b0g.de", "B"},
{
// Notice how the string gets transformed, even with an error.
// Chrome will use the original string if it finds an error, so not
// the transformed one.
display,
"gr\ufecb\ufeae\ufe91\ufef2.de",
"gr\u0639\u0631\u0628\u064a.de",
"B",
},
{resolve, "\u0671.\u03c3\u07dc", "xn--qib.xn--4xa21s", "B"}, // ٱ.σߜ
{display, "\u0671.\u03c3\u07dc", "\u0671.\u03c3\u07dc", "B"},
// normalize input
{resolve, "a\u0323\u0322", "xn--jta191l", ""}, // ạ̢
{display, "a\u0323\u0322", "\u1ea1\u0322", ""},
// Non-normalized strings are not normalized when they originate from
// punycode. Despite the error, Chrome, Safari and Firefox will attempt
// to look up the input punycode.
{resolve, encode("a\u0323\u0322") + ".com", "xn--a-tdbc.com", "V1"},
{display, encode("a\u0323\u0322") + ".com", "a\u0323\u0322.com", "V1"},
}
for _, tc := range testCases {
doTest(t, tc.f, tc.name, tc.input, tc.want, tc.wantErr)
}
}
func TestConformance(t *testing.T) {
testtext.SkipIfNotLong(t)
r := gen.OpenUnicodeFile("idna", "", "IdnaTest.txt")
defer r.Close()
section := "main"
started := false
p := ucd.New(r, ucd.CommentHandler(func(s string) {
if started {
section = strings.ToLower(strings.Split(s, " ")[0])
}
}))
transitional := New(Transitional(true), VerifyDNSLength(true), BidiRule(), MapForLookup())
nonTransitional := New(VerifyDNSLength(true), BidiRule(), MapForLookup())
for p.Next() {
started = true
// What to test
profiles := []*Profile{}
switch p.String(0) {
case "T":
profiles = append(profiles, transitional)
case "N":
profiles = append(profiles, nonTransitional)
case "B":
profiles = append(profiles, transitional)
profiles = append(profiles, nonTransitional)
}
src := unescape(p.String(1))
wantToUnicode := unescape(p.String(2))
if wantToUnicode == "" {
wantToUnicode = src
}
wantToASCII := unescape(p.String(3))
if wantToASCII == "" {
wantToASCII = wantToUnicode
}
wantErrToUnicode := ""
if strings.HasPrefix(wantToUnicode, "[") {
wantErrToUnicode = wantToUnicode
wantToUnicode = ""
}
wantErrToASCII := ""
if strings.HasPrefix(wantToASCII, "[") {
wantErrToASCII = wantToASCII
wantToASCII = ""
}
// TODO: also do IDNA tests.
// invalidInIDNA2008 := p.String(4) == "NV8"
for _, p := range profiles {
name := fmt.Sprintf("%s:%s", section, p)
doTest(t, p.ToUnicode, name+":ToUnicode", src, wantToUnicode, wantErrToUnicode)
doTest(t, p.ToASCII, name+":ToASCII", src, wantToASCII, wantErrToASCII)
}
}
}
func unescape(s string) string {
s, err := strconv.Unquote(`"` + s + `"`)
if err != nil {
panic(err)
}
return s
}
func BenchmarkProfile(b *testing.B) {
for i := 0; i < b.N; i++ {
Lookup.ToASCII("www.yahoogle.com")
}
}

View file

@ -1,201 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// This file implements the Punycode algorithm from RFC 3492.
import (
"math"
"strings"
"unicode/utf8"
)
// These parameter values are specified in section 5.
//
// All computation is done with int32s, so that overflow behavior is identical
// regardless of whether int is 32-bit or 64-bit.
const (
base int32 = 36
damp int32 = 700
initialBias int32 = 72
initialN int32 = 128
skew int32 = 38
tmax int32 = 26
tmin int32 = 1
)
func punyError(s string) error { return &labelError{s, "A3"} }
// decode decodes a string as specified in section 6.2.
func decode(encoded string) (string, error) {
if encoded == "" {
return "", nil
}
pos := 1 + strings.LastIndex(encoded, "-")
if pos == 1 {
return "", punyError(encoded)
}
if pos == len(encoded) {
return encoded[:len(encoded)-1], nil
}
output := make([]rune, 0, len(encoded))
if pos != 0 {
for _, r := range encoded[:pos-1] {
output = append(output, r)
}
}
i, n, bias := int32(0), initialN, initialBias
for pos < len(encoded) {
oldI, w := i, int32(1)
for k := base; ; k += base {
if pos == len(encoded) {
return "", punyError(encoded)
}
digit, ok := decodeDigit(encoded[pos])
if !ok {
return "", punyError(encoded)
}
pos++
i += digit * w
if i < 0 {
return "", punyError(encoded)
}
t := k - bias
if t < tmin {
t = tmin
} else if t > tmax {
t = tmax
}
if digit < t {
break
}
w *= base - t
if w >= math.MaxInt32/base {
return "", punyError(encoded)
}
}
x := int32(len(output) + 1)
bias = adapt(i-oldI, x, oldI == 0)
n += i / x
i %= x
if n > utf8.MaxRune || len(output) >= 1024 {
return "", punyError(encoded)
}
output = append(output, 0)
copy(output[i+1:], output[i:])
output[i] = n
i++
}
return string(output), nil
}
// encode encodes a string as specified in section 6.3 and prepends prefix to
// the result.
//
// The "while h < length(input)" line in the specification becomes "for
// remaining != 0" in the Go code, because len(s) in Go is in bytes, not runes.
func encode(prefix, s string) (string, error) {
output := make([]byte, len(prefix), len(prefix)+1+2*len(s))
copy(output, prefix)
delta, n, bias := int32(0), initialN, initialBias
b, remaining := int32(0), int32(0)
for _, r := range s {
if r < 0x80 {
b++
output = append(output, byte(r))
} else {
remaining++
}
}
h := b
if b > 0 {
output = append(output, '-')
}
for remaining != 0 {
m := int32(0x7fffffff)
for _, r := range s {
if m > r && r >= n {
m = r
}
}
delta += (m - n) * (h + 1)
if delta < 0 {
return "", punyError(s)
}
n = m
for _, r := range s {
if r < n {
delta++
if delta < 0 {
return "", punyError(s)
}
continue
}
if r > n {
continue
}
q := delta
for k := base; ; k += base {
t := k - bias
if t < tmin {
t = tmin
} else if t > tmax {
t = tmax
}
if q < t {
break
}
output = append(output, encodeDigit(t+(q-t)%(base-t)))
q = (q - t) / (base - t)
}
output = append(output, encodeDigit(q))
bias = adapt(delta, h+1, h == b)
delta = 0
h++
remaining--
}
delta++
n++
}
return string(output), nil
}
func decodeDigit(x byte) (digit int32, ok bool) {
switch {
case '0' <= x && x <= '9':
return int32(x - ('0' - 26)), true
case 'A' <= x && x <= 'Z':
return int32(x - 'A'), true
case 'a' <= x && x <= 'z':
return int32(x - 'a'), true
}
return 0, false
}
func encodeDigit(digit int32) byte {
switch {
case 0 <= digit && digit < 26:
return byte(digit + 'a')
case 26 <= digit && digit < 36:
return byte(digit + ('0' - 26))
}
panic("idna: internal error in punycode encoding")
}
// adapt is the bias adaptation function specified in section 6.1.
func adapt(delta, numPoints int32, firstTime bool) int32 {
if firstTime {
delta /= damp
} else {
delta /= 2
}
delta += delta / numPoints
k := int32(0)
for delta > ((base-tmin)*tmax)/2 {
delta /= base - tmin
k += base
}
return k + (base-tmin+1)*delta/(delta+skew)
}
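// A round-trip sketch of the helpers above (the values match the Punycode
// test table accompanying this package). The ACE prefix "xn--" is added by
// the caller, as in Profile.process:
//
//    e, _ := encode("", "bücher") // "bcher-kva"
//    d, _ := decode("bcher-kva")  // "bücher"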

View file

@ -1,198 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
import (
"strings"
"testing"
)
var punycodeTestCases = [...]struct {
s, encoded string
}{
{"", ""},
{"-", "--"},
{"-a", "-a-"},
{"-a-", "-a--"},
{"a", "a-"},
{"a-", "a--"},
{"a-b", "a-b-"},
{"books", "books-"},
{"bücher", "bcher-kva"},
{"Hello世界", "Hello-ck1hg65u"},
{"ü", "tda"},
{"üý", "tdac"},
// The test cases below come from RFC 3492 section 7.1 with Errata 3026.
{
// (A) Arabic (Egyptian).
"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644" +
"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
"egbpdaj6bu4bxfgehfvwxn",
},
{
// (B) Chinese (simplified).
"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
"ihqwcrb4cv8a8dqg056pqjye",
},
{
// (C) Chinese (traditional).
"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
"ihqwctvzc91f659drss3x8bo0yb",
},
{
// (D) Czech.
"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074" +
"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D" +
"\u0065\u0073\u006B\u0079",
"Proprostnemluvesky-uyb24dma41a",
},
{
// (E) Hebrew.
"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8" +
"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2" +
"\u05D1\u05E8\u05D9\u05EA",
"4dbcagdahymbxekheh6e0a7fei0b",
},
{
// (F) Hindi (Devanagari).
"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D" +
"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939" +
"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947" +
"\u0939\u0948\u0902",
"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd",
},
{
// (G) Japanese (kanji and hiragana).
"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092" +
"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa",
},
{
// (H) Korean (Hangul syllables).
"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774" +
"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74" +
"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j" +
"psd879ccm6fea98c",
},
{
// (I) Russian (Cyrillic).
"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E" +
"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440" +
"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A" +
"\u0438",
"b1abfaaepdrnnbgefbadotcwatmq2g4l",
},
{
// (J) Spanish.
"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070" +
"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070" +
"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061" +
"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070" +
"\u0061\u00F1\u006F\u006C",
"PorqunopuedensimplementehablarenEspaol-fmd56a",
},
{
// (K) Vietnamese.
"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B" +
"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068" +
"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067" +
"\u0056\u0069\u1EC7\u0074",
"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g",
},
{
// (L) 3<nen>B<gumi><kinpachi><sensei>.
"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
"3B-ww4c5e180e575a65lsy2b",
},
{
// (M) <amuro><namie>-with-SUPER-MONKEYS.
"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074" +
"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D" +
"\u004F\u004E\u004B\u0045\u0059\u0053",
"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n",
},
{
// (N) Hello-Another-Way-<sorezore><no><basho>.
"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F" +
"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D" +
"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
"Hello-Another-Way--fc4qua05auwb3674vfr0b",
},
{
// (O) <hitotsu><yane><no><shita>2.
"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
"2-u9tlzr9756bt3uc0v",
},
{
// (P) Maji<de>Koi<suru>5<byou><mae>
"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059" +
"\u308B\u0035\u79D2\u524D",
"MajiKoi5-783gue6qz075azm5e",
},
{
// (Q) <pafii>de<runba>
"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
"de-jg4avhby1noc0d",
},
{
// (R) <sono><supiido><de>
"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
"d9juau41awczczp",
},
{
// (S) -> $1.00 <-
"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020" +
"\u003C\u002D",
"-> $1.00 <--",
},
}
func TestPunycode(t *testing.T) {
for _, tc := range punycodeTestCases {
if got, err := decode(tc.encoded); err != nil {
t.Errorf("decode(%q): %v", tc.encoded, err)
} else if got != tc.s {
t.Errorf("decode(%q): got %q, want %q", tc.encoded, got, tc.s)
}
if got, err := encode("", tc.s); err != nil {
t.Errorf(`encode("", %q): %v`, tc.s, err)
} else if got != tc.encoded {
t.Errorf(`encode("", %q): got %q, want %q`, tc.s, got, tc.encoded)
}
}
}
var punycodeErrorTestCases = [...]string{
"decode -", // A sole '-' is invalid.
"decode foo\x00bar", // '\x00' is not in [0-9A-Za-z].
"decode foo#bar", // '#' is not in [0-9A-Za-z].
"decode foo\u00A3bar", // '\u00A3' is not in [0-9A-Za-z].
"decode 9", // "9a" decodes to codepoint \u00A3; "9" is truncated.
"decode 99999a", // "99999a" decodes to codepoint \U0048A3C1, which is > \U0010FFFF.
"decode 9999999999a", // "9999999999a" overflows the int32 calculation.
"encode " + strings.Repeat("x", 65536) + "\uff00", // int32 overflow.
}
func TestPunycodeErrors(t *testing.T) {
for _, tc := range punycodeErrorTestCases {
var err error
switch {
case strings.HasPrefix(tc, "decode "):
_, err = decode(tc[7:])
case strings.HasPrefix(tc, "encode "):
_, err = encode("", tc[7:])
}
if err == nil {
if len(tc) > 256 {
tc = tc[:100] + "..." + tc[len(tc)-100:]
}
t.Errorf("no error for %s", tc)
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,70 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package idna
// appendMapping appends the mapping for the respective rune. isMapped must be
// true. A mapping is a categorization of a rune as defined in UTS #46.
func (c info) appendMapping(b []byte, s string) []byte {
index := int(c >> indexShift)
if c&xorBit == 0 {
s := mappings[index:]
return append(b, s[1:s[0]+1]...)
}
b = append(b, s...)
if c&inlineXOR == inlineXOR {
// TODO: support and handle two-byte inline masks
b[len(b)-1] ^= byte(index)
} else {
for p := len(b) - int(xorData[index]); p < len(b); p++ {
index++
b[p] ^= xorData[index]
}
}
return b
}
// Sparse block handling code.
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
type sparseBlocks struct {
values []valueRange
offset []uint16
}
var idnaSparse = sparseBlocks{
values: idnaSparseValues[:],
offset: idnaSparseOffset[:],
}
// Don't use newIdnaTrie to avoid unconditional linking in of the table.
var trie = &idnaTrie{}
// lookup determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is given by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
offset := t.offset[n]
header := t.values[offset]
lo := offset + 1
hi := lo + uint16(header.lo)
for lo < hi {
m := lo + (hi-lo)/2
r := t.values[m]
if r.lo <= b && b <= r.hi {
return r.value + uint16(b-r.lo)*header.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
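// A worked illustration of the range lookup above; the table values are
// hypothetical, not taken from the generated idna tables. The header holds
// the stride and the number of ranges, and a byte resolves to a linear
// offset within its matching range:
//
//    t := sparseBlocks{
//        values: []valueRange{
//            {value: 2, lo: 1},            // header: stride 2, one range
//            {value: 100, lo: 10, hi: 20}, // bytes 10..20 map to 100, 102, ...
//        },
//        offset: []uint16{0},
//    }
//    _ = t.lookup(0, 15) // 100 + (15-10)*2 = 110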

View file

@ -1,119 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package idna
// This file contains definitions for interpreting the trie value of the idna
// trie generated by "go run gen*.go". It is shared by both the generator
// program and the resultant package. Sharing is achieved by the generator
// copying gen_trieval.go to trieval.go and changing what's above this comment.
// info holds information from the IDNA mapping table for a single rune. It is
// the value returned by a trie lookup. In most cases, all information fits in
// a 16-bit value. For mappings, this value may contain an index into a slice
// with the mapped string. Such mappings can consist of the actual mapped value
// or an XOR pattern to be applied to the bytes of the UTF8 encoding of the
// input rune. This technique is used by the cases packages and reduces the
// table size significantly.
//
// The per-rune values have the following format:
//
// if mapped {
// if inlinedXOR {
// 15..13 inline XOR marker
// 12..11 unused
// 10..3 inline XOR mask
// } else {
// 15..3 index into xor or mapping table
// }
// } else {
// 15..14 unused
// 13 mayNeedNorm
// 12..11 attributes
// 10..8 joining type
// 7..3 category type
// }
// 2 use xor pattern
// 1..0 mapped category
//
// See the definitions below for a more detailed description of the various
// bits.
type info uint16
const (
catSmallMask = 0x3
catBigMask = 0xF8
indexShift = 3
xorBit = 0x4 // interpret the index as an xor pattern
inlineXOR = 0xE000 // These bits are set if the XOR pattern is inlined.
joinShift = 8
joinMask = 0x07
// Attributes
attributesMask = 0x1800
viramaModifier = 0x1800
modifier = 0x1000
rtl = 0x0800
mayNeedNorm = 0x2000
)
// A category corresponds to a category defined in the IDNA mapping table.
type category uint16
const (
unknown category = 0 // not currently defined in unicode.
mapped category = 1
disallowedSTD3Mapped category = 2
deviation category = 3
)
const (
valid category = 0x08
validNV8 category = 0x18
validXV8 category = 0x28
disallowed category = 0x40
disallowedSTD3Valid category = 0x80
ignored category = 0xC0
)
// join types and additional rune information
const (
joiningL = (iota + 1)
joiningD
joiningT
joiningR
// the following types are derived during processing
joinZWJ
joinZWNJ
joinVirama
numJoinTypes
)
func (c info) isMapped() bool {
return c&0x3 != 0
}
func (c info) category() category {
small := c & catSmallMask
if small != 0 {
return category(small)
}
return category(c & catBigMask)
}
func (c info) joinType() info {
if c.isMapped() {
return 0
}
return (c >> joinShift) & joinMask
}
func (c info) isModifier() bool {
return c&(modifier|catSmallMask) == modifier
}
func (c info) isViramaModifier() bool {
return c&(attributesMask|catSmallMask) == viramaModifier
}
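// A worked example of the layout above with a hypothetical, hand-built value
// (not taken from the generated table). For a non-mapped rune with category
// valid and joining type D:
//
//    const v info = 0x08 | joiningD<<joinShift // 0x0208
//    _ = v.isMapped()  // false: the low two bits are zero
//    _ = v.category()  // valid (0x08), selected via catBigMask
//    _ = v.joinType()  // joiningD, read from bits 10..8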

View file

@ -1,41 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package format contains types for defining language-specific formatting of
// values.
//
// This package is internal now, but will eventually be exposed after the API
// settles.
package format // import "golang.org/x/text/internal/format"
import (
"fmt"
"golang.org/x/text/language"
)
// State represents the printer state passed to custom formatters. It provides
// access to the fmt.State interface and the sentence and language-related
// context.
type State interface {
fmt.State
// Language reports the requested language in which to render a message.
Language() language.Tag
// TODO: consider this and removing rune from the Format method in the
// Formatter interface.
//
// Verb returns the format variant to render, analogous to the types used
// in fmt. Use 'v' for the default or only variant.
// Verb() rune
// TODO: more info:
// - sentence context such as linguistic features passed by the translator.
}
// Formatter is analogous to fmt.Formatter.
type Formatter interface {
Format(state State, verb rune)
}
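// A minimal sketch of a client type satisfying Formatter (the rendering shown
// is invented for illustration; a real formatter would localize its output
// based on State.Language()):
//
//    type percent float64
//
//    func (p percent) Format(s format.State, verb rune) {
//        // State embeds fmt.State, so it can be written to directly.
//        fmt.Fprintf(s, "%.1f%% (%v)", float64(p)*100, s.Language())
//    }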

View file

@ -1,40 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package number
import "unicode/utf8"
// A system identifies a CLDR numbering system.
type system byte
type systemData struct {
id system
digitSize byte // number of UTF-8 bytes per digit
zero [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
}
// A SymbolType identifies a symbol of a specific kind.
type SymbolType int
const (
SymDecimal SymbolType = iota
SymGroup
SymList
SymPercentSign
SymPlusSign
SymMinusSign
SymExponential
SymSuperscriptingExponent
SymPerMille
SymInfinity
SymNan
SymTimeSeparator
NumSymbolTypes
)
type altSymData struct {
compactTag uint16
system system
symIndex byte
}

View file

@ -1,498 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate stringer -type RoundingMode
package number
import (
"math"
"strconv"
)
// RoundingMode determines how a number is rounded to the desired precision.
type RoundingMode byte
const (
ToNearestEven RoundingMode = iota // towards the nearest integer, or towards an even number if equidistant.
ToNearestZero // towards the nearest integer, or towards zero if equidistant.
ToNearestAway // towards the nearest integer, or away from zero if equidistant.
ToPositiveInf // towards infinity
ToNegativeInf // towards negative infinity
ToZero // towards zero
AwayFromZero // away from zero
numModes
)
const maxIntDigits = 20
// A Decimal represents a floating point number in decimal format.
// Digits represents a number in [0, 1.0), and the absolute value represented by
// Decimal is Digits * 10^Exp. Leading and trailing zeros may be omitted and Exp
// may point outside a valid position in Digits.
//
// Examples:
// Number Decimal
// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5
// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2
// 12000 Digits: [1, 2], Exp: 5
// 12000.00 Digits: [1, 2], Exp: 5
// 0.00123 Digits: [1, 2, 3], Exp: -2
// 0 Digits: [], Exp: 0
type Decimal struct {
digits
buf [maxIntDigits]byte
}
type digits struct {
Digits []byte // mantissa digits, big-endian
Exp int32 // exponent
Neg bool
Inf bool // Takes precedence over Digits and Exp.
NaN bool // Takes precedence over Inf.
}
// Digits holds a floating point number represented in the digits of the
// base in which the number is to be displayed. It is similar to Decimal, but
// keeps track of trailing fraction zeros and the comma placement for
// engineering notation. Digits must have at least one digit.
//
// Examples:
// Number Decimal
// decimal
// 12345 Digits: [1, 2, 3, 4, 5], Exp: 5 End: 5
// 12.345 Digits: [1, 2, 3, 4, 5], Exp: 2 End: 5
// 12000 Digits: [1, 2], Exp: 5 End: 5
// 12000.00 Digits: [1, 2], Exp: 5 End: 7
// 0.00123 Digits: [1, 2, 3], Exp: -2 End: 3
// 0 Digits: [], Exp: 0 End: 1
// scientific (actual exp is Exp - Comma)
// 0e0 Digits: [0], Exp: 1, End: 1, Comma: 1
// .0e0 Digits: [0], Exp: 0, End: 1, Comma: 0
// 0.0e0 Digits: [0], Exp: 1, End: 2, Comma: 1
// 1.23e4 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 1
// .123e5 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 0
// engineering
// 12.3e3 Digits: [1, 2, 3], Exp: 5, End: 3, Comma: 2
type Digits struct {
digits
// End indicates the end position of the number.
End int32 // For decimals Exp <= End. For scientific len(Digits) <= End.
// Comma is used for the comma position for scientific (always 0 or 1) and
// engineering notation (always 0, 1, 2, or 3).
Comma uint8
// IsScientific indicates whether this number is to be rendered as a
// scientific number.
IsScientific bool
}
func (d *Digits) NumFracDigits() int {
if d.Exp >= d.End {
return 0
}
return int(d.End - d.Exp)
}
// normalize returns a new Decimal with leading and trailing zeros removed.
func (d *Decimal) normalize() (n Decimal) {
n = *d
b := n.Digits
// Strip leading zeros. Resulting number of digits is significant digits.
for len(b) > 0 && b[0] == 0 {
b = b[1:]
n.Exp--
}
// Strip trailing zeros
for len(b) > 0 && b[len(b)-1] == 0 {
b = b[:len(b)-1]
}
if len(b) == 0 {
n.Exp = 0
}
n.Digits = b
return n
}
func (d *Decimal) clear() {
b := d.Digits
if b == nil {
b = d.buf[:0]
}
*d = Decimal{}
d.Digits = b[:0]
}
func (x *Decimal) String() string {
if x.NaN {
return "NaN"
}
var buf []byte
if x.Neg {
buf = append(buf, '-')
}
if x.Inf {
buf = append(buf, "Inf"...)
return string(buf)
}
switch {
case len(x.Digits) == 0:
buf = append(buf, '0')
case x.Exp <= 0:
// 0.00ddd
buf = append(buf, "0."...)
buf = appendZeros(buf, -int(x.Exp))
buf = appendDigits(buf, x.Digits)
case /* 0 < */ int(x.Exp) < len(x.Digits):
// dd.ddd
buf = appendDigits(buf, x.Digits[:x.Exp])
buf = append(buf, '.')
buf = appendDigits(buf, x.Digits[x.Exp:])
default: // len(x.Digits) <= x.Exp
// ddd00
buf = appendDigits(buf, x.Digits)
buf = appendZeros(buf, int(x.Exp)-len(x.Digits))
}
return string(buf)
}
func appendDigits(buf []byte, digits []byte) []byte {
for _, c := range digits {
buf = append(buf, c+'0')
}
return buf
}
// appendZeros appends n 0 digits to buf and returns buf.
func appendZeros(buf []byte, n int) []byte {
for ; n > 0; n-- {
buf = append(buf, '0')
}
return buf
}
func (d *digits) round(mode RoundingMode, n int) {
if n >= len(d.Digits) {
return
}
// Make rounding decision: The result mantissa is truncated ("rounded down")
// by default. Decide if we need to increment, or "round up", the (unsigned)
// mantissa.
inc := false
switch mode {
case ToNegativeInf:
inc = d.Neg
case ToPositiveInf:
inc = !d.Neg
case ToZero:
// nothing to do
case AwayFromZero:
inc = true
case ToNearestEven:
inc = d.Digits[n] > 5 || d.Digits[n] == 5 &&
(len(d.Digits) > n+1 || n == 0 || d.Digits[n-1]&1 != 0)
case ToNearestAway:
inc = d.Digits[n] >= 5
case ToNearestZero:
inc = d.Digits[n] > 5 || d.Digits[n] == 5 && len(d.Digits) > n+1
default:
panic("unreachable")
}
if inc {
d.roundUp(n)
} else {
d.roundDown(n)
}
}
// roundFloat rounds a floating point number.
func (r RoundingMode) roundFloat(x float64) float64 {
// Make rounding decision: The result mantissa is truncated ("rounded down")
// by default. Decide if we need to increment, or "round up", the (unsigned)
// mantissa.
abs := x
if x < 0 {
abs = -x
}
i, f := math.Modf(abs)
if f == 0.0 {
return x
}
inc := false
switch r {
case ToNegativeInf:
inc = x < 0
case ToPositiveInf:
inc = x >= 0
case ToZero:
// nothing to do
case AwayFromZero:
inc = true
case ToNearestEven:
// TODO: check overflow
inc = f > 0.5 || f == 0.5 && int64(i)&1 != 0
case ToNearestAway:
inc = f >= 0.5
case ToNearestZero:
inc = f > 0.5
default:
panic("unreachable")
}
if inc {
i += 1
}
if abs != x {
i = -i
}
return i
}
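// A small illustration of the tie-breaking behavior above (the inputs are
// chosen for illustration only):
//
//    _ = ToNearestEven.roundFloat(0.5)  // 0: ties go to the even neighbor
//    _ = ToNearestEven.roundFloat(1.5)  // 2
//    _ = ToNearestAway.roundFloat(0.5)  // 1: ties go away from zero
//    _ = ToNearestZero.roundFloat(0.5)  // 0: ties go towards zero
//    _ = ToNegativeInf.roundFloat(-0.5) // -1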
func (x *digits) roundUp(n int) {
if n < 0 || n >= len(x.Digits) {
return // nothing to do
}
// find first digit < 9
for n > 0 && x.Digits[n-1] >= 9 {
n--
}
if n == 0 {
// all digits are 9s => round up to 1 and update exponent
x.Digits[0] = 1 // ok since len(x.Digits) > n
x.Digits = x.Digits[:1]
x.Exp++
return
}
x.Digits[n-1]++
x.Digits = x.Digits[:n]
// x already trimmed
}
func (x *digits) roundDown(n int) {
if n < 0 || n >= len(x.Digits) {
return // nothing to do
}
x.Digits = x.Digits[:n]
trim(x)
}
// trim cuts off any trailing zeros from x's mantissa;
// they are meaningless for the value of x.
func trim(x *digits) {
i := len(x.Digits)
for i > 0 && x.Digits[i-1] == 0 {
i--
}
x.Digits = x.Digits[:i]
if i == 0 {
x.Exp = 0
}
}
// A Converter converts a number into decimals according to the given rounding
// criteria.
type Converter interface {
Convert(d *Decimal, r RoundingContext)
}
const (
signed = true
unsigned = false
)
// Convert converts the given number to the decimal representation using the
// supplied RoundingContext.
func (d *Decimal) Convert(r RoundingContext, number interface{}) {
switch f := number.(type) {
case Converter:
d.clear()
f.Convert(d, r)
case float32:
d.ConvertFloat(r, float64(f), 32)
case float64:
d.ConvertFloat(r, f, 64)
case int:
d.ConvertInt(r, signed, uint64(f))
case int8:
d.ConvertInt(r, signed, uint64(f))
case int16:
d.ConvertInt(r, signed, uint64(f))
case int32:
d.ConvertInt(r, signed, uint64(f))
case int64:
d.ConvertInt(r, signed, uint64(f))
case uint:
d.ConvertInt(r, unsigned, uint64(f))
case uint8:
d.ConvertInt(r, unsigned, uint64(f))
case uint16:
d.ConvertInt(r, unsigned, uint64(f))
case uint32:
d.ConvertInt(r, unsigned, uint64(f))
case uint64:
d.ConvertInt(r, unsigned, f)
default:
d.NaN = true
// TODO:
// case string: if produced by strconv, allows for easy arbitrary pos.
// case reflect.Value:
// case big.Float
// case big.Int
// case big.Rat?
// catch underlyings using reflect or will this already be done by the
// message package?
}
}
// ConvertInt converts an integer to decimals.
func (d *Decimal) ConvertInt(r RoundingContext, signed bool, x uint64) {
if r.Increment > 0 {
// TODO: if uint64 is too large, fall back to float64
if signed {
d.ConvertFloat(r, float64(int64(x)), 64)
} else {
d.ConvertFloat(r, float64(x), 64)
}
return
}
d.clear()
if signed && int64(x) < 0 {
x = uint64(-int64(x))
d.Neg = true
}
d.fillIntDigits(x)
d.Exp = int32(len(d.Digits))
}
// ConvertFloat converts a floating point number to decimals.
func (d *Decimal) ConvertFloat(r RoundingContext, x float64, size int) {
d.clear()
if math.IsNaN(x) {
d.NaN = true
return
}
// Simple case: decimal notation
if r.Increment > 0 {
scale := int(r.IncrementScale)
mult := 1.0
if scale > len(scales) {
mult = math.Pow(10, float64(scale))
} else {
mult = scales[scale]
}
// We multiply x instead of dividing inc as it causes fewer rounding
// issues.
x *= mult
x /= float64(r.Increment)
x = r.Mode.roundFloat(x)
x *= float64(r.Increment)
x /= mult
}
abs := x
if x < 0 {
d.Neg = true
abs = -x
}
if math.IsInf(abs, 1) {
d.Inf = true
return
}
// By default we get the exact decimal representation.
verb := byte('g')
prec := -1
// As the strconv API does not return the rounding accuracy, we can only
// round using ToNearestEven.
if r.Mode == ToNearestEven {
if n := r.RoundSignificantDigits(); n >= 0 {
prec = n
} else if n = r.RoundFractionDigits(); n >= 0 {
prec = n
verb = 'f'
}
} else {
// TODO: At this point strconv's rounding is imprecise to the point that
// it is not usable for this purpose.
// See https://github.com/golang/go/issues/21714
// If rounding is requested, we ask for a large number of digits and
// round from there to simulate rounding only once.
// Ideally we would have strconv export an AppendDigits that would take
// a rounding mode and/or return an accuracy. Something like this would
// work:
// AppendDigits(dst []byte, x float64, base, size, prec int) (digits []byte, exp, accuracy int)
hasPrec := r.RoundSignificantDigits() >= 0
hasScale := r.RoundFractionDigits() >= 0
if hasPrec || hasScale {
// prec is the number of mantissa bits plus some extra for safety.
// We need at least the number of mantissa bits as decimals to
// accurately represent the floating point without rounding, as each
// bit requires one more decimal to represent: 0.5, 0.25, 0.125, ...
prec = 60
}
}
b := strconv.AppendFloat(d.Digits[:0], abs, verb, prec, size)
i := 0
k := 0
beforeDot := 1
for i < len(b) {
if c := b[i]; '0' <= c && c <= '9' {
b[k] = c - '0'
k++
d.Exp += int32(beforeDot)
} else if c == '.' {
beforeDot = 0
d.Exp = int32(k)
} else {
break
}
i++
}
d.Digits = b[:k]
if i != len(b) {
i += len("e")
pSign := i
exp := 0
for i++; i < len(b); i++ {
exp *= 10
exp += int(b[i] - '0')
}
if b[pSign] == '-' {
exp = -exp
}
d.Exp = int32(exp) + 1
}
}
func (d *Decimal) fillIntDigits(x uint64) {
if cap(d.Digits) < maxIntDigits {
d.Digits = d.buf[:]
} else {
d.Digits = d.buf[:maxIntDigits]
}
i := 0
for ; x > 0; x /= 10 {
d.Digits[i] = byte(x % 10)
i++
}
d.Digits = d.Digits[:i]
for p := 0; p < i; p++ {
i--
d.Digits[p], d.Digits[i] = d.Digits[i], d.Digits[p]
}
}
var scales [70]float64
func init() {
x := 1.0
for i := range scales {
scales[i] = x
x *= 10
}
}
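// A usage sketch of Convert (mirroring cases from the conversion tests that
// accompany this package; RoundingContext and its SetScale method are defined
// in a file not shown here):
//
//    var d Decimal
//    rc := RoundingContext{}
//    rc.SetScale(2)
//    d.Convert(rc, -0.001)
//    _ = d.String() // "-0.00"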

View file

@ -1,329 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"fmt"
"math"
"strconv"
"strings"
"testing"
)
func mkfloat(num string) float64 {
u, _ := strconv.ParseUint(num, 10, 32)
return float64(u)
}
// mkdec creates a decimal from a string. All ASCII digits are converted to
// digits in the decimal. The dot is used to indicate the scale by which the
// digits are shifted. Numbers may have an additional exponent or be the special
// value NaN, Inf, or -Inf.
func mkdec(num string) (d Decimal) {
var r RoundingContext
d.Convert(r, dec(num))
return
}
type dec string
func (s dec) Convert(d *Decimal, _ RoundingContext) {
num := string(s)
if num[0] == '-' {
d.Neg = true
num = num[1:]
}
switch num {
case "NaN":
d.NaN = true
return
case "Inf":
d.Inf = true
return
}
if p := strings.IndexAny(num, "eE"); p != -1 {
i64, err := strconv.ParseInt(num[p+1:], 10, 32)
if err != nil {
panic(err)
}
d.Exp = int32(i64)
num = num[:p]
}
if p := strings.IndexByte(num, '.'); p != -1 {
d.Exp += int32(p)
num = num[:p] + num[p+1:]
} else {
d.Exp += int32(len(num))
}
d.Digits = []byte(num)
for i := range d.Digits {
d.Digits[i] -= '0'
}
*d = d.normalize()
}
func byteNum(s string) []byte {
b := make([]byte, len(s))
for i := 0; i < len(s); i++ {
if c := s[i]; '0' <= c && c <= '9' {
b[i] = s[i] - '0'
} else {
b[i] = s[i] - 'a' + 10
}
}
return b
}
func strNum(s string) string {
return string(byteNum(s))
}
func TestDecimalString(t *testing.T) {
for _, test := range []struct {
x Decimal
want string
}{
{want: "0"},
{Decimal{digits: digits{Digits: nil, Exp: 1000}}, "0"}, // exponent of 1000 is ignored
{Decimal{digits: digits{Digits: byteNum("12345"), Exp: 0}}, "0.12345"},
{Decimal{digits: digits{Digits: byteNum("12345"), Exp: -3}}, "0.00012345"},
{Decimal{digits: digits{Digits: byteNum("12345"), Exp: +3}}, "123.45"},
{Decimal{digits: digits{Digits: byteNum("12345"), Exp: +10}}, "1234500000"},
} {
if got := test.x.String(); got != test.want {
t.Errorf("%v == %q; want %q", test.x, got, test.want)
}
}
}
func TestRounding(t *testing.T) {
testCases := []struct {
x string
n int
// modes holds the expected result for each rounding mode; signs are left out.
// The results are stored in the following order:
// zero, negInf
// nearZero, nearEven, nearAway
// away, posInf
modes [numModes]string
}{
{"0", 1, [numModes]string{
"0", "0",
"0", "0", "0",
"0", "0"}},
{"1", 1, [numModes]string{
"1", "1",
"1", "1", "1",
"1", "1"}},
{"5", 1, [numModes]string{
"5", "5",
"5", "5", "5",
"5", "5"}},
{"15", 1, [numModes]string{
"10", "10",
"10", "20", "20",
"20", "20"}},
{"45", 1, [numModes]string{
"40", "40",
"40", "40", "50",
"50", "50"}},
{"95", 1, [numModes]string{
"90", "90",
"90", "100", "100",
"100", "100"}},
{"12344999", 4, [numModes]string{
"12340000", "12340000",
"12340000", "12340000", "12340000",
"12350000", "12350000"}},
{"12345000", 4, [numModes]string{
"12340000", "12340000",
"12340000", "12340000", "12350000",
"12350000", "12350000"}},
{"12345001", 4, [numModes]string{
"12340000", "12340000",
"12350000", "12350000", "12350000",
"12350000", "12350000"}},
{"12345100", 4, [numModes]string{
"12340000", "12340000",
"12350000", "12350000", "12350000",
"12350000", "12350000"}},
{"23454999", 4, [numModes]string{
"23450000", "23450000",
"23450000", "23450000", "23450000",
"23460000", "23460000"}},
{"23455000", 4, [numModes]string{
"23450000", "23450000",
"23450000", "23460000", "23460000",
"23460000", "23460000"}},
{"23455001", 4, [numModes]string{
"23450000", "23450000",
"23460000", "23460000", "23460000",
"23460000", "23460000"}},
{"23455100", 4, [numModes]string{
"23450000", "23450000",
"23460000", "23460000", "23460000",
"23460000", "23460000"}},
{"99994999", 4, [numModes]string{
"99990000", "99990000",
"99990000", "99990000", "99990000",
"100000000", "100000000"}},
{"99995000", 4, [numModes]string{
"99990000", "99990000",
"99990000", "100000000", "100000000",
"100000000", "100000000"}},
{"99999999", 4, [numModes]string{
"99990000", "99990000",
"100000000", "100000000", "100000000",
"100000000", "100000000"}},
{"12994999", 4, [numModes]string{
"12990000", "12990000",
"12990000", "12990000", "12990000",
"13000000", "13000000"}},
{"12995000", 4, [numModes]string{
"12990000", "12990000",
"12990000", "13000000", "13000000",
"13000000", "13000000"}},
{"12999999", 4, [numModes]string{
"12990000", "12990000",
"13000000", "13000000", "13000000",
"13000000", "13000000"}},
}
modes := []RoundingMode{
ToZero, ToNegativeInf,
ToNearestZero, ToNearestEven, ToNearestAway,
AwayFromZero, ToPositiveInf,
}
for _, tc := range testCases {
// Create negative counterpart tests: the sign is reversed and
// ToPositiveInf and ToNegativeInf swapped.
negModes := tc.modes
negModes[1], negModes[6] = negModes[6], negModes[1]
for i, res := range negModes {
negModes[i] = "-" + res
}
for i, m := range modes {
t.Run(fmt.Sprintf("x:%s/n:%d/%s", tc.x, tc.n, m), func(t *testing.T) {
d := mkdec(tc.x)
d.round(m, tc.n)
if got := d.String(); got != tc.modes[i] {
t.Errorf("pos decimal: got %q; want %q", d.String(), tc.modes[i])
}
mult := math.Pow(10, float64(len(tc.x)-tc.n))
f := mkfloat(tc.x)
f = m.roundFloat(f/mult) * mult
if got := fmt.Sprintf("%.0f", f); got != tc.modes[i] {
t.Errorf("pos float: got %q; want %q", got, tc.modes[i])
}
// Test the negative case. This is the same as the positive
// case, but with ToPositiveInf and ToNegativeInf swapped.
d = mkdec(tc.x)
d.Neg = true
d.round(m, tc.n)
if got, want := d.String(), negModes[i]; got != want {
t.Errorf("neg decimal: got %q; want %q", d.String(), want)
}
f = -mkfloat(tc.x)
f = m.roundFloat(f/mult) * mult
if got := fmt.Sprintf("%.0f", f); got != negModes[i] {
t.Errorf("neg float: got %q; want %q", got, negModes[i])
}
})
}
}
}
func TestConvert(t *testing.T) {
scale2 := RoundingContext{}
scale2.SetScale(2)
scale2away := RoundingContext{Mode: AwayFromZero}
scale2away.SetScale(2)
inc0_05 := RoundingContext{Increment: 5, IncrementScale: 2}
inc0_05.SetScale(2)
inc50 := RoundingContext{Increment: 50}
prec3 := RoundingContext{}
prec3.SetPrecision(3)
roundShift := RoundingContext{DigitShift: 2, MaxFractionDigits: 2}
testCases := []struct {
x interface{}
rc RoundingContext
out string
}{
{-0.001, scale2, "-0.00"},
{0.1234, prec3, "0.123"},
{1234.0, prec3, "1230"},
{1.2345e10, prec3, "12300000000"},
{int8(-34), scale2, "-34"},
{int16(-234), scale2, "-234"},
{int32(-234), scale2, "-234"},
{int64(-234), scale2, "-234"},
{int(-234), scale2, "-234"},
{uint8(234), scale2, "234"},
{uint16(234), scale2, "234"},
{uint32(234), scale2, "234"},
{uint64(234), scale2, "234"},
{uint(234), scale2, "234"},
{-1e9, scale2, "-1000000000.00"},
		// Two things cause this result to have a lot of digits:
		// 1) 0.234 cannot be accurately represented as a float64, and
		// 2) because strconv does not support the AwayFromZero rounding mode,
		// Convert leaves the rounding to the caller.
{0.234, scale2away,
"0.2340000000000000135447209004269097931683063507080078125"},
{0.0249, inc0_05, "0.00"},
{0.025, inc0_05, "0.00"},
{0.0251, inc0_05, "0.05"},
{0.03, inc0_05, "0.05"},
{0.049, inc0_05, "0.05"},
{0.05, inc0_05, "0.05"},
{0.051, inc0_05, "0.05"},
{0.0749, inc0_05, "0.05"},
{0.075, inc0_05, "0.10"},
{0.0751, inc0_05, "0.10"},
{324, inc50, "300"},
{325, inc50, "300"},
{326, inc50, "350"},
{349, inc50, "350"},
{350, inc50, "350"},
{351, inc50, "350"},
{374, inc50, "350"},
{375, inc50, "400"},
{376, inc50, "400"},
		// Here the scale is 2, but the digits get shifted left. As we use
		// AppendFloat to do the rounding, an extra 0 gets added.
{0.123, roundShift, "0.1230"},
{converter(3), scale2, "100"},
{math.Inf(1), inc50, "Inf"},
{math.Inf(-1), inc50, "-Inf"},
{math.NaN(), inc50, "NaN"},
{"clearly not a number", scale2, "NaN"},
}
for _, tc := range testCases {
var d Decimal
t.Run(fmt.Sprintf("%T:%v-%v", tc.x, tc.x, tc.rc), func(t *testing.T) {
d.Convert(tc.rc, tc.x)
if got := d.String(); got != tc.out {
t.Errorf("got %q; want %q", got, tc.out)
}
})
}
}
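// converter is a test helper whose Convert method supplies digits directly.
// The {converter(3), scale2, "100"} case above relies on Decimal.Convert
// dispatching to such a method: digits 1, 0, 0 with Exp 3 render as "100".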
type converter int
func (c converter) Convert(d *Decimal, r RoundingContext) {
d.Digits = append(d.Digits, 1, 0, 0)
d.Exp = 3
}


@ -1,540 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"strconv"
"unicode/utf8"
"golang.org/x/text/language"
)
// TODO:
// - grouping of fractions
// - allow user-defined superscript notation (such as <sup>4</sup>)
// - same for non-breaking spaces, like &nbsp;
// A VisibleDigits computes digits, comma placement and trailing zeros as they
// will be shown to the user.
type VisibleDigits interface {
Digits(buf []byte, t language.Tag, scale int) Digits
// TODO: Do we also need to add the verb or pass a format.State?
}
// Formatting proceeds along the following lines:
// 0) Compose rounding information from format and context.
// 1) Convert a number into a Decimal.
// 2) Sanitize Decimal by adding trailing zeros, removing leading digits, and
// (non-increment) rounding. The Decimal that results from this is suitable
// for determining the plural form.
// 3) Render the Decimal in the localized form.
// Formatter contains all the information needed to render a number.
type Formatter struct {
Pattern
Info
}
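// A minimal, illustrative usage sketch (not part of the original API docs;
// the expected output is taken from TestLocales in this package):
//
//	var f Formatter
//	f.InitDecimal(language.English)
//	out := f.Append(nil, 123456.78)
//	// string(out) == "123,456.78"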
func (f *Formatter) init(t language.Tag, index []uint8) {
f.Info = InfoFromTag(t)
for ; ; t = t.Parent() {
if ci, ok := language.CompactIndex(t); ok {
f.Pattern = formats[index[ci]]
break
}
}
}
// InitPattern initializes a Formatter for the given Pattern.
func (f *Formatter) InitPattern(t language.Tag, pat *Pattern) {
f.Info = InfoFromTag(t)
f.Pattern = *pat
}
// InitDecimal initializes a Formatter using the default Pattern for the given
// language.
func (f *Formatter) InitDecimal(t language.Tag) {
f.init(t, tagToDecimal)
}
// InitScientific initializes a Formatter using the default Pattern for the
// given language.
func (f *Formatter) InitScientific(t language.Tag) {
f.init(t, tagToScientific)
f.Pattern.MinFractionDigits = 0
f.Pattern.MaxFractionDigits = -1
}
// InitEngineering initializes a Formatter using the default Pattern for the
// given language.
func (f *Formatter) InitEngineering(t language.Tag) {
f.init(t, tagToScientific)
f.Pattern.MinFractionDigits = 0
f.Pattern.MaxFractionDigits = -1
f.Pattern.MaxIntegerDigits = 3
f.Pattern.MinIntegerDigits = 1
}
// InitPercent initializes a Formatter using the default Pattern for the given
// language.
func (f *Formatter) InitPercent(t language.Tag) {
f.init(t, tagToPercent)
}
// InitPerMille initializes a Formatter using the default Pattern for the given
// language.
func (f *Formatter) InitPerMille(t language.Tag) {
f.init(t, tagToPercent)
f.Pattern.DigitShift = 3
}
func (f *Formatter) Append(dst []byte, x interface{}) []byte {
var d Decimal
r := f.RoundingContext
d.Convert(r, x)
return f.Render(dst, FormatDigits(&d, r))
}
func FormatDigits(d *Decimal, r RoundingContext) Digits {
if r.isScientific() {
return scientificVisibleDigits(r, d)
}
return decimalVisibleDigits(r, d)
}
func (f *Formatter) Format(dst []byte, d *Decimal) []byte {
return f.Render(dst, FormatDigits(d, f.RoundingContext))
}
func (f *Formatter) Render(dst []byte, d Digits) []byte {
var result []byte
var postPrefix, preSuffix int
if d.IsScientific {
result, postPrefix, preSuffix = appendScientific(dst, f, &d)
} else {
result, postPrefix, preSuffix = appendDecimal(dst, f, &d)
}
if f.PadRune == 0 {
return result
}
width := int(f.FormatWidth)
if count := utf8.RuneCount(result); count < width {
insertPos := 0
switch f.Flags & PadMask {
case PadAfterPrefix:
insertPos = postPrefix
case PadBeforeSuffix:
insertPos = preSuffix
case PadAfterSuffix:
insertPos = len(result)
}
num := width - count
pad := [utf8.UTFMax]byte{' '}
sz := 1
if r := f.PadRune; r != 0 {
sz = utf8.EncodeRune(pad[:], r)
}
extra := sz * num
if n := len(result) + extra; n < cap(result) {
result = result[:n]
copy(result[insertPos+extra:], result[insertPos:])
} else {
buf := make([]byte, n)
copy(buf, result[:insertPos])
copy(buf[insertPos+extra:], result[insertPos:])
result = buf
}
for ; num > 0; num-- {
insertPos += copy(result[insertPos:], pad[:sz])
}
}
return result
}
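// As an illustration of the padding logic above (expected values taken from
// the pattern tests in this package): for the pattern "*x##0", PadRune is 'x'
// and FormatWidth is 3, so "0" renders as "xx0" and "10" as "x10", while
// "100" and longer results are left unpadded.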
// decimalVisibleDigits converts d according to the RoundingContext. Note that
// the exponent may change as a result of this operation.
func decimalVisibleDigits(r RoundingContext, d *Decimal) Digits {
if d.NaN || d.Inf {
return Digits{digits: digits{Neg: d.Neg, NaN: d.NaN, Inf: d.Inf}}
}
n := Digits{digits: d.normalize().digits}
exp := n.Exp
exp += int32(r.DigitShift)
// Cap integer digits. Remove *most-significant* digits.
if r.MaxIntegerDigits > 0 {
if p := int(exp) - int(r.MaxIntegerDigits); p > 0 {
if p > len(n.Digits) {
p = len(n.Digits)
}
if n.Digits = n.Digits[p:]; len(n.Digits) == 0 {
exp = 0
} else {
exp -= int32(p)
}
// Strip leading zeros.
for len(n.Digits) > 0 && n.Digits[0] == 0 {
n.Digits = n.Digits[1:]
exp--
}
}
}
// Rounding if not already done by Convert.
p := len(n.Digits)
if maxSig := int(r.MaxSignificantDigits); maxSig > 0 {
p = maxSig
}
if maxFrac := int(r.MaxFractionDigits); maxFrac >= 0 {
if cap := int(exp) + maxFrac; cap < p {
p = int(exp) + maxFrac
}
if p < 0 {
p = 0
}
}
n.round(r.Mode, p)
// set End (trailing zeros)
n.End = int32(len(n.Digits))
if n.End == 0 {
exp = 0
if r.MinFractionDigits > 0 {
n.End = int32(r.MinFractionDigits)
}
if p := int32(r.MinSignificantDigits) - 1; p > n.End {
n.End = p
}
} else {
if end := exp + int32(r.MinFractionDigits); end > n.End {
n.End = end
}
if n.End < int32(r.MinSignificantDigits) {
n.End = int32(r.MinSignificantDigits)
}
}
n.Exp = exp
return n
}
// appendDecimal appends a formatted number to dst. It returns two possible
// insertion points for padding.
func appendDecimal(dst []byte, f *Formatter, n *Digits) (b []byte, postPre, preSuf int) {
if dst, ok := f.renderSpecial(dst, n); ok {
return dst, 0, len(dst)
}
digits := n.Digits
exp := n.Exp
// Split in integer and fraction part.
var intDigits, fracDigits []byte
numInt := 0
numFrac := int(n.End - n.Exp)
if exp > 0 {
numInt = int(exp)
if int(exp) >= len(digits) { // ddddd | ddddd00
intDigits = digits
} else { // ddd.dd
intDigits = digits[:exp]
fracDigits = digits[exp:]
}
} else {
fracDigits = digits
}
neg := n.Neg
affix, suffix := f.getAffixes(neg)
dst = appendAffix(dst, f, affix, neg)
savedLen := len(dst)
minInt := int(f.MinIntegerDigits)
if minInt == 0 && f.MinSignificantDigits > 0 {
minInt = 1
}
// add leading zeros
for i := minInt; i > numInt; i-- {
dst = f.AppendDigit(dst, 0)
if f.needsSep(i) {
dst = append(dst, f.Symbol(SymGroup)...)
}
}
i := 0
for ; i < len(intDigits); i++ {
dst = f.AppendDigit(dst, intDigits[i])
if f.needsSep(numInt - i) {
dst = append(dst, f.Symbol(SymGroup)...)
}
}
for ; i < numInt; i++ {
dst = f.AppendDigit(dst, 0)
if f.needsSep(numInt - i) {
dst = append(dst, f.Symbol(SymGroup)...)
}
}
if numFrac > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
dst = append(dst, f.Symbol(SymDecimal)...)
}
// Add trailing zeros
i = 0
for n := -int(n.Exp); i < n; i++ {
dst = f.AppendDigit(dst, 0)
}
for _, d := range fracDigits {
i++
dst = f.AppendDigit(dst, d)
}
for ; i < numFrac; i++ {
dst = f.AppendDigit(dst, 0)
}
return appendAffix(dst, f, suffix, neg), savedLen, len(dst)
}
func scientificVisibleDigits(r RoundingContext, d *Decimal) Digits {
if d.NaN || d.Inf {
return Digits{digits: digits{Neg: d.Neg, NaN: d.NaN, Inf: d.Inf}}
}
n := Digits{digits: d.normalize().digits, IsScientific: true}
// Normalize to have at least one digit. This simplifies engineering
// notation.
if len(n.Digits) == 0 {
n.Digits = append(n.Digits, 0)
n.Exp = 1
}
// Significant digits are transformed by the parser for scientific notation
// and do not need to be handled here.
maxInt, numInt := int(r.MaxIntegerDigits), int(r.MinIntegerDigits)
if numInt == 0 {
numInt = 1
}
	// If a maximum number of integer digits is specified, the minimum must be 1
	// and the exponent is grouped by this number (e.g. for engineering notation).
if maxInt > numInt {
// Correct the exponent to reflect a single integer digit.
numInt = 1
// engineering
// 0.01234 ([12345]e-1) -> 1.2345e-2 12.345e-3
// 12345 ([12345]e+5) -> 1.2345e4 12.345e3
d := int(n.Exp-1) % maxInt
if d < 0 {
d += maxInt
}
numInt += d
}
p := len(n.Digits)
if maxSig := int(r.MaxSignificantDigits); maxSig > 0 {
p = maxSig
}
if maxFrac := int(r.MaxFractionDigits); maxFrac >= 0 && numInt+maxFrac < p {
p = numInt + maxFrac
}
n.round(r.Mode, p)
n.Comma = uint8(numInt)
n.End = int32(len(n.Digits))
if minSig := int32(r.MinFractionDigits) + int32(numInt); n.End < minSig {
n.End = minSig
}
return n
}
// appendScientific appends a formatted number to dst. It returns two possible
// insertion points for padding.
func appendScientific(dst []byte, f *Formatter, n *Digits) (b []byte, postPre, preSuf int) {
if dst, ok := f.renderSpecial(dst, n); ok {
return dst, 0, 0
}
digits := n.Digits
numInt := int(n.Comma)
numFrac := int(n.End) - int(n.Comma)
var intDigits, fracDigits []byte
if numInt <= len(digits) {
intDigits = digits[:numInt]
fracDigits = digits[numInt:]
} else {
intDigits = digits
}
neg := n.Neg
affix, suffix := f.getAffixes(neg)
dst = appendAffix(dst, f, affix, neg)
savedLen := len(dst)
i := 0
for ; i < len(intDigits); i++ {
dst = f.AppendDigit(dst, intDigits[i])
if f.needsSep(numInt - i) {
dst = append(dst, f.Symbol(SymGroup)...)
}
}
for ; i < numInt; i++ {
dst = f.AppendDigit(dst, 0)
if f.needsSep(numInt - i) {
dst = append(dst, f.Symbol(SymGroup)...)
}
}
if numFrac > 0 || f.Flags&AlwaysDecimalSeparator != 0 {
dst = append(dst, f.Symbol(SymDecimal)...)
}
i = 0
for ; i < len(fracDigits); i++ {
dst = f.AppendDigit(dst, fracDigits[i])
}
for ; i < numFrac; i++ {
dst = f.AppendDigit(dst, 0)
}
// exp
buf := [12]byte{}
// TODO: use exponential if superscripting is not available (no Latin
// numbers or no tags) and use exponential in all other cases.
exp := n.Exp - int32(n.Comma)
exponential := f.Symbol(SymExponential)
if exponential == "E" {
dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE
dst = append(dst, f.Symbol(SymSuperscriptingExponent)...)
dst = append(dst, "\u202f"...) // NARROW NO-BREAK SPACE
dst = f.AppendDigit(dst, 1)
dst = f.AppendDigit(dst, 0)
switch {
case exp < 0:
dst = append(dst, superMinus...)
exp = -exp
case f.Flags&AlwaysExpSign != 0:
dst = append(dst, superPlus...)
}
b = strconv.AppendUint(buf[:0], uint64(exp), 10)
for i := len(b); i < int(f.MinExponentDigits); i++ {
dst = append(dst, superDigits[0]...)
}
for _, c := range b {
dst = append(dst, superDigits[c-'0']...)
}
} else {
dst = append(dst, exponential...)
switch {
case exp < 0:
dst = append(dst, f.Symbol(SymMinusSign)...)
exp = -exp
case f.Flags&AlwaysExpSign != 0:
dst = append(dst, f.Symbol(SymPlusSign)...)
}
b = strconv.AppendUint(buf[:0], uint64(exp), 10)
for i := len(b); i < int(f.MinExponentDigits); i++ {
dst = f.AppendDigit(dst, 0)
}
for _, c := range b {
dst = f.AppendDigit(dst, c-'0')
}
}
return appendAffix(dst, f, suffix, neg), savedLen, len(dst)
}
const (
superMinus = "\u207B" // SUPERSCRIPT HYPHEN-MINUS
superPlus = "\u207A" // SUPERSCRIPT PLUS SIGN
)
var (
	// Note: the superscript digits are not consecutive code points.
superDigits = []string{
"\u2070", // SUPERSCRIPT DIGIT ZERO
"\u00B9", // SUPERSCRIPT DIGIT ONE
"\u00B2", // SUPERSCRIPT DIGIT TWO
"\u00B3", // SUPERSCRIPT DIGIT THREE
"\u2074", // SUPERSCRIPT DIGIT FOUR
"\u2075", // SUPERSCRIPT DIGIT FIVE
"\u2076", // SUPERSCRIPT DIGIT SIX
"\u2077", // SUPERSCRIPT DIGIT SEVEN
"\u2078", // SUPERSCRIPT DIGIT EIGHT
"\u2079", // SUPERSCRIPT DIGIT NINE
}
)
func (f *Formatter) getAffixes(neg bool) (affix, suffix string) {
str := f.Affix
if str != "" {
if f.NegOffset > 0 {
if neg {
str = str[f.NegOffset:]
} else {
str = str[:f.NegOffset]
}
}
sufStart := 1 + str[0]
affix = str[1:sufStart]
suffix = str[sufStart+1:]
}
// TODO: introduce a NeedNeg sign to indicate if the left pattern already
// has a sign marked?
if f.NegOffset == 0 && (neg || f.Flags&AlwaysSign != 0) {
affix = "-" + affix
}
return affix, suffix
}
func (f *Formatter) renderSpecial(dst []byte, d *Digits) (b []byte, ok bool) {
if d.NaN {
return fmtNaN(dst, f), true
}
if d.Inf {
return fmtInfinite(dst, f, d), true
}
return dst, false
}
func fmtNaN(dst []byte, f *Formatter) []byte {
return append(dst, f.Symbol(SymNan)...)
}
func fmtInfinite(dst []byte, f *Formatter, d *Digits) []byte {
affix, suffix := f.getAffixes(d.Neg)
dst = appendAffix(dst, f, affix, d.Neg)
dst = append(dst, f.Symbol(SymInfinity)...)
dst = appendAffix(dst, f, suffix, d.Neg)
return dst
}
func appendAffix(dst []byte, f *Formatter, affix string, neg bool) []byte {
quoting := false
escaping := false
for _, r := range affix {
switch {
case escaping:
// escaping occurs both inside and outside of quotes
dst = append(dst, string(r)...)
escaping = false
case r == '\\':
escaping = true
case r == '\'':
quoting = !quoting
case quoting:
dst = append(dst, string(r)...)
case r == '%':
if f.DigitShift == 3 {
dst = append(dst, f.Symbol(SymPerMille)...)
} else {
dst = append(dst, f.Symbol(SymPercentSign)...)
}
case r == '-' || r == '+':
if neg {
dst = append(dst, f.Symbol(SymMinusSign)...)
} else if f.Flags&ElideSign == 0 {
dst = append(dst, f.Symbol(SymPlusSign)...)
} else {
dst = append(dst, ' ')
}
default:
dst = append(dst, string(r)...)
}
}
return dst
}


@ -1,522 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"fmt"
"log"
"testing"
"golang.org/x/text/language"
)
func TestAppendDecimal(t *testing.T) {
type pairs map[string]string // alternates with decimal input and result
testCases := []struct {
pattern string
// We want to be able to test some forms of patterns that cannot be
// represented as a string.
pat *Pattern
test pairs
}{{
pattern: "0",
test: pairs{
"0": "0",
"1": "1",
"-1": "-1",
".00": "0",
"10.": "10",
"12": "12",
"1.2": "1",
"NaN": "NaN",
"-Inf": "-∞",
},
}, {
pattern: "+0;+0",
test: pairs{
"0": "+0",
"1": "+1",
"-1": "-1",
".00": "+0",
"10.": "+10",
"12": "+12",
"1.2": "+1",
"NaN": "NaN",
"-Inf": "-∞",
"Inf": "+∞",
},
}, {
pattern: "0 +;0 +",
test: pairs{
"0": "0 +",
"1": "1 +",
"-1": "1 -",
".00": "0 +",
},
}, {
pattern: "0;0-",
test: pairs{
"-1": "1-",
"NaN": "NaN",
"-Inf": "∞-",
"Inf": "∞",
},
}, {
pattern: "0000",
test: pairs{
"0": "0000",
"1": "0001",
"12": "0012",
"12345": "12345",
},
}, {
pattern: ".0",
test: pairs{
"0": ".0",
"1": "1.0",
"1.2": "1.2",
"1.2345": "1.2",
},
}, {
pattern: "#.0",
test: pairs{
"0": ".0",
},
}, {
pattern: "#.0#",
test: pairs{
"0": ".0",
"1": "1.0",
},
}, {
pattern: "0.0#",
test: pairs{
"0": "0.0",
},
}, {
pattern: "#0.###",
test: pairs{
"0": "0",
"1": "1",
"1.2": "1.2",
"1.2345": "1.234", // rounding should have been done earlier
"1234.5": "1234.5",
"1234.567": "1234.567",
},
}, {
pattern: "#0.######",
test: pairs{
"0": "0",
"1234.5678": "1234.5678",
"0.123456789": "0.123457",
"NaN": "NaN",
"Inf": "∞",
},
// Test separators.
}, {
pattern: "#,#.00",
test: pairs{
"100": "1,0,0.00",
},
}, {
pattern: "#,0.##",
test: pairs{
"10": "1,0",
},
}, {
pattern: "#,0",
test: pairs{
"10": "1,0",
},
}, {
pattern: "#,##,#.00",
test: pairs{
"1000": "1,00,0.00",
},
}, {
pattern: "#,##0.###",
test: pairs{
"0": "0",
"1234.5678": "1,234.568",
"0.123456789": "0.123",
},
}, {
pattern: "#,##,##0.###",
test: pairs{
"0": "0",
"123456789012": "1,23,45,67,89,012",
"0.123456789": "0.123",
},
}, {
pattern: "0,00,000.###",
test: pairs{
"0": "0,00,000",
"123456789012": "1,23,45,67,89,012",
"12.3456789": "0,00,012.346",
"0.123456789": "0,00,000.123",
},
// Support for ill-formed patterns.
}, {
pattern: "#",
test: pairs{
".00": "", // This is the behavior of fmt.
"0": "", // This is the behavior of fmt.
"1": "1",
"10.": "10",
},
}, {
pattern: ".#",
test: pairs{
"0": "", // This is the behavior of fmt.
"1": "1",
"1.2": "1.2",
"1.2345": "1.2",
},
}, {
pattern: "#,#.##",
test: pairs{
"10": "1,0",
},
}, {
pattern: "#,#",
test: pairs{
"10": "1,0",
},
// Special patterns
}, {
pattern: "#,max_int=2",
pat: &Pattern{
RoundingContext: RoundingContext{
MaxIntegerDigits: 2,
},
},
test: pairs{
"2017": "17",
},
}, {
pattern: "0,max_int=2",
pat: &Pattern{
RoundingContext: RoundingContext{
MaxIntegerDigits: 2,
MinIntegerDigits: 1,
},
},
test: pairs{
"2000": "0",
"2001": "1",
"2017": "17",
},
}, {
pattern: "00,max_int=2",
pat: &Pattern{
RoundingContext: RoundingContext{
MaxIntegerDigits: 2,
MinIntegerDigits: 2,
},
},
test: pairs{
"2000": "00",
"2001": "01",
"2017": "17",
},
}, {
pattern: "@@@@,max_int=2",
pat: &Pattern{
RoundingContext: RoundingContext{
MaxIntegerDigits: 2,
MinSignificantDigits: 4,
},
},
test: pairs{
"2017": "17.00",
"2000": "0.000",
"2001": "1.000",
},
// Significant digits
}, {
pattern: "@@##",
test: pairs{
"1": "1.0",
"0.1": "0.10", // leading zero does not count as significant digit
"123": "123",
"1234": "1234",
"12345": "12340",
},
}, {
pattern: "@@@@",
test: pairs{
"1": "1.000",
".1": "0.1000",
".001": "0.001000",
"123": "123.0",
"1234": "1234",
"12345": "12340", // rounding down
"NaN": "NaN",
"-Inf": "-∞",
},
// TODO: rounding
// {"@@@@": "23456": "23460"}, // rounding up
// TODO: padding
// Scientific and Engineering notation
}, {
pattern: "#E0",
test: pairs{
"0": "0\u202f×\u202f10⁰",
"1": "1\u202f×\u202f10⁰",
"123.456": "1\u202f×\u202f10²",
},
}, {
pattern: "#E+0",
test: pairs{
"0": "0\u202f×\u202f10⁺⁰",
"1000": "1\u202f×\u202f10⁺³",
"1E100": "1\u202f×\u202f10⁺¹⁰⁰",
"1E-100": "1\u202f×\u202f10⁻¹⁰⁰",
"NaN": "NaN",
"-Inf": "-∞",
},
}, {
pattern: "##0E00",
test: pairs{
"100": "100\u202f×\u202f10⁰⁰",
"12345": "12\u202f×\u202f10⁰³",
"123.456": "123\u202f×\u202f10⁰⁰",
},
}, {
pattern: "##0.###E00",
test: pairs{
"100": "100\u202f×\u202f10⁰⁰",
"12345": "12.345\u202f×\u202f10⁰³",
"123456": "123.456\u202f×\u202f10⁰³",
"123.456": "123.456\u202f×\u202f10⁰⁰",
"123.4567": "123.457\u202f×\u202f10⁰⁰",
},
}, {
pattern: "##0.000E00",
test: pairs{
"100": "100.000\u202f×\u202f10⁰⁰",
"12345": "12.345\u202f×\u202f10⁰³",
"123.456": "123.456\u202f×\u202f10⁰⁰",
"12.3456": "12.346\u202f×\u202f10⁰⁰",
},
}, {
pattern: "@@E0",
test: pairs{
"0": "0.0\u202f×\u202f10⁰",
"99": "9.9\u202f×\u202f10¹",
"0.99": "9.9\u202f×\u202f10⁻¹",
},
}, {
pattern: "@###E00",
test: pairs{
"0": "0\u202f×\u202f10⁰⁰",
"1": "1\u202f×\u202f10⁰⁰",
"11": "1.1\u202f×\u202f10⁰¹",
"111": "1.11\u202f×\u202f10⁰²",
"1111": "1.111\u202f×\u202f10⁰³",
"11111": "1.111\u202f×\u202f10⁰⁴",
"0.1": "1\u202f×\u202f10⁻⁰¹",
"0.11": "1.1\u202f×\u202f10⁻⁰¹",
"0.001": "1\u202f×\u202f10⁻⁰³",
},
}, {
pattern: "*x##0",
test: pairs{
"0": "xx0",
"10": "x10",
"100": "100",
"1000": "1000",
},
}, {
pattern: "##0*x",
test: pairs{
"0": "0xx",
"10": "10x",
"100": "100",
"1000": "1000",
},
}, {
pattern: "* ###0.000",
test: pairs{
"0": " 0.000",
"123": " 123.000",
"123.456": " 123.456",
"1234.567": "1234.567",
},
}, {
pattern: "**0.0#######E00",
test: pairs{
"0": "***0.0\u202f×\u202f10⁰⁰",
"10": "***1.0\u202f×\u202f10⁰¹",
"11": "***1.1\u202f×\u202f10⁰¹",
"111": "**1.11\u202f×\u202f10⁰²",
"1111": "*1.111\u202f×\u202f10⁰³",
"11111": "1.1111\u202f×\u202f10⁰⁴",
"11110": "*1.111\u202f×\u202f10⁰⁴",
"11100": "**1.11\u202f×\u202f10⁰⁴",
"11000": "***1.1\u202f×\u202f10⁰⁴",
"10000": "***1.0\u202f×\u202f10⁰⁴",
},
}, {
pattern: "*xpre0suf",
test: pairs{
"0": "pre0suf",
"10": "pre10suf",
},
}, {
pattern: "*∞ pre ###0 suf",
test: pairs{
"0": "∞∞∞ pre 0 suf",
"10": "∞∞ pre 10 suf",
"100": "∞ pre 100 suf",
"1000": " pre 1000 suf",
},
}, {
pattern: "pre *∞###0 suf",
test: pairs{
"0": "pre ∞∞∞0 suf",
"10": "pre ∞∞10 suf",
"100": "pre ∞100 suf",
"1000": "pre 1000 suf",
},
}, {
pattern: "pre ###0*∞ suf",
test: pairs{
"0": "pre 0∞∞∞ suf",
"10": "pre 10∞∞ suf",
"100": "pre 100∞ suf",
"1000": "pre 1000 suf",
},
}, {
pattern: "pre ###0 suf *∞",
test: pairs{
"0": "pre 0 suf ∞∞∞",
"10": "pre 10 suf ∞∞",
"100": "pre 100 suf ∞",
"1000": "pre 1000 suf ",
},
}, {
// Take width of positive pattern.
pattern: "**###0;**-#####0x",
test: pairs{
"0": "***0",
"-1": "*-1x",
},
}, {
pattern: "0.00%",
test: pairs{
"0.1": "10.00%",
},
}, {
pattern: "0.##%",
test: pairs{
"0.1": "10%",
"0.11": "11%",
"0.111": "11.1%",
"0.1111": "11.11%",
"0.11111": "11.11%",
},
}, {
pattern: "‰ 0.0#",
test: pairs{
"0.1": "‰ 100.0",
"0.11": "‰ 110.0",
"0.111": "‰ 111.0",
"0.1111": "‰ 111.1",
"0.11111": "‰ 111.11",
"0.111111": "‰ 111.11",
},
}}
// TODO:
// "#,##0.00¤",
// "#,##0.00 ¤;(#,##0.00 ¤)",
for _, tc := range testCases {
pat := tc.pat
if pat == nil {
var err error
if pat, err = ParsePattern(tc.pattern); err != nil {
log.Fatal(err)
}
}
var f Formatter
f.InitPattern(language.English, pat)
for num, want := range tc.test {
buf := make([]byte, 100)
t.Run(tc.pattern+"/"+num, func(t *testing.T) {
var d Decimal
d.Convert(f.RoundingContext, dec(num))
buf = f.Format(buf[:0], &d)
if got := string(buf); got != want {
t.Errorf("\n got %[1]q (%[1]s)\nwant %[2]q (%[2]s)", got, want)
}
})
}
}
}
func TestLocales(t *testing.T) {
testCases := []struct {
tag language.Tag
num string
want string
}{
{language.Make("en"), "123456.78", "123,456.78"},
{language.Make("de"), "123456.78", "123.456,78"},
{language.Make("de-CH"), "123456.78", "123456.78"},
{language.Make("fr"), "123456.78", "123 456,78"},
{language.Make("bn"), "123456.78", "১,২৩,৪৫৬.৭৮"},
}
for _, tc := range testCases {
t.Run(fmt.Sprint(tc.tag, "/", tc.num), func(t *testing.T) {
var f Formatter
f.InitDecimal(tc.tag)
var d Decimal
d.Convert(f.RoundingContext, dec(tc.num))
b := f.Format(nil, &d)
if got := string(b); got != tc.want {
t.Errorf("got %[1]q (%[1]s); want %[2]q (%[2]s)", got, tc.want)
}
})
}
}
func TestFormatters(t *testing.T) {
var f Formatter
testCases := []struct {
init func(t language.Tag)
num string
want string
}{
{f.InitDecimal, "123456.78", "123,456.78"},
{f.InitScientific, "123456.78", "1.23\u202f×\u202f10⁵"},
{f.InitEngineering, "123456.78", "123.46\u202f×\u202f10³"},
{f.InitEngineering, "1234", "1.23\u202f×\u202f10³"},
{f.InitPercent, "0.1234", "12.34%"},
{f.InitPerMille, "0.1234", "123.40‰"},
}
for i, tc := range testCases {
t.Run(fmt.Sprint(i, "/", tc.num), func(t *testing.T) {
tc.init(language.English)
f.SetScale(2)
var d Decimal
d.Convert(f.RoundingContext, dec(tc.num))
b := f.Format(nil, &d)
if got := string(b); got != tc.want {
t.Errorf("got %[1]q (%[1]s); want %[2]q (%[2]s)", got, tc.want)
}
})
}
}


@ -1,458 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"flag"
"fmt"
"log"
"reflect"
"strings"
"unicode/utf8"
"golang.org/x/text/internal"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/number"
"golang.org/x/text/internal/stringset"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var (
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
outputFile = flag.String("output", "tables.go", "output file")
outputTestFile = flag.String("testoutput", "data_test.go", "output file")
draft = flag.String("draft",
"contributed",
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
)
func main() {
gen.Init()
const pkg = "number"
gen.Repackage("gen_common.go", "common.go", pkg)
// Read the CLDR zip file.
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
d.SetDirFilter("supplemental", "main")
d.SetSectionFilter("numbers", "numberingSystem")
data, err := d.DecodeZip(r)
if err != nil {
log.Fatalf("DecodeZip: %v", err)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile(*outputFile, pkg)
fmt.Fprintln(w, `import "golang.org/x/text/internal/stringset"`)
gen.WriteCLDRVersion(w)
genNumSystem(w, data)
genSymbols(w, data)
genFormats(w, data)
}
var systemMap = map[string]system{"latn": 0}
func getNumberSystem(str string) system {
ns, ok := systemMap[str]
if !ok {
log.Fatalf("No index for numbering system %q", str)
}
return ns
}
func genNumSystem(w *gen.CodeWriter, data *cldr.CLDR) {
numSysData := []systemData{
{digitSize: 1, zero: [4]byte{'0'}},
}
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
if len(ns.Digits) == 0 {
continue
}
switch ns.Id {
case "latn":
// hard-wired
continue
case "hanidec":
// non-consecutive digits: treat as "algorithmic"
continue
}
zero, sz := utf8.DecodeRuneInString(ns.Digits)
if ns.Digits[sz-1]+9 > 0xBF { // 1011 1111: highest continuation byte
log.Fatalf("Last byte of zero value overflows for %s", ns.Id)
}
i := rune(0)
for _, r := range ns.Digits {
// Verify that we can do simple math on the UTF-8 byte sequence
// of zero to get the digit.
if zero+i != r {
// Runes not consecutive.
log.Fatalf("Digit %d of %s (%U) is not offset correctly from zero value", i, ns.Id, r)
}
i++
}
var x [utf8.UTFMax]byte
utf8.EncodeRune(x[:], zero)
id := system(len(numSysData))
systemMap[ns.Id] = id
numSysData = append(numSysData, systemData{
id: id,
digitSize: byte(sz),
zero: x,
})
}
w.WriteVar("numSysData", numSysData)
algoID := system(len(numSysData))
fmt.Fprintln(w, "const (")
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
id, ok := systemMap[ns.Id]
if !ok {
id = algoID
systemMap[ns.Id] = id
algoID++
}
fmt.Fprintf(w, "num%s = %#x\n", strings.Title(ns.Id), id)
}
fmt.Fprintln(w, "numNumberSystems")
fmt.Fprintln(w, ")")
fmt.Fprintln(w, "var systemMap = map[string]system{")
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
fmt.Fprintf(w, "%q: num%s,\n", ns.Id, strings.Title(ns.Id))
w.Size += len(ns.Id) + 16 + 1 // very coarse approximation
}
fmt.Fprintln(w, "}")
}
func genSymbols(w *gen.CodeWriter, data *cldr.CLDR) {
d, err := cldr.ParseDraft(*draft)
if err != nil {
log.Fatalf("invalid draft level: %v", err)
}
nNumberSystems := system(len(systemMap))
type symbols [NumSymbolTypes]string
type key struct {
tag int // from language.CompactIndex
system system
}
symbolMap := map[key]*symbols{}
defaults := map[int]system{}
for _, lang := range data.Locales() {
ldml := data.RawLDML(lang)
if ldml.Numbers == nil {
continue
}
langIndex, ok := language.CompactIndex(language.MustParse(lang))
if !ok {
log.Fatalf("No compact index for language %s", lang)
}
if d := ldml.Numbers.DefaultNumberingSystem; len(d) > 0 {
defaults[langIndex] = getNumberSystem(d[0].Data())
}
syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
syms.SelectDraft(d)
getFirst := func(name string, x interface{}) string {
v := reflect.ValueOf(x)
slice := cldr.MakeSlice(x)
slice.SelectAnyOf("alt", "", "alt")
if reflect.Indirect(v).Len() == 0 {
return ""
} else if reflect.Indirect(v).Len() > 1 {
log.Fatalf("%s: multiple values of %q within single symbol not supported.", lang, name)
}
return reflect.Indirect(v).Index(0).MethodByName("Data").Call(nil)[0].String()
}
for _, sym := range ldml.Numbers.Symbols {
if sym.NumberSystem == "" {
// This is just linking the default of root to "latn".
continue
}
symbolMap[key{langIndex, getNumberSystem(sym.NumberSystem)}] = &symbols{
SymDecimal: getFirst("decimal", &sym.Decimal),
SymGroup: getFirst("group", &sym.Group),
SymList: getFirst("list", &sym.List),
SymPercentSign: getFirst("percentSign", &sym.PercentSign),
SymPlusSign: getFirst("plusSign", &sym.PlusSign),
SymMinusSign: getFirst("minusSign", &sym.MinusSign),
SymExponential: getFirst("exponential", &sym.Exponential),
SymSuperscriptingExponent: getFirst("superscriptingExponent", &sym.SuperscriptingExponent),
SymPerMille: getFirst("perMille", &sym.PerMille),
SymInfinity: getFirst("infinity", &sym.Infinity),
SymNan: getFirst("nan", &sym.Nan),
SymTimeSeparator: getFirst("timeSeparator", &sym.TimeSeparator),
}
}
}
// Expand all values.
for k, syms := range symbolMap {
for t := SymDecimal; t < NumSymbolTypes; t++ {
p := k.tag
for syms[t] == "" {
p = int(internal.Parent[p])
if pSyms, ok := symbolMap[key{p, k.system}]; ok && (*pSyms)[t] != "" {
syms[t] = (*pSyms)[t]
break
}
if p == 0 /* und */ {
// Default to root, latn.
syms[t] = (*symbolMap[key{}])[t]
}
}
}
}
// Unique the symbol sets and write the string data.
m := map[symbols]int{}
sb := stringset.NewBuilder()
symIndex := [][NumSymbolTypes]byte{}
for ns := system(0); ns < nNumberSystems; ns++ {
for _, l := range data.Locales() {
langIndex, _ := language.CompactIndex(language.MustParse(l))
s := symbolMap[key{langIndex, ns}]
if s == nil {
continue
}
if _, ok := m[*s]; !ok {
m[*s] = len(symIndex)
sb.Add(s[:]...)
var x [NumSymbolTypes]byte
for i := SymDecimal; i < NumSymbolTypes; i++ {
x[i] = byte(sb.Index((*s)[i]))
}
symIndex = append(symIndex, x)
}
}
}
w.WriteVar("symIndex", symIndex)
w.WriteVar("symData", sb.Set())
// resolveSymbolIndex gets the index from the closest matching locale,
// including the locale itself.
resolveSymbolIndex := func(langIndex int, ns system) byte {
for {
if sym := symbolMap[key{langIndex, ns}]; sym != nil {
return byte(m[*sym])
}
if langIndex == 0 {
return 0 // und, latn
}
langIndex = int(internal.Parent[langIndex])
}
}
// Create an index with the symbols for each locale for the latn numbering
// system. If this is not the default, or the only one, for a locale, we
// will overwrite the value later.
var langToDefaults [language.NumCompactTags]byte
for _, l := range data.Locales() {
langIndex, _ := language.CompactIndex(language.MustParse(l))
langToDefaults[langIndex] = resolveSymbolIndex(langIndex, 0)
}
// Delete redundant entries.
for _, l := range data.Locales() {
langIndex, _ := language.CompactIndex(language.MustParse(l))
def := defaults[langIndex]
syms := symbolMap[key{langIndex, def}]
if syms == nil {
continue
}
for ns := system(0); ns < nNumberSystems; ns++ {
if ns == def {
continue
}
if altSyms, ok := symbolMap[key{langIndex, ns}]; ok && *altSyms == *syms {
delete(symbolMap, key{langIndex, ns})
}
}
}
// Create a sorted list of alternatives per language. This will only need to
// be referenced if a user specified an alternative numbering system.
var langToAlt []altSymData
for _, l := range data.Locales() {
langIndex, _ := language.CompactIndex(language.MustParse(l))
start := len(langToAlt)
if start > 0x7F {
log.Fatal("Number of alternative assignments > 0x7F")
}
// Create the entry for the default value.
def := defaults[langIndex]
langToAlt = append(langToAlt, altSymData{
compactTag: uint16(langIndex),
system: def,
symIndex: resolveSymbolIndex(langIndex, def),
})
for ns := system(0); ns < nNumberSystems; ns++ {
if def == ns {
continue
}
if sym := symbolMap[key{langIndex, ns}]; sym != nil {
langToAlt = append(langToAlt, altSymData{
compactTag: uint16(langIndex),
system: ns,
symIndex: resolveSymbolIndex(langIndex, ns),
})
}
}
if def == 0 && len(langToAlt) == start+1 {
// No additional data: erase the entry.
langToAlt = langToAlt[:start]
} else {
// Overwrite the entry in langToDefaults.
langToDefaults[langIndex] = 0x80 | byte(start)
}
}
w.WriteComment(`
langToDefaults maps a compact language index to the default numbering system
and default symbol set`)
w.WriteVar("langToDefaults", langToDefaults)
w.WriteComment(`
langToAlt is a list of numbering system and symbol set pairs, sorted and
marked by compact language index.`)
w.WriteVar("langToAlt", langToAlt)
}
// genFormats generates the lookup table for decimal, scientific and percent
// patterns.
//
// CLDR allows for patterns to be different per language for different numbering
// systems. In practice the patterns are set to be consistent for a language
// independent of the numbering system. genFormats verifies that no language
// deviates from this.
func genFormats(w *gen.CodeWriter, data *cldr.CLDR) {
d, err := cldr.ParseDraft(*draft)
if err != nil {
log.Fatalf("invalid draft level: %v", err)
}
// Fill the first slot with a dummy so we can identify unspecified tags.
formats := []number.Pattern{{}}
patterns := map[string]int{}
// TODO: It would be possible to eliminate two of these slices by having
// another indirection and store a reference to the combination of patterns.
decimal := make([]byte, language.NumCompactTags)
scientific := make([]byte, language.NumCompactTags)
percent := make([]byte, language.NumCompactTags)
for _, lang := range data.Locales() {
ldml := data.RawLDML(lang)
if ldml.Numbers == nil {
continue
}
langIndex, ok := language.CompactIndex(language.MustParse(lang))
if !ok {
log.Fatalf("No compact index for language %s", lang)
}
type patternSlice []*struct {
cldr.Common
Numbers string `xml:"numbers,attr"`
Count string `xml:"count,attr"`
}
add := func(name string, tags []byte, ps patternSlice) {
sl := cldr.MakeSlice(&ps)
sl.SelectDraft(d)
if len(ps) == 0 {
return
}
if len(ps) > 2 || len(ps) == 2 && ps[0] != ps[1] {
log.Fatalf("Inconsistent %d patterns for language %s", name, lang)
}
s := ps[0].Data()
index, ok := patterns[s]
if !ok {
nf, err := number.ParsePattern(s)
if err != nil {
log.Fatal(err)
}
index = len(formats)
patterns[s] = index
formats = append(formats, *nf)
}
tags[langIndex] = byte(index)
}
for _, df := range ldml.Numbers.DecimalFormats {
for _, l := range df.DecimalFormatLength {
if l.Type != "" {
continue
}
for _, f := range l.DecimalFormat {
add("decimal", decimal, f.Pattern)
}
}
}
for _, df := range ldml.Numbers.ScientificFormats {
for _, l := range df.ScientificFormatLength {
if l.Type != "" {
continue
}
for _, f := range l.ScientificFormat {
add("scientific", scientific, f.Pattern)
}
}
}
for _, df := range ldml.Numbers.PercentFormats {
for _, l := range df.PercentFormatLength {
if l.Type != "" {
continue
}
for _, f := range l.PercentFormat {
add("percent", percent, f.Pattern)
}
}
}
}
// Complete the parent tag array to reflect inheritance. An index of 0
// indicates an unspecified value.
for _, data := range [][]byte{decimal, scientific, percent} {
for i := range data {
p := uint16(i)
for ; data[p] == 0; p = internal.Parent[p] {
}
data[i] = data[p]
}
}
w.WriteVar("tagToDecimal", decimal)
w.WriteVar("tagToScientific", scientific)
w.WriteVar("tagToPercent", percent)
value := strings.Replace(fmt.Sprintf("%#v", formats), "number.", "", -1)
	// Break up the lines. This won't give perfect formatting, but it is better
	// than one huge line.
value = strings.Replace(value, ", ", ",\n", -1)
fmt.Fprintf(w, "var formats = %s\n", value)
}


@ -1,44 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import "unicode/utf8"
// A system identifies a CLDR numbering system.
type system byte
type systemData struct {
id system
digitSize byte // number of UTF-8 bytes per digit
zero [utf8.UTFMax]byte // UTF-8 sequence of zero digit.
}
// A SymbolType identifies a symbol of a specific kind.
type SymbolType int
const (
SymDecimal SymbolType = iota
SymGroup
SymList
SymPercentSign
SymPlusSign
SymMinusSign
SymExponential
SymSuperscriptingExponent
SymPerMille
SymInfinity
SymNan
SymTimeSeparator
NumSymbolTypes
)
type altSymData struct {
compactTag uint16
system system
symIndex byte
}


@ -1,154 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run gen.go gen_common.go
// Package number contains tools and data for formatting numbers.
package number
import (
"unicode/utf8"
"golang.org/x/text/internal"
"golang.org/x/text/language"
)
// Info holds number formatting configuration data.
type Info struct {
system systemData // numbering system information
symIndex byte // index to symbols
}
// InfoFromLangID returns an Info for the given compact language identifier and
// numbering system identifier. If system is the empty string, the default
// numbering system will be taken for that language.
func InfoFromLangID(compactIndex int, numberSystem string) Info {
p := langToDefaults[compactIndex]
// Lookup the entry for the language.
pSymIndex := byte(0) // Default: Latin, default symbols
system, ok := systemMap[numberSystem]
if !ok {
// Take the value for the default numbering system. This is by far the
// most common case as an alternative numbering system is hardly used.
if p&0x80 == 0 {
pSymIndex = p
} else {
// Take the first entry from the alternatives list.
data := langToAlt[p&^0x80]
pSymIndex = data.symIndex
system = data.system
}
} else {
langIndex := compactIndex
ns := system
outerLoop:
for {
if p&0x80 == 0 {
if ns == 0 {
// The index directly points to the symbol data.
pSymIndex = p
break
}
// Move to the parent and retry.
langIndex = int(internal.Parent[langIndex])
}
// The index points to a list of symbol data indexes.
for _, e := range langToAlt[p&^0x80:] {
if int(e.compactTag) != langIndex {
if langIndex == 0 {
// The CLDR root defines full symbol information for all
// numbering systems (even though mostly by means of
// aliases). This means that we will never fall back to
// the default of the language. Also, the loop is
// guaranteed to terminate as a consequence.
ns = numLatn
// Fall back to Latin and start from the original
// language. See
// http://unicode.org/reports/tr35/#Locale_Inheritance.
langIndex = compactIndex
} else {
// Fall back to parent.
langIndex = int(internal.Parent[langIndex])
}
break
}
if e.system == ns {
pSymIndex = e.symIndex
break outerLoop
}
}
}
}
if int(system) >= len(numSysData) { // algorithmic
// Will generate ASCII digits in case the user inadvertently calls
// WriteDigit or Digit on it.
d := numSysData[0]
d.id = system
return Info{
system: d,
symIndex: pSymIndex,
}
}
return Info{
system: numSysData[system],
symIndex: pSymIndex,
}
}
// InfoFromTag returns an Info for the given language tag.
func InfoFromTag(t language.Tag) Info {
for {
if index, ok := language.CompactIndex(t); ok {
return InfoFromLangID(index, t.TypeForKey("nu"))
}
t = t.Parent()
}
}
// IsDecimal reports whether the numbering system can convert decimal to
// native symbols one-to-one.
func (n Info) IsDecimal() bool {
return int(n.system.id) < len(numSysData)
}
// WriteDigit writes the UTF-8 sequence for n corresponding to the given ASCII
// digit to dst and reports the number of bytes written. dst must be large
// enough to hold the rune (can be up to utf8.UTFMax bytes).
func (n Info) WriteDigit(dst []byte, asciiDigit rune) int {
copy(dst, n.system.zero[:n.system.digitSize])
dst[n.system.digitSize-1] += byte(asciiDigit - '0')
return int(n.system.digitSize)
}
// AppendDigit appends the UTF-8 sequence for n corresponding to the given digit
// to dst and reports the number of bytes written. dst must be large enough to
// hold the rune (can be up to utf8.UTFMax bytes).
func (n Info) AppendDigit(dst []byte, digit byte) []byte {
dst = append(dst, n.system.zero[:n.system.digitSize]...)
dst[len(dst)-1] += digit
return dst
}
// Digit returns the digit for the numbering system for the corresponding ASCII
// value. For example, ni.Digit('3') could return '三'. Note that the argument
// is the rune constant '3', which equals 51, not the integer constant 3.
func (n Info) Digit(asciiDigit rune) rune {
var x [utf8.UTFMax]byte
n.WriteDigit(x[:], asciiDigit)
r, _ := utf8.DecodeRune(x[:])
return r
}
// Symbol returns the string for the given symbol type.
func (n Info) Symbol(t SymbolType) string {
return symData.Elem(int(symIndex[n.symIndex][t]))
}
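// An illustrative sketch of how symbols and digits resolve through the locale
// fallback chain (expected values mirror the Info tests in this package):
//
//	info := InfoFromTag(language.MustParse("de-BE-u-nu-deva"))
//	info.Symbol(SymGroup) // "." (inherited from de)
//	info.Digit('9')       // '९' (U+096F DEVANAGARI DIGIT NINE)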
func formatForLang(t language.Tag, index []byte) *Pattern {
for ; ; t = t.Parent() {
if x, ok := language.CompactIndex(t); ok {
return &formats[index[x]]
}
}
}


@ -1,100 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"fmt"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
)
func TestInfo(t *testing.T) {
testCases := []struct {
lang string
sym SymbolType
wantSym string
wantNine rune
}{
{"und", SymDecimal, ".", '9'},
{"de", SymGroup, ".", '9'},
{"de-BE", SymGroup, ".", '9'}, // inherits from de (no number data in CLDR)
{"de-BE-oxendict", SymGroup, ".", '9'}, // inherits from de (no compact index)
// U+096F DEVANAGARI DIGIT NINE ('९')
{"de-BE-u-nu-deva", SymGroup, ".", '\u096f'}, // miss -> latn -> de
{"de-Cyrl-BE", SymGroup, ",", '9'}, // inherits from root
{"de-CH", SymGroup, "", '9'}, // overrides values in de
{"de-CH-oxendict", SymGroup, "", '9'}, // inherits from de-CH (no compact index)
{"de-CH-u-nu-deva", SymGroup, "", '\u096f'}, // miss -> latn -> de-CH
{"pa", SymExponential, "E", '9'},
// "×۱۰^" -> U+00d7 U+06f1 U+06f0^"
// U+06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
// U+06F1 EXTENDED ARABIC-INDIC DIGIT ONE
// U+06F9 EXTENDED ARABIC-INDIC DIGIT NINE
{"pa-u-nu-arabext", SymExponential, "\u00d7\u06f1\u06f0^", '\u06f9'},
// "གྲངས་མེད" - > U+0f42 U+0fb2 U+0f44 U+0f66 U+0f0b U+0f58 U+0f7a U+0f51
// Examples:
// U+0F29 TIBETAN DIGIT NINE (༩)
{"dz", SymInfinity, "\u0f42\u0fb2\u0f44\u0f66\u0f0b\u0f58\u0f7a\u0f51", '\u0f29'}, // defaults to tibt
{"dz-u-nu-latn", SymInfinity, "∞", '9'}, // select alternative
{"dz-u-nu-tibt", SymInfinity, "\u0f42\u0fb2\u0f44\u0f66\u0f0b\u0f58\u0f7a\u0f51", '\u0f29'},
{"en-u-nu-tibt", SymInfinity, "∞", '\u0f29'},
// algorithmic number systems fall back to ASCII if Digits is used.
{"en-u-nu-hanidec", SymPlusSign, "+", '9'},
{"en-u-nu-roman", SymPlusSign, "+", '9'},
}
for _, tc := range testCases {
t.Run(fmt.Sprintf("%s:%v", tc.lang, tc.sym), func(t *testing.T) {
info := InfoFromTag(language.MustParse(tc.lang))
if got := info.Symbol(tc.sym); got != tc.wantSym {
t.Errorf("sym: got %q; want %q", got, tc.wantSym)
}
if got := info.Digit('9'); got != tc.wantNine {
t.Errorf("Digit(9): got %+q; want %+q", got, tc.wantNine)
}
var buf [4]byte
if got := string(buf[:info.WriteDigit(buf[:], '9')]); got != string(tc.wantNine) {
t.Errorf("WriteDigit(9): got %+q; want %+q", got, tc.wantNine)
}
if got := string(info.AppendDigit([]byte{}, 9)); got != string(tc.wantNine) {
t.Errorf("AppendDigit(9): got %+q; want %+q", got, tc.wantNine)
}
})
}
}
func TestFormats(t *testing.T) {
testCases := []struct {
lang string
pattern string
index []byte
}{
{"en", "#,##0.###", tagToDecimal},
{"de", "#,##0.###", tagToDecimal},
{"de-CH", "#,##0.###", tagToDecimal},
{"pa", "#,##,##0.###", tagToDecimal},
{"pa-Arab", "#,##0.###", tagToDecimal}, // Does NOT inherit from pa!
{"mr", "#,##,##0.###", tagToDecimal},
{"mr-IN", "#,##,##0.###", tagToDecimal}, // Inherits from mr.
{"nl", "#E0", tagToScientific},
{"nl-MX", "#E0", tagToScientific}, // Inherits through Tag.Parent.
{"zgh", "#,##0 %", tagToPercent},
}
for _, tc := range testCases {
testtext.Run(t, tc.lang, func(t *testing.T) {
got := formatForLang(language.MustParse(tc.lang), tc.index)
want, _ := ParsePattern(tc.pattern)
if *got != *want {
t.Errorf("\ngot %#v;\nwant %#v", got, want)
}
})
}
}


@ -1,485 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"errors"
"unicode/utf8"
)
// This file contains a parser for the CLDR number patterns as described in
// http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
//
// The following BNF is derived from this standard.
//
// pattern := subpattern (';' subpattern)?
// subpattern := affix? number exponent? affix?
// number := decimal | sigDigits
// decimal := '#'* '0'* ('.' fraction)? | '#' | '0'
// fraction := '0'* '#'*
// sigDigits := '#'* '@' '@'* '#'*
// exponent := 'E' '+'? '0'* '0'
// padSpec := '*' \L
//
// Notes:
// - An affix pattern may contain any runes, but runes with special meaning
// should be escaped.
// - Sequences of digits, '#', and '@' in decimal and sigDigits may have
// interstitial commas.
// TODO: replace special characters in affixes (-, +, ¤) with control codes.
// Pattern holds information for formatting numbers. It is designed to hold
// information from CLDR number patterns.
//
// Patterns are precompiled for all languages. Even though the number of
// patterns is not very large, we want to keep this type small.
//
// This type is only intended for internal use.
type Pattern struct {
RoundingContext
Affix string // includes prefix and suffix. First byte is prefix length.
Offset uint16 // Offset into Affix for prefix and suffix
NegOffset uint16 // Offset into Affix for negative prefix and suffix or 0.
PadRune rune
FormatWidth uint16
GroupingSize [2]uint8
Flags PatternFlag
}
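// For illustration, the values below are taken from the pattern tests in this
// package:
//
//	p, _ := ParsePattern("#,##0.###")
//	// p.FormatWidth == 9, p.GroupingSize == [2]uint8{3, 0},
//	// p.MinIntegerDigits == 1, p.MaxFractionDigits == 3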
// A RoundingContext indicates how a number should be converted to digits.
// It contains all information needed to determine the "visible digits" as
// required by the pluralization rules.
type RoundingContext struct {
	// TODO: unify these two fields so that the meaning of how precision is
	// handled is unambiguous.
MaxSignificantDigits int16 // -1 is unlimited
MaxFractionDigits int16 // -1 is unlimited
Increment uint32
IncrementScale uint8 // May differ from printed scale.
Mode RoundingMode
DigitShift uint8 // Number of decimals to shift. Used for % and ‰.
// Number of digits.
MinIntegerDigits uint8
MaxIntegerDigits uint8
MinFractionDigits uint8
MinSignificantDigits uint8
MinExponentDigits uint8
}
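// A minimal sketch of a common setup, mirroring the Convert tests in this
// package: a fixed scale of two fraction digits, under which -0.001 converts
// to "-0.00".
//
//	var rc RoundingContext
//	rc.SetScale(2)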
// RoundSignificantDigits returns the number of significant digits an
// implementation of Convert may round to, or n < 0 if there is no maximum or
// if a maximum is not recommended.
func (r *RoundingContext) RoundSignificantDigits() (n int) {
if r.MaxFractionDigits == 0 && r.MaxSignificantDigits > 0 {
return int(r.MaxSignificantDigits)
} else if r.isScientific() && r.MaxIntegerDigits == 1 {
if r.MaxSignificantDigits == 0 ||
int(r.MaxFractionDigits+1) == int(r.MaxSignificantDigits) {
// Note: don't add DigitShift: it is only used for decimals.
return int(r.MaxFractionDigits) + 1
}
}
return -1
}
// RoundFractionDigits returns the number of fraction digits an implementation
// of Convert may round to, or n < 0 if there is no maximum or if a maximum is
// not recommended.
func (r *RoundingContext) RoundFractionDigits() (n int) {
if r.MinExponentDigits == 0 &&
r.MaxSignificantDigits == 0 &&
r.MaxFractionDigits >= 0 {
return int(r.MaxFractionDigits) + int(r.DigitShift)
}
return -1
}
// SetScale sets the RoundingContext to use a fixed number of fraction digits.
func (r *RoundingContext) SetScale(scale int) {
r.MinFractionDigits = uint8(scale)
r.MaxFractionDigits = int16(scale)
}
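// SetPrecision sets the maximum number of significant digits for the
// RoundingContext.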
func (r *RoundingContext) SetPrecision(prec int) {
r.MaxSignificantDigits = int16(prec)
}
func (r *RoundingContext) isScientific() bool {
return r.MinExponentDigits > 0
}
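// needsSep reports whether a grouping separator should be written after the
// integer digit at the given 1-based position, counted from the least
// significant integer digit. For example, with GroupingSize [3 2] (as for the
// pattern "#,##,##0.###") the digits of 123456789012 are grouped as
// 1,23,45,67,89,012 (see the formatting tests in this package).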
func (f *Pattern) needsSep(pos int) bool {
p := pos - 1
size := int(f.GroupingSize[0])
if size == 0 || p == 0 {
return false
}
if p == size {
return true
}
if p -= size; p < 0 {
return false
}
	// TODO: make the second grouping size the same as the first if it is 0 so
	// that we can avoid this check.
if x := int(f.GroupingSize[1]); x != 0 {
size = x
}
return p%size == 0
}
// A PatternFlag is a bit mask for the flag field of a Pattern.
type PatternFlag uint8
const (
AlwaysSign PatternFlag = 1 << iota
ElideSign // Use space instead of plus sign. AlwaysSign must be true.
AlwaysExpSign
AlwaysDecimalSeparator
ParenthesisForNegative // Common pattern. Saves space.
PadAfterNumber
PadAfterAffix
PadBeforePrefix = 0 // Default
PadAfterPrefix = PadAfterAffix
PadBeforeSuffix = PadAfterNumber
PadAfterSuffix = PadAfterNumber | PadAfterAffix
PadMask = PadAfterNumber | PadAfterAffix
)
type parser struct {
*Pattern
leadingSharps int
pos int
err error
doNotTerminate bool
groupingCount uint
hasGroup bool
buf []byte
}
func (p *parser) setError(err error) {
if p.err == nil {
p.err = err
}
}
func (p *parser) updateGrouping() {
if p.hasGroup &&
0 < p.groupingCount && p.groupingCount < 255 {
p.GroupingSize[1] = p.GroupingSize[0]
p.GroupingSize[0] = uint8(p.groupingCount)
}
p.groupingCount = 0
p.hasGroup = true
}
var (
	// TODO: more sensible and localizable error messages.
errMultiplePadSpecifiers = errors.New("format: pattern has multiple pad specifiers")
errInvalidPadSpecifier = errors.New("format: invalid pad specifier")
errInvalidQuote = errors.New("format: invalid quote")
errAffixTooLarge = errors.New("format: prefix or suffix exceeds maximum UTF-8 length of 256 bytes")
errDuplicatePercentSign = errors.New("format: duplicate percent sign")
errDuplicatePermilleSign = errors.New("format: duplicate permille sign")
errUnexpectedEnd = errors.New("format: unexpected end of pattern")
)
// ParsePattern extracts formatting information from a CLDR number pattern.
//
// See http://unicode.org/reports/tr35/tr35-numbers.html#Number_Format_Patterns.
func ParsePattern(s string) (f *Pattern, err error) {
p := parser{Pattern: &Pattern{}}
s = p.parseSubPattern(s)
if s != "" {
// Parse negative sub pattern.
if s[0] != ';' {
p.setError(errors.New("format: error parsing first sub pattern"))
return nil, p.err
}
neg := parser{Pattern: &Pattern{}} // just for extracting the affixes.
s = neg.parseSubPattern(s[len(";"):])
p.NegOffset = uint16(len(p.buf))
p.buf = append(p.buf, neg.buf...)
}
if s != "" {
p.setError(errors.New("format: spurious characters at end of pattern"))
}
if p.err != nil {
return nil, p.err
}
if affix := string(p.buf); affix == "\x00\x00" || affix == "\x00\x00\x00\x00" {
// No prefix or suffixes.
p.NegOffset = 0
} else {
p.Affix = affix
}
if p.Increment == 0 {
p.IncrementScale = 0
}
return p.Pattern, nil
}
func (p *parser) parseSubPattern(s string) string {
s = p.parsePad(s, PadBeforePrefix)
s = p.parseAffix(s)
s = p.parsePad(s, PadAfterPrefix)
s = p.parse(p.number, s)
p.updateGrouping()
s = p.parsePad(s, PadBeforeSuffix)
s = p.parseAffix(s)
s = p.parsePad(s, PadAfterSuffix)
return s
}
func (p *parser) parsePad(s string, f PatternFlag) (tail string) {
if len(s) >= 2 && s[0] == '*' {
r, sz := utf8.DecodeRuneInString(s[1:])
if p.PadRune != 0 {
p.err = errMultiplePadSpecifiers
} else {
p.Flags |= f
p.PadRune = r
}
return s[1+sz:]
}
return s
}
func (p *parser) parseAffix(s string) string {
x := len(p.buf)
p.buf = append(p.buf, 0) // placeholder for affix length
s = p.parse(p.affix, s)
n := len(p.buf) - x - 1
if n > 0xFF {
p.setError(errAffixTooLarge)
}
p.buf[x] = uint8(n)
return s
}
// state implements a state transition. It returns the new state. A state
// function may set an error on the parser or may simply return on an incorrect
// token and let the next phase fail.
type state func(r rune) state
// parse repeatedly applies a state function on the given string until a
// termination condition is reached.
func (p *parser) parse(fn state, s string) (tail string) {
for i, r := range s {
p.doNotTerminate = false
if fn = fn(r); fn == nil || p.err != nil {
return s[i:]
}
p.FormatWidth++
}
if p.doNotTerminate {
p.setError(errUnexpectedEnd)
}
return ""
}
func (p *parser) affix(r rune) state {
switch r {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'#', '@', '.', '*', ',', ';':
return nil
case '\'':
p.FormatWidth--
return p.escapeFirst
case '%':
if p.DigitShift != 0 {
p.setError(errDuplicatePercentSign)
}
p.DigitShift = 2
case '\u2030': // ‰ Per mille
if p.DigitShift != 0 {
p.setError(errDuplicatePermilleSign)
}
p.DigitShift = 3
// TODO: handle currency somehow: ¤, ¤¤, ¤¤¤, ¤¤¤¤
}
p.buf = append(p.buf, string(r)...)
return p.affix
}
func (p *parser) escapeFirst(r rune) state {
switch r {
case '\'':
p.buf = append(p.buf, "\\'"...)
return p.affix
default:
p.buf = append(p.buf, '\'')
p.buf = append(p.buf, string(r)...)
}
return p.escape
}
func (p *parser) escape(r rune) state {
switch r {
case '\'':
p.FormatWidth--
p.buf = append(p.buf, '\'')
return p.affix
default:
p.buf = append(p.buf, string(r)...)
}
return p.escape
}
// number parses a number. The BNF says the integer part should always have
// a '0', but that does not appear to be the case according to the rest of the
// documentation. We therefore allow integer parts consisting of only '#'.
func (p *parser) number(r rune) state {
switch r {
case '#':
p.groupingCount++
p.leadingSharps++
case '@':
p.groupingCount++
p.leadingSharps = 0
p.MaxFractionDigits = -1
return p.sigDigits(r)
case ',':
if p.leadingSharps == 0 { // no leading commas
return nil
}
p.updateGrouping()
case 'E':
p.MaxIntegerDigits = uint8(p.leadingSharps)
return p.exponent
case '.': // allow ".##" etc.
p.updateGrouping()
return p.fraction
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return p.integer(r)
default:
return nil
}
return p.number
}
func (p *parser) integer(r rune) state {
if !('0' <= r && r <= '9') {
var next state
switch r {
case 'E':
if p.leadingSharps > 0 {
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
}
next = p.exponent
case '.':
next = p.fraction
case ',':
next = p.integer
}
p.updateGrouping()
return next
}
p.Increment = p.Increment*10 + uint32(r-'0')
p.groupingCount++
p.MinIntegerDigits++
return p.integer
}
func (p *parser) sigDigits(r rune) state {
switch r {
case '@':
p.groupingCount++
p.MaxSignificantDigits++
p.MinSignificantDigits++
case '#':
return p.sigDigitsFinal(r)
case 'E':
p.updateGrouping()
return p.normalizeSigDigitsWithExponent()
default:
p.updateGrouping()
return nil
}
return p.sigDigits
}
func (p *parser) sigDigitsFinal(r rune) state {
switch r {
case '#':
p.groupingCount++
p.MaxSignificantDigits++
case 'E':
p.updateGrouping()
return p.normalizeSigDigitsWithExponent()
default:
p.updateGrouping()
return nil
}
return p.sigDigitsFinal
}
func (p *parser) normalizeSigDigitsWithExponent() state {
p.MinIntegerDigits, p.MaxIntegerDigits = 1, 1
p.MinFractionDigits = p.MinSignificantDigits - 1
p.MaxFractionDigits = p.MaxSignificantDigits - 1
p.MinSignificantDigits, p.MaxSignificantDigits = 0, 0
return p.exponent
}
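// For example (values taken from the pattern tests in this package), "@@E0"
// is normalized to MinIntegerDigits 1, MaxIntegerDigits 1, MinFractionDigits 1,
// MaxFractionDigits 1 and MinExponentDigits 1.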
func (p *parser) fraction(r rune) state {
switch r {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
p.Increment = p.Increment*10 + uint32(r-'0')
p.IncrementScale++
p.MinFractionDigits++
p.MaxFractionDigits++
case '#':
p.MaxFractionDigits++
case 'E':
if p.leadingSharps > 0 {
p.MaxIntegerDigits = uint8(p.leadingSharps) + p.MinIntegerDigits
}
return p.exponent
default:
return nil
}
return p.fraction
}
func (p *parser) exponent(r rune) state {
switch r {
case '+':
// Set mode and check it wasn't already set.
if p.Flags&AlwaysExpSign != 0 || p.MinExponentDigits > 0 {
break
}
p.Flags |= AlwaysExpSign
p.doNotTerminate = true
return p.exponent
case '0':
p.MinExponentDigits++
return p.exponent
}
// termination condition
if p.MinExponentDigits == 0 {
p.setError(errors.New("format: need at least one digit"))
}
return nil
}


@ -1,438 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"reflect"
"testing"
"unsafe"
)
var testCases = []struct {
pat string
want *Pattern
}{{
"#",
&Pattern{
FormatWidth: 1,
// TODO: Should MinIntegerDigits be 1?
},
}, {
"0",
&Pattern{
FormatWidth: 1,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
},
},
}, {
"+0",
&Pattern{
Affix: "\x01+\x00",
FormatWidth: 2,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
},
},
}, {
"0+",
&Pattern{
Affix: "\x00\x01+",
FormatWidth: 2,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
},
},
}, {
"0000",
&Pattern{
FormatWidth: 4,
RoundingContext: RoundingContext{
MinIntegerDigits: 4,
},
},
}, {
".#",
&Pattern{
FormatWidth: 2,
RoundingContext: RoundingContext{
MaxFractionDigits: 1,
},
},
}, {
"#0.###",
&Pattern{
FormatWidth: 6,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxFractionDigits: 3,
},
},
}, {
"#0.######",
&Pattern{
FormatWidth: 9,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxFractionDigits: 6,
},
},
}, {
"#,0",
&Pattern{
FormatWidth: 3,
GroupingSize: [2]uint8{1, 0},
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
},
},
}, {
"#,0.00",
&Pattern{
FormatWidth: 6,
GroupingSize: [2]uint8{1, 0},
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MinFractionDigits: 2,
MaxFractionDigits: 2,
},
},
}, {
"#,##0.###",
&Pattern{
FormatWidth: 9,
GroupingSize: [2]uint8{3, 0},
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxFractionDigits: 3,
},
},
}, {
"#,##,##0.###",
&Pattern{
FormatWidth: 12,
GroupingSize: [2]uint8{3, 2},
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxFractionDigits: 3,
},
},
}, {
// Ignore additional separators.
"#,####,##,##0.###",
&Pattern{
FormatWidth: 17,
GroupingSize: [2]uint8{3, 2},
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxFractionDigits: 3,
},
},
}, {
"#E0",
&Pattern{
FormatWidth: 3,
RoundingContext: RoundingContext{
MaxIntegerDigits: 1,
MinExponentDigits: 1,
},
},
}, {
// At least one exponent digit is required. As long as this is true, one can
// determine that scientific rendering is needed if MinExponentDigits > 0.
"#E#",
nil,
}, {
"0E0",
&Pattern{
FormatWidth: 3,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MinExponentDigits: 1,
},
},
}, {
"##0.###E00",
&Pattern{
FormatWidth: 10,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxIntegerDigits: 3,
MaxFractionDigits: 3,
MinExponentDigits: 2,
},
},
}, {
"##00.0#E0",
&Pattern{
FormatWidth: 9,
RoundingContext: RoundingContext{
MinIntegerDigits: 2,
MaxIntegerDigits: 4,
MinFractionDigits: 1,
MaxFractionDigits: 2,
MinExponentDigits: 1,
},
},
}, {
"#00.0E+0",
&Pattern{
FormatWidth: 8,
Flags: AlwaysExpSign,
RoundingContext: RoundingContext{
MinIntegerDigits: 2,
MaxIntegerDigits: 3,
MinFractionDigits: 1,
MaxFractionDigits: 1,
MinExponentDigits: 1,
},
},
}, {
"0.0E++0",
nil,
}, {
"#0E+",
nil,
}, {
// significant digits
"@",
&Pattern{
FormatWidth: 1,
RoundingContext: RoundingContext{
MinSignificantDigits: 1,
MaxSignificantDigits: 1,
MaxFractionDigits: -1,
},
},
}, {
// significant digits
"@@@@",
&Pattern{
FormatWidth: 4,
RoundingContext: RoundingContext{
MinSignificantDigits: 4,
MaxSignificantDigits: 4,
MaxFractionDigits: -1,
},
},
}, {
"@###",
&Pattern{
FormatWidth: 4,
RoundingContext: RoundingContext{
MinSignificantDigits: 1,
MaxSignificantDigits: 4,
MaxFractionDigits: -1,
},
},
}, {
// Exponents in significant digits mode get normalized.
"@@E0",
&Pattern{
FormatWidth: 4,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxIntegerDigits: 1,
MinFractionDigits: 1,
MaxFractionDigits: 1,
MinExponentDigits: 1,
},
},
}, {
"@###E00",
&Pattern{
FormatWidth: 7,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxIntegerDigits: 1,
MinFractionDigits: 0,
MaxFractionDigits: 3,
MinExponentDigits: 2,
},
},
}, {
// The significant digits mode does not allow fractions.
"@###.#E0",
nil,
}, {
// alternative negative pattern
"#0.###;(#0.###)",
&Pattern{
Affix: "\x00\x00\x01(\x01)",
NegOffset: 2,
FormatWidth: 6,
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MaxFractionDigits: 3,
},
},
}, {
// Rounding increment
"1.05",
&Pattern{
FormatWidth: 4,
RoundingContext: RoundingContext{
Increment: 105,
IncrementScale: 2,
MinIntegerDigits: 1,
MinFractionDigits: 2,
MaxFractionDigits: 2,
},
},
}, {
// Rounding increment with grouping
"1,05",
&Pattern{
FormatWidth: 4,
GroupingSize: [2]uint8{2, 0},
RoundingContext: RoundingContext{
Increment: 105,
IncrementScale: 0,
MinIntegerDigits: 3,
MinFractionDigits: 0,
MaxFractionDigits: 0,
},
},
}, {
"0.0%",
&Pattern{
Affix: "\x00\x01%",
FormatWidth: 4,
RoundingContext: RoundingContext{
DigitShift: 2,
MinIntegerDigits: 1,
MinFractionDigits: 1,
MaxFractionDigits: 1,
},
},
}, {
"0.0‰",
&Pattern{
Affix: "\x00\x03‰",
FormatWidth: 4,
RoundingContext: RoundingContext{
DigitShift: 3,
MinIntegerDigits: 1,
MinFractionDigits: 1,
MaxFractionDigits: 1,
},
},
}, {
"#,##0.00¤",
&Pattern{
Affix: "\x00\x02¤",
FormatWidth: 9,
GroupingSize: [2]uint8{3, 0},
RoundingContext: RoundingContext{
MinIntegerDigits: 1,
MinFractionDigits: 2,
MaxFractionDigits: 2,
},
},
}, {
"#,##0.00 ¤;(#,##0.00 ¤)",
&Pattern{Affix: "\x00\x04\u00a0¤\x01(\x05\u00a0¤)",
NegOffset: 6,
FormatWidth: 10,
GroupingSize: [2]uint8{3, 0},
RoundingContext: RoundingContext{
DigitShift: 0,
MinIntegerDigits: 1,
MinFractionDigits: 2,
MaxFractionDigits: 2,
},
},
}, {
// padding
"*x#",
&Pattern{
PadRune: 'x',
FormatWidth: 1,
},
}, {
// padding
"#*x",
&Pattern{
PadRune: 'x',
FormatWidth: 1,
Flags: PadBeforeSuffix,
},
}, {
"*xpre#suf",
&Pattern{
Affix: "\x03pre\x03suf",
PadRune: 'x',
FormatWidth: 7,
},
}, {
"pre*x#suf",
&Pattern{
Affix: "\x03pre\x03suf",
PadRune: 'x',
FormatWidth: 7,
Flags: PadAfterPrefix,
},
}, {
"pre#*xsuf",
&Pattern{
Affix: "\x03pre\x03suf",
PadRune: 'x',
FormatWidth: 7,
Flags: PadBeforeSuffix,
},
}, {
"pre#suf*x",
&Pattern{
Affix: "\x03pre\x03suf",
PadRune: 'x',
FormatWidth: 7,
Flags: PadAfterSuffix,
},
}, {
`* #0 o''clock`,
&Pattern{Affix: "\x00\x09 o\\'clock",
FormatWidth: 10,
PadRune: 32,
RoundingContext: RoundingContext{
MinIntegerDigits: 0x1,
},
},
}, {
`'123'* #0'456'`,
&Pattern{Affix: "\x05'123'\x05'456'",
FormatWidth: 8,
PadRune: 32,
RoundingContext: RoundingContext{
MinIntegerDigits: 0x1,
},
Flags: PadAfterPrefix},
}, {
// no duplicate padding
"*xpre#suf*x", nil,
}, {
// no duplicate padding
"*xpre#suf*x", nil,
}}
func TestParsePattern(t *testing.T) {
for i, tc := range testCases {
t.Run(tc.pat, func(t *testing.T) {
f, err := ParsePattern(tc.pat)
if !reflect.DeepEqual(f, tc.want) {
t.Errorf("%d:%s:\ngot %#v;\nwant %#v", i, tc.pat, f, tc.want)
}
if got, want := err != nil, tc.want == nil; got != want {
t.Errorf("%d:%s:error: got %v; want %v", i, tc.pat, err, want)
}
})
}
}
func TestPatternSize(t *testing.T) {
if sz := unsafe.Sizeof(Pattern{}); sz > 56 {
t.Errorf("got %d; want <= 56", sz)
}
}
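
The Affix strings in the expectations above appear to be length-prefixed: one
byte with the byte length of the prefix, the prefix itself, one byte with the
byte length of the suffix, and the suffix (patterns with a negative variant
carry a second prefix/suffix pair starting at NegOffset). Below is a small
decoder sketch under that assumption; splitAffix is illustrative and not part
of the package.

package main

import "fmt"

// splitAffix decodes an affix string of the assumed form
// <plen><prefix bytes><slen><suffix bytes>. For example, "\x01+\x00" is
// the prefix "+" with an empty suffix, and "\x00\x02¤" is an empty
// prefix with the two-byte suffix "¤".
func splitAffix(affix string) (prefix, suffix string) {
	if affix == "" {
		return "", ""
	}
	plen := int(affix[0])
	prefix = affix[1 : 1+plen]
	rest := affix[1+plen:]
	slen := int(rest[0])
	suffix = rest[1 : 1+slen]
	return prefix, suffix
}

func main() {
	pre, suf := splitAffix("\x03pre\x03suf")
	fmt.Printf("%q %q\n", pre, suf) // "pre" "suf"

	pre, suf = splitAffix("\x01+\x00")
	fmt.Printf("%q %q\n", pre, suf) // "+" ""
}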

View file

@ -1,16 +0,0 @@
// Code generated by "stringer -type RoundingMode"; DO NOT EDIT.
package number
import "fmt"
const _RoundingMode_name = "ToNearestEvenToNearestZeroToNearestAwayToPositiveInfToNegativeInfToZeroAwayFromZeronumModes"
var _RoundingMode_index = [...]uint8{0, 13, 26, 39, 52, 65, 71, 83, 91}
func (i RoundingMode) String() string {
if i >= RoundingMode(len(_RoundingMode_index)-1) {
return fmt.Sprintf("RoundingMode(%d)", i)
}
return _RoundingMode_name[_RoundingMode_index[i]:_RoundingMode_index[i+1]]
}

File diff suppressed because it is too large

View file

@ -1,125 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package number
import (
"flag"
"log"
"reflect"
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/unicode/cldr"
)
var draft = flag.String("draft",
"contributed",
`Minimal draft requirements (approved, contributed, provisional, unconfirmed).`)
func TestNumberSystems(t *testing.T) {
testtext.SkipIfNotLong(t)
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
d.SetDirFilter("supplemental")
d.SetSectionFilter("numberingSystem")
data, err := d.DecodeZip(r)
if err != nil {
t.Fatalf("DecodeZip: %v", err)
}
for _, ns := range data.Supplemental().NumberingSystems.NumberingSystem {
n := systemMap[ns.Id]
if int(n) >= len(numSysData) {
continue
}
info := InfoFromLangID(0, ns.Id)
val := '0'
for _, rWant := range ns.Digits {
if rGot := info.Digit(val); rGot != rWant {
t.Errorf("%s:%d: got %U; want %U", ns.Id, val, rGot, rWant)
}
val++
}
}
}
func TestSymbols(t *testing.T) {
testtext.SkipIfNotLong(t)
draft, err := cldr.ParseDraft(*draft)
if err != nil {
log.Fatalf("invalid draft level: %v", err)
}
r := gen.OpenCLDRCoreZip()
defer r.Close()
d := &cldr.Decoder{}
d.SetDirFilter("main")
d.SetSectionFilter("numbers")
data, err := d.DecodeZip(r)
if err != nil {
t.Fatalf("DecodeZip: %v", err)
}
for _, lang := range data.Locales() {
ldml := data.RawLDML(lang)
if ldml.Numbers == nil {
continue
}
langIndex, ok := language.CompactIndex(language.MustParse(lang))
if !ok {
t.Fatalf("No compact index for language %s", lang)
}
syms := cldr.MakeSlice(&ldml.Numbers.Symbols)
syms.SelectDraft(draft)
for _, sym := range ldml.Numbers.Symbols {
if sym.NumberSystem == "" {
continue
}
testCases := []struct {
name string
st SymbolType
x interface{}
}{
{"Decimal", SymDecimal, sym.Decimal},
{"Group", SymGroup, sym.Group},
{"List", SymList, sym.List},
{"PercentSign", SymPercentSign, sym.PercentSign},
{"PlusSign", SymPlusSign, sym.PlusSign},
{"MinusSign", SymMinusSign, sym.MinusSign},
{"Exponential", SymExponential, sym.Exponential},
{"SuperscriptingExponent", SymSuperscriptingExponent, sym.SuperscriptingExponent},
{"PerMille", SymPerMille, sym.PerMille},
{"Infinity", SymInfinity, sym.Infinity},
{"NaN", SymNan, sym.Nan},
{"TimeSeparator", SymTimeSeparator, sym.TimeSeparator},
}
info := InfoFromLangID(langIndex, sym.NumberSystem)
for _, tc := range testCases {
// Extract the wanted value.
v := reflect.ValueOf(tc.x)
if v.Len() == 0 {
return
}
if v.Len() > 1 {
t.Fatalf("Multiple values of %q within single symbol not supported.", tc.name)
}
want := v.Index(0).MethodByName("Data").Call(nil)[0].String()
got := info.Symbol(tc.st)
if got != want {
t.Errorf("%s:%s:%s: got %q; want %q", lang, sym.NumberSystem, tc.name, got, want)
}
}
}
}
}

View file

@ -1,86 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package stringset provides a way to represent a collection of strings
// compactly.
package stringset
import "sort"
// A Set holds a collection of strings that can be looked up by an index number.
type Set struct {
// These fields are exported to allow for code generation.
Data string
Index []uint16
}
// Elem returns the string with index i. It panics if i is out of range.
func (s *Set) Elem(i int) string {
return s.Data[s.Index[i]:s.Index[i+1]]
}
// Len returns the number of strings in the set.
func (s *Set) Len() int {
return len(s.Index) - 1
}
// Search returns the index of the given string or -1 if it is not in the set.
// The Set must have been created with strings in sorted order.
func Search(s *Set, str string) int {
// TODO: optimize this if it gets used a lot.
n := len(s.Index) - 1
p := sort.Search(n, func(i int) bool {
return s.Elem(i) >= str
})
if p == n || str != s.Elem(p) {
return -1
}
return p
}
// A Builder constructs Sets.
type Builder struct {
set Set
index map[string]int
}
// NewBuilder returns a new and initialized Builder.
func NewBuilder() *Builder {
return &Builder{
set: Set{
Index: []uint16{0},
},
index: map[string]int{},
}
}
// Set returns the set built so far.
func (b *Builder) Set() Set {
return b.set
}
// Index returns the index for the given string, which must have been added
// before.
func (b *Builder) Index(s string) int {
return b.index[s]
}
// Add adds a string to the index. Strings that are added by a single Add will
// be stored together, unless they match an existing string.
func (b *Builder) Add(ss ...string) {
// First check if the string already exists.
for _, s := range ss {
if _, ok := b.index[s]; ok {
continue
}
b.index[s] = len(b.set.Index) - 1
b.set.Data += s
x := len(b.set.Data)
if x > 0xFFFF {
panic("stringset: index exceeds 0xFFFF")
}
b.set.Index = append(b.set.Index, uint16(x))
}
}
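
An illustrative sketch of the Builder-to-Set flow; ExampleBuilder below is
hypothetical and not part of the package. The strings are added in sorted
order so that Search can be used, matching what the test in the next file
exercises.

package stringset

import "fmt"

// ExampleBuilder demonstrates building a Set and looking strings up by
// index and by binary search.
func ExampleBuilder() {
	b := NewBuilder()
	b.Add("apple", "banana", "cherry")

	set := b.Set()
	fmt.Println(set.Len())              // 3
	fmt.Println(set.Elem(1))            // banana
	fmt.Println(b.Index("cherry"))      // 2
	fmt.Println(Search(&set, "banana")) // 1
	fmt.Println(Search(&set, "durian")) // -1
}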

View file

@ -1,53 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package stringset
import "testing"
func TestStringSet(t *testing.T) {
testCases := [][]string{
{""},
{"∫"},
{"a", "b", "c"},
{"", "a", "bb", "ccc"},
{" ", "aaa", "bb", "c"},
}
test := func(tc int, b *Builder) {
set := b.Set()
if set.Len() != len(testCases[tc]) {
t.Errorf("%d:Len() = %d; want %d", tc, set.Len(), len(testCases[tc]))
}
for i, s := range testCases[tc] {
if x := b.Index(s); x != i {
t.Errorf("%d:Index(%q) = %d; want %d", tc, s, x, i)
}
if p := Search(&set, s); p != i {
t.Errorf("%d:Search(%q) = %d; want %d", tc, s, p, i)
}
if set.Elem(i) != s {
t.Errorf("%d:Elem(%d) = %s; want %s", tc, i, set.Elem(i), s)
}
}
if p := Search(&set, "apple"); p != -1 {
t.Errorf(`%d:Search("apple") = %d; want -1`, tc, p)
}
}
for i, tc := range testCases {
b := NewBuilder()
for _, s := range tc {
b.Add(s)
}
b.Add(tc...)
test(i, b)
}
for i, tc := range testCases {
b := NewBuilder()
b.Add(tc...)
for _, s := range tc {
b.Add(s)
}
test(i, b)
}
}

View file

@ -1,100 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tag contains functionality handling tags and related data.
package tag // import "golang.org/x/text/internal/tag"
import "sort"
// An Index converts tags to a compact numeric value.
//
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
// be used to store additional information about the tag.
type Index string
// Elem returns the element data at the given index.
func (s Index) Elem(x int) string {
return string(s[x*4 : x*4+4])
}
// Index reports the index of the given key or -1 if it could not be found.
// Only the first len(key) bytes of each 4-byte entry are considered for the
// search, and the first match in Index is returned.
func (s Index) Index(key []byte) int {
n := len(key)
// search the index of the first entry with an equal or higher value than
// key in s.
index := sort.Search(len(s)/4, func(i int) bool {
return cmp(s[i*4:i*4+n], key) != -1
})
i := index * 4
if cmp(s[i:i+len(key)], key) != 0 {
return -1
}
return index
}
// Next finds the next occurrence of key after index x, which must have been
// obtained from a call to Index using the same key. It returns x+1 or -1.
func (s Index) Next(key []byte, x int) int {
if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
return x
}
return -1
}
// cmp returns an integer comparing a and b lexicographically.
func cmp(a Index, b []byte) int {
n := len(a)
if len(b) < n {
n = len(b)
}
for i, c := range b[:n] {
switch {
case a[i] > c:
return 1
case a[i] < c:
return -1
}
}
switch {
case len(a) < len(b):
return -1
case len(a) > len(b):
return 1
}
return 0
}
// Compare returns an integer comparing a and b lexicographically.
func Compare(a string, b []byte) int {
return cmp(Index(a), b)
}
// FixCase reformats b to the same pattern of cases as form.
// It returns false if string b is malformed.
func FixCase(form string, b []byte) bool {
if len(form) != len(b) {
return false
}
for i, c := range b {
if form[i] <= 'Z' {
if c >= 'a' {
c -= 'z' - 'Z'
}
if c < 'A' || 'Z' < c {
return false
}
} else {
if c <= 'Z' {
c += 'z' - 'Z'
}
if c < 'a' || 'z' < c {
return false
}
}
b[i] = c
}
return true
}
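
An illustrative sketch of the fixed-width layout Index expects: entries are 4
bytes each, stored back to back in sorted order, shorter tags are padded, and
a final 0xff sentinel keeps lookups of keys beyond the last real entry in
range (the test data in the next file uses the same convention). ExampleIndex
below is hypothetical and not part of the package.

package tag

import "fmt"

// ExampleIndex shows the 4-byte-per-entry convention. The trailing
// "\xff\xff\xff\xff" sentinel compares higher than any key and keeps
// Index from slicing past the end of the data.
func ExampleIndex() {
	idx := Index("aa  " + "ab  " + "nl  " + "und " + "\xff\xff\xff\xff")

	fmt.Printf("%q\n", idx.Elem(2))      // "nl  " (padded to 4 bytes)
	fmt.Println(idx.Index([]byte("nl"))) // 2
	fmt.Println(idx.Index([]byte("zz"))) // -1

	b := []byte("NL")
	FixCase("nl", b)              // reformat b to the case pattern of "nl"
	fmt.Printf("%q\n", string(b)) // "nl"
}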

View file

@ -1,67 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tag
import (
"strings"
"testing"
)
var strdata = []string{
"aa ",
"aaa ",
"aaaa",
"aaab",
"aab ",
"ab ",
"ba ",
"xxxx",
"\xff\xff\xff\xff",
}
var testCases = map[string]int{
"a": 0,
"aa": 0,
"aaa": 1,
"aa ": 0,
"aaaa": 2,
"aaab": 3,
"b": 6,
"ba": 6,
" ": -1,
"aaax": -1,
"bbbb": -1,
"zzzz": -1,
}
func TestIndex(t *testing.T) {
index := Index(strings.Join(strdata, ""))
for k, v := range testCases {
if i := index.Index([]byte(k)); i != v {
t.Errorf("%s: got %d; want %d", k, i, v)
}
}
}
func TestFixCase(t *testing.T) {
tests := []string{
"aaaa", "AbCD", "abcd",
"Zzzz", "AbCD", "Abcd",
"Zzzz", "AbC", "",
"XXX", "ab ", "",
"XXX", "usd", "USD",
"cmn", "AB ", "",
"gsw", "CMN", "cmn",
}
for tc := tests; len(tc) > 0; tc = tc[3:] {
b := []byte(tc[1])
if !FixCase(tc[0], b) {
b = nil
}
if string(b) != tc[2] {
t.Errorf("FixCase(%q, %q) = %q; want %q", tc[0], tc[1], b, tc[2])
}
}
}

View file

@ -1,53 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testtext
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
)
// CodeSize builds the given code sample and returns the binary size, or an
// error if the build failed. The code sample will typically look like this:
// package main
// import "golang.org/x/text/somepackage"
// func main() {
// somepackage.Func() // reference Func to cause it to be linked in.
// }
// See dict_test.go in the display package for an example.
func CodeSize(s string) (int, error) {
// Write the file.
tmpdir, err := ioutil.TempDir(os.TempDir(), "testtext")
if err != nil {
return 0, fmt.Errorf("testtext: failed to create tmpdir: %v", err)
}
defer os.RemoveAll(tmpdir)
filename := filepath.Join(tmpdir, "main.go")
if err := ioutil.WriteFile(filename, []byte(s), 0644); err != nil {
return 0, fmt.Errorf("testtext: failed to write main.go: %v", err)
}
// Build the binary.
w := &bytes.Buffer{}
cmd := exec.Command(filepath.Join(runtime.GOROOT(), "bin", "go"), "build", "-o", "main")
cmd.Dir = tmpdir
cmd.Stderr = w
cmd.Stdout = w
if err := cmd.Run(); err != nil {
return 0, fmt.Errorf("testtext: failed to execute command: %v\nmain.go:\n%vErrors:%s", err, s, w)
}
// Determine the size.
fi, err := os.Stat(filepath.Join(tmpdir, "main"))
if err != nil {
return 0, fmt.Errorf("testtext: failed to get file info: %v", err)
}
return int(fi.Size()), nil
}
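
A hedged sketch of how a size-regression test might call CodeSize follows. The
package name, test name, size budget, and sample program are made up for
illustration (dict_test.go in the display package is the real example the
comment above points to), and such a test would have to live inside the x/text
repository because testtext is an internal package.

package sizecheck

import (
	"testing"

	"golang.org/x/text/internal/testtext"
)

func TestBinarySize(t *testing.T) {
	const prog = `
package main

import "golang.org/x/text/language"

func main() {
	language.MustParse("en") // reference the package so it gets linked in
}
`
	sz, err := testtext.CodeSize(prog)
	if err != nil {
		t.Skipf("could not build test program: %v", err)
	}
	// The 4 MB budget below is purely illustrative.
	if max := 4 << 20; sz > max {
		t.Errorf("binary size = %d bytes; want <= %d", sz, max)
	}
}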

View file

@ -1,22 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package testtext
import (
"flag"
"testing"
"golang.org/x/text/internal/gen"
)
var long = flag.Bool("long", false,
"run tests that require fetching data online")
// SkipIfNotLong skips the test unless long tests are enabled via -long,
// -local, or a local UNICODE_DIR.
func SkipIfNotLong(t *testing.T) {
if testing.Short() || !(gen.IsLocal() || *long) {
t.Skip("skipping test to prevent downloading; to run use -long or use -local or UNICODE_DIR to specify a local source")
}
}

View file

@ -1,14 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !gccgo
package testtext
import "testing"
// AllocsPerRun wraps testing.AllocsPerRun.
func AllocsPerRun(runs int, f func()) (avg float64) {
return testing.AllocsPerRun(runs, f)
}

View file

@ -1,11 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build gccgo
package testtext
// AllocsPerRun always returns 0 for gccgo until gccgo implements escape
// analysis that is equal to or better than that of gc.
func AllocsPerRun(runs int, f func()) (avg float64) { return 0 }
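
A brief sketch of using the wrapper to guard a hot path against allocations;
the measured call below is only a stand-in, and such a test would need to live
inside x/text because testtext is internal.

package allocs

import (
	"strings"
	"testing"

	"golang.org/x/text/internal/testtext"
)

func TestNoAllocs(t *testing.T) {
	s := strings.Repeat("x", 64)
	// strings.IndexByte is a stand-in for the call whose allocation
	// behaviour actually matters; under gccgo the wrapper always reports 0.
	avg := testtext.AllocsPerRun(100, func() {
		_ = strings.IndexByte(s, 'y')
	})
	if avg > 0 {
		t.Errorf("got %v allocs per run; want 0", avg)
	}
}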

View file

@ -1,23 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.7
package testtext
import "testing"
func Run(t *testing.T, name string, fn func(t *testing.T)) bool {
t.Logf("Running %s...", name)
fn(t)
return t.Failed()
}
// Bench runs the given benchmark function. This pre-1.7 implementation renders
// the measurement useless, but at least allows the code to compile.
func Bench(b *testing.B, name string, fn func(b *testing.B)) bool {
b.Logf("Running %s...", name)
fn(b)
return b.Failed()
}

View file

@ -1,17 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.7
package testtext
import "testing"
func Run(t *testing.T, name string, fn func(t *testing.T)) bool {
return t.Run(name, fn)
}
func Bench(b *testing.B, name string, fn func(b *testing.B)) bool {
return b.Run(name, fn)
}
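
A short sketch of how a table-driven test can use these wrappers so the same
file builds as plain sequential checks before Go 1.7 and as proper subtests
from Go 1.7 on; the package name and test content are hypothetical.

package compat

import (
	"testing"

	"golang.org/x/text/internal/testtext"
)

func TestCases(t *testing.T) {
	for _, tc := range []string{"en", "nl", "de"} {
		tc := tc // capture for the subtest closure
		testtext.Run(t, tc, func(t *testing.T) {
			if len(tc) != 2 {
				t.Errorf("%q: want a two-letter tag", tc)
			}
		})
	}
}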

View file

@ -1,105 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package testtext contains test data that is of common use to the text
// repository.
package testtext // import "golang.org/x/text/internal/testtext"
const (
// ASCII is an ASCII string containing all letters in the English alphabet.
ASCII = "The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. " +
"The quick brown fox jumps over the lazy dog. "
// Vietnamese is a snippet from http://creativecommons.org/licenses/by-sa/3.0/vn/
Vietnamese = `Với các điều kiện sau: Ghi nhận công của tác giả.
Nếu bạn sử dụng, chuyển đổi, hoặc xây dựng dự án từ
nội dung được chia sẻ này, bạn phải áp dụng giấy phép này hoặc
một giấy phép khác các điều khoản tương tự như giấy phép này
cho dự án của bạn. Hiểu rằng: Miễn Bất kỳ các điều kiện nào
trên đây cũng thể được miễn bỏ nếu bạn được sự cho phép của
người sở hữu bản quyền. Phạm vi công chúng Khi tác phẩm hoặc
bất kỳ chương nào của tác phẩm đã trong vùng dành cho công
chúng theo quy định của pháp luật thì tình trạng của không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
// Russian is a snippet from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
Russian = `При обязательном соблюдении следующих условий:
Attribution Вы должны атрибутировать произведение (указывать
автора и источник) в порядке, предусмотренном автором или
лицензиаром (но только так, чтобы никоим образом не подразумевалось,
что они поддерживают вас или использование вами данного произведения).
Υπό τις ακόλουθες προϋποθέσεις:`
// Greek is a snippet from http://creativecommons.org/licenses/by-sa/3.0/gr/
Greek = `Αναφορά Δημιουργού Θα πρέπει να κάνετε την αναφορά στο έργο με τον
τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια
(χωρίς όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή
τη χρήση του έργου από εσάς). Παρόμοια Διανομή Εάν αλλοιώσετε,
τροποποιήσετε ή δημιουργήσετε περαιτέρω βασισμένοι στο έργο θα
μπορείτε να διανέμετε το έργο που θα προκύψει μόνο με την ίδια ή
παρόμοια άδεια.`
// Arabic is a snippet from http://creativecommons.org/licenses/by-sa/3.0/deed.ar
Arabic = `بموجب الشروط التالية نسب المصنف يجب عليك أن
تنسب العمل بالطريقة التي تحددها المؤلف أو المرخص (ولكن ليس بأي حال من
الأحوال أن توحي وتقترح بتحول أو استخدامك للعمل).
المشاركة على قدم المساواة إذا كنت يعدل ، والتغيير ، أو الاستفادة
من هذا العمل ، قد ينتج عن توزيع العمل إلا في ظل تشابه او تطابق فى واحد
لهذا الترخيص.`
// Hebrew is a snippet from http://creativecommons.org/licenses/by-sa/1.0/il/
Hebrew = `בכפוף לתנאים הבאים: ייחוס עליך לייחס את היצירה (לתת קרדיט) באופן
המצויין על-ידי היוצר או מעניק הרישיון (אך לא בשום אופן המרמז על כך
שהם תומכים בך או בשימוש שלך ביצירה). שיתוף זהה אם תחליט/י לשנות,
לעבד או ליצור יצירה נגזרת בהסתמך על יצירה זו, תוכל/י להפיץ את יצירתך
החדשה רק תחת אותו הרישיון או רישיון דומה לרישיון זה.`
TwoByteUTF8 = Russian + Greek + Arabic + Hebrew
// Thai is a snippet from http://creativecommons.org/licenses/by-sa/3.0/th/
Thai = `ภายใต้เงื่อนไข ดังต่อไปนี้ : แสดงที่มา คุณต้องแสดงที่
มาของงานดังกล่าว ตามรูปแบบที่ผู้สร้างสรรค์หรือผู้อนุญาตกำหนด (แต่
ไม่ใช่ในลักษณะที่ว่า พวกเขาสนับสนุนคุณหรือสนับสนุนการที่
คุณนำงานไปใช้) อนุญาตแบบเดียวกัน หากคุณดัดแปลง เปลี่ยนรูป หรื
อต่อเติมงานนี้ คุณต้องใช้สัญญาอนุญาตแบบเดียวกันหรือแบบที่เหมื
อนกับสัญญาอนุญาตที่ใช้กับงานนี้เท่านั้น`
ThreeByteUTF8 = Thai
// Japanese is a snippet from http://creativecommons.org/licenses/by-sa/2.0/jp/
Japanese = `あなたの従うべき条件は以下の通りです
表示 あなたは原著作者のクレジットを表示しなければなりません
継承 もしあなたがこの作品を改変変形または加工した場合
あなたはその結果生じた作品をこの作品と同一の許諾条件の下でのみ
頒布することができます`
// Chinese is a snippet from http://creativecommons.org/licenses/by-sa/2.5/cn/
Chinese = `您可以自由 复制发行展览表演放映
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件
署名 您必须按照作者或者许可人指定的方式对作品进行署名
相同方式共享 如果您改变转换本作品或者以本作品为基础进行创作
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品`
// Korean is a snippet from http://creativecommons.org/licenses/by-sa/2.0/kr/
Korean = `다음과 같은 조건을 따라야 합니다: 저작자표시
저작자나 이용허락자가 정한 방법으로 저작물의
원저작자를 표시하여야 합니다(그러나 원저작자가 이용자나 이용자의
이용을 보증하거나 추천한다는 의미로 표시해서는 안됩니다).
동일조건변경허락 저작물을 이용하여 만든 이차적 저작물에는
라이선스와 동일한 라이선스를 적용해야 합니다.`
CJK = Chinese + Japanese + Korean
All = ASCII + Vietnamese + TwoByteUTF8 + ThreeByteUTF8 + CJK
)

View file

@ -1,87 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package utf8internal contains low-level utf8-related constants, tables, etc.
// that are used internally by the text package.
package utf8internal
// The default lowest and highest continuation byte.
const (
LoCB = 0x80 // 1000 0000
HiCB = 0xBF // 1011 1111
)
// Constants related to getting information of first bytes of UTF-8 sequences.
const (
// ASCII identifies a UTF-8 byte as ASCII.
ASCII = as
// FirstInvalid indicates a byte is invalid as a first byte of a UTF-8
// sequence.
FirstInvalid = xx
// SizeMask is a mask for the size bits. Use x&SizeMask to get the size.
SizeMask = 7
// AcceptShift is the right-shift count for the first byte info byte to get
// the index into the AcceptRanges table. See AcceptRanges.
AcceptShift = 4
// The names of these constants are chosen to give nice alignment in the
// table below. The first nibble is an index into AcceptRanges, or F for the
// special one-byte cases. The second nibble is the rune length or the status
// for the special one-byte cases.
xx = 0xF1 // invalid: size 1
as = 0xF0 // ASCII: size 1
s1 = 0x02 // accept 0, size 2
s2 = 0x13 // accept 1, size 3
s3 = 0x03 // accept 0, size 3
s4 = 0x23 // accept 2, size 3
s5 = 0x34 // accept 3, size 4
s6 = 0x04 // accept 0, size 4
s7 = 0x44 // accept 4, size 4
)
// First is information about the first byte in a UTF-8 sequence.
var First = [256]uint8{
// 1 2 3 4 5 6 7 8 9 A B C D E F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
// 1 2 3 4 5 6 7 8 9 A B C D E F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
}
// AcceptRange gives the range of valid values for the second byte in a UTF-8
// sequence for any value of First that is not ASCII or FirstInvalid.
type AcceptRange struct {
Lo uint8 // lowest value for second byte.
Hi uint8 // highest value for second byte.
}
// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b
//
// AcceptRanges[First[b[0]]>>AcceptShift]
//
// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting
// at b[0].
var AcceptRanges = [...]AcceptRange{
0: {LoCB, HiCB},
1: {0xA0, HiCB},
2: {LoCB, 0x9F},
3: {0x90, HiCB},
4: {LoCB, 0x8F},
}
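
The tables above are enough to size and validate a UTF-8 sequence by hand:
First[b[0]] carries the total size in its low bits (SizeMask) and, shifted by
AcceptShift, the AcceptRanges entry that constrains the second byte; every
later continuation byte must lie in [LoCB, HiCB]. The following in-package
sketch applies that procedure; decodeSize is an illustrative helper, not part
of the package.

package utf8internal

// decodeSize reports the length in bytes of the UTF-8 sequence starting
// at b[0], or 0 if the sequence is not well formed or b is too short.
// It only uses the tables defined above.
func decodeSize(b []byte) int {
	if len(b) == 0 {
		return 0
	}
	x := First[b[0]]
	if x == ASCII {
		return 1
	}
	if x == FirstInvalid {
		return 0
	}
	sz := int(x & SizeMask)
	if len(b) < sz {
		return 0
	}
	// The second byte is constrained by the accept range selected by the
	// high nibble of the info byte; remaining bytes use the default range.
	ar := AcceptRanges[x>>AcceptShift]
	if c := b[1]; c < ar.Lo || ar.Hi < c {
		return 0
	}
	for _, c := range b[2:sz] {
		if c < LoCB || HiCB < c {
			return 0
		}
	}
	return sz
}

For example, decodeSize([]byte("é")) reports 2, while decodeSize([]byte{0xE0,
0x80, 0x80}) reports 0 because the byte following 0xE0 must be at least 0xA0.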