mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-11-03 08:02:36 +09:00 
			
		
		
		
	
		
			
				
	
	
		
			1649 lines
		
	
	
		
			46 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			1649 lines
		
	
	
		
			46 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright 2013 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
// +build ignore
 | 
						|
 | 
						|
// Language tag table generator.
 | 
						|
// Data read from the web.
 | 
						|
 | 
						|
package main
 | 
						|
 | 
						|
import (
 | 
						|
	"bufio"
 | 
						|
	"flag"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"io/ioutil"
 | 
						|
	"log"
 | 
						|
	"math"
 | 
						|
	"reflect"
 | 
						|
	"regexp"
 | 
						|
	"sort"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
 | 
						|
	"golang.org/x/text/internal/gen"
 | 
						|
	"golang.org/x/text/internal/tag"
 | 
						|
	"golang.org/x/text/unicode/cldr"
 | 
						|
)
 | 
						|
 | 
						|
// Command-line flags understood by the generator.
var (
	// test corresponds to the -test flag (default false).
	test = flag.Bool("test", false,
		"test existing tables; can be used to compare web data with package data.")

	// outputFile corresponds to the -output flag and names the file that
	// receives the generated tables (default "tables.go").
	outputFile = flag.String("output", "tables.go", "output file for generated tables")
)
 | 
						|
 | 
						|
// comment holds the documentation emitted above generated identifiers.
// Every entry starts with a newline followed by the name of the identifier
// it documents; init uses that first word as the key into commentIndex, so
// the leading word of an entry must not be changed.
var comment = []string{
	`
lang holds an alphabetically sorted list of ISO-639 language identifiers.
All entries are 4 bytes. The index of the identifier (divided by 4) is the language tag.
For 2-byte language identifiers, the two successive bytes have the following meaning:
    - if the first letter of the 2- and 3-letter ISO codes are the same:
      the second and third letter of the 3-letter ISO code.
    - otherwise: a 0 and a by 2 bits right-shifted index into altLangISO3.
For 3-byte language identifiers the 4th byte is 0.`,
	`
langNoIndex is a bit vector of all 3-letter language codes that are not used as an index
in lookup tables. The language ids for these language codes are derived directly
from the letters and are not consecutive.`,
	`
altLangISO3 holds an alphabetically sorted list of 3-letter language code alternatives
to 2-letter language codes that cannot be derived using the method described above.
Each 3-letter code is followed by its 1-byte langID.`,
	`
altLangIndex is used to convert indexes in altLangISO3 to langIDs.`,
	`
langAliasMap maps langIDs to their suggested replacements.`,
	`
script is an alphabetically sorted list of ISO 15924 codes. The index
of the script in the string, divided by 4, is the internal scriptID.`,
	`
isoRegionOffset needs to be added to the index of regionISO to obtain the regionID
for 2-letter ISO codes. (The first isoRegionOffset regionIDs are reserved for
the UN.M49 codes used for groups.)`,
	`
regionISO holds a list of alphabetically sorted 2-letter ISO region codes.
Each 2-letter code is followed by two bytes with the following meaning:
    - [A-Z]{2}: the first letter of the 2-letter code plus these two
                letters form the 3-letter ISO code.
    - 0, n:     index into altRegionISO3.`,
	`
regionTypes defines the status of a region for various standards.`,
	`
m49 maps regionIDs to UN.M49 codes. The first isoRegionOffset entries are
codes indicating collections of regions.`,
	`
m49Index gives indexes into fromM49 based on the three most significant bits
of a 10-bit UN.M49 code. To search an UN.M49 code in fromM49, search in
   fromM49[m49Index[msb3(code)]:m49Index[msb3(code)+1]]
for an entry where the first 7 bits match the 7 lsb of the UN.M49 code.
The region code is stored in the 9 lsb of the indexed value.`,
	`
fromM49 contains entries to map UN.M49 codes to regions. See m49Index for details.`,
	`
altRegionISO3 holds a list of 3-letter region codes that cannot be
mapped to 2-letter codes using the default algorithm. This is a short list.`,
	`
altRegionIDs holds a list of regionIDs the positions of which match those
of the 3-letter ISO codes in altRegionISO3.`,
	`
variantNumSpecialized is the number of specialized variants in variants.`,
	`
suppressScript is an index from langID to the dominant script for that language,
if it exists.  If a script is given, it should be suppressed from the language tag.`,
	`
likelyLang is a lookup table, indexed by langID, for the most likely
scripts and regions given incomplete information. If more entries exist for a
given language, region and script are the index and size respectively
of the list in likelyLangList.`,
	`
likelyLangList holds lists info associated with likelyLang.`,
	`
likelyRegion is a lookup table, indexed by regionID, for the most likely
languages and scripts given incomplete information. If more entries exist
for a given regionID, lang and script are the index and size respectively
of the list in likelyRegionList.
TODO: exclude containers and user-definable regions from the list.`,
	`
likelyRegionList holds lists info associated with likelyRegion.`,
	`
likelyScript is a lookup table, indexed by scriptID, for the most likely
languages and regions given a script.`,
	`
matchLang holds pairs of langIDs of base languages that are typically
mutually intelligible. Each pair is associated with a confidence and
whether the intelligibility goes one or both ways.`,
	`
matchScript holds pairs of scriptIDs where readers of one script
can typically also read the other. Each is associated with a confidence.`,
	`
nRegionGroups is the number of region groups.`,
	`
regionInclusion maps region identifiers to sets of regions in regionInclusionBits,
where each set holds all groupings that are directly connected in a region
containment graph.`,
	`
regionInclusionBits is an array of bit vectors where every vector represents
a set of region groupings.  These sets are used to compute the distance
between two regions for the purpose of language matching.`,
	`
regionInclusionNext marks, for each entry in regionInclusionBits, the set of
all groups that are reachable from the groups set in the respective entry.`,
}
 | 
						|
 | 
						|
// TODO: consider changing some of these structures to tries. This can reduce
 | 
						|
// memory, but may increase the need for memory allocations. This could be
 | 
						|
// mitigated if we can piggyback on language tags for common cases.
 | 
						|
 | 
						|
// failOnError logs e and panics (via log.Panic) when e is non-nil.
// A nil error is a no-op.
func failOnError(e error) {
	if e == nil {
		return
	}
	log.Panic(e)
}
 | 
						|
 | 
						|
// setType tags how a stringSet is used. The zero value means that no usage
// has been recorded (stringSet.setType treats 0 as "unset").
type setType int

const (
	// Indexed is the usage selected by stringSet.index and stringSet.join;
	// all elements must be of same size.
	Indexed setType = iota + 1
	// Linear is the second usage value.
	Linear
)
 | 
						|
 | 
						|
type stringSet struct {
 | 
						|
	s              []string
 | 
						|
	sorted, frozen bool
 | 
						|
 | 
						|
	// We often need to update values after the creation of an index is completed.
 | 
						|
	// We include a convenience map for keeping track of this.
 | 
						|
	update map[string]string
 | 
						|
	typ    setType // used for checking.
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) clone() stringSet {
 | 
						|
	c := *ss
 | 
						|
	c.s = append([]string(nil), c.s...)
 | 
						|
	return c
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) setType(t setType) {
 | 
						|
	if ss.typ != t && ss.typ != 0 {
 | 
						|
		log.Panicf("type %d cannot be assigned as it was already %d", t, ss.typ)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// parse parses a whitespace-separated string and initializes ss with its
 | 
						|
// components.
 | 
						|
func (ss *stringSet) parse(s string) {
 | 
						|
	scan := bufio.NewScanner(strings.NewReader(s))
 | 
						|
	scan.Split(bufio.ScanWords)
 | 
						|
	for scan.Scan() {
 | 
						|
		ss.add(scan.Text())
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) assertChangeable() {
 | 
						|
	if ss.frozen {
 | 
						|
		log.Panic("attempt to modify a frozen stringSet")
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) add(s string) {
 | 
						|
	ss.assertChangeable()
 | 
						|
	ss.s = append(ss.s, s)
 | 
						|
	ss.sorted = ss.frozen
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) freeze() {
 | 
						|
	ss.compact()
 | 
						|
	ss.frozen = true
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) compact() {
 | 
						|
	if ss.sorted {
 | 
						|
		return
 | 
						|
	}
 | 
						|
	a := ss.s
 | 
						|
	sort.Strings(a)
 | 
						|
	k := 0
 | 
						|
	for i := 1; i < len(a); i++ {
 | 
						|
		if a[k] != a[i] {
 | 
						|
			a[k+1] = a[i]
 | 
						|
			k++
 | 
						|
		}
 | 
						|
	}
 | 
						|
	ss.s = a[:k+1]
 | 
						|
	ss.sorted = ss.frozen
 | 
						|
}
 | 
						|
 | 
						|
// funcSorter adapts a string slice to sort.Interface using a caller-supplied
// ordering. Len and Swap come from the embedded sort.StringSlice.
type funcSorter struct {
	fn func(a, b string) bool
	sort.StringSlice
}

// Less reports whether element i orders before element j according to fn.
func (s funcSorter) Less(i, j int) bool {
	lhs, rhs := s.StringSlice[i], s.StringSlice[j]
	return s.fn(lhs, rhs)
}
 | 
						|
 | 
						|
func (ss *stringSet) sortFunc(f func(a, b string) bool) {
 | 
						|
	ss.compact()
 | 
						|
	sort.Sort(funcSorter{f, sort.StringSlice(ss.s)})
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) remove(s string) {
 | 
						|
	ss.assertChangeable()
 | 
						|
	if i, ok := ss.find(s); ok {
 | 
						|
		copy(ss.s[i:], ss.s[i+1:])
 | 
						|
		ss.s = ss.s[:len(ss.s)-1]
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) replace(ol, nu string) {
 | 
						|
	ss.s[ss.index(ol)] = nu
 | 
						|
	ss.sorted = ss.frozen
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) index(s string) int {
 | 
						|
	ss.setType(Indexed)
 | 
						|
	i, ok := ss.find(s)
 | 
						|
	if !ok {
 | 
						|
		if i < len(ss.s) {
 | 
						|
			log.Panicf("find: item %q is not in list. Closest match is %q.", s, ss.s[i])
 | 
						|
		}
 | 
						|
		log.Panicf("find: item %q is not in list", s)
 | 
						|
 | 
						|
	}
 | 
						|
	return i
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) find(s string) (int, bool) {
 | 
						|
	ss.compact()
 | 
						|
	i := sort.SearchStrings(ss.s, s)
 | 
						|
	return i, i != len(ss.s) && ss.s[i] == s
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) slice() []string {
 | 
						|
	ss.compact()
 | 
						|
	return ss.s
 | 
						|
}
 | 
						|
 | 
						|
func (ss *stringSet) updateLater(v, key string) {
 | 
						|
	if ss.update == nil {
 | 
						|
		ss.update = map[string]string{}
 | 
						|
	}
 | 
						|
	ss.update[v] = key
 | 
						|
}
 | 
						|
 | 
						|
// join joins the string and ensures that all entries are of the same length.
 | 
						|
func (ss *stringSet) join() string {
 | 
						|
	ss.setType(Indexed)
 | 
						|
	n := len(ss.s[0])
 | 
						|
	for _, s := range ss.s {
 | 
						|
		if len(s) != n {
 | 
						|
			log.Panicf("join: not all entries are of the same length: %q", s)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	ss.s = append(ss.s, strings.Repeat("\xff", n))
 | 
						|
	return strings.Join(ss.s, "")
 | 
						|
}
 | 
						|
 | 
						|
// ianaEntry holds information for an entry in the IANA Language Subtag Repository.
// All types use the same entry.
// See http://tools.ietf.org/html/bcp47#section-5.1 for a description of the various
// fields.
//
// The fields are filled by parseRegistry from the registry field named in
// each comment below.
type ianaEntry struct {
	typ            string   // "Type:"
	description    []string // one element per "Description:" field
	scope          string   // "Scope:"
	added          string   // "Added:"
	preferred      string   // "Preferred-Value:"
	deprecated     string   // "Deprecated:"
	suppressScript string   // "Suppress-Script:"
	macro          string   // "Macrolanguage:"
	prefix         []string // one element per "Prefix:" field
}
 | 
						|
 | 
						|
// builder carries the input data (CLDR and the IANA registry), the string
// indices derived from it, and the writers used to emit the generated
// tables.
type builder struct {
	w    *gen.CodeWriter
	hw   io.Writer // MultiWriter for w and w.Hash
	data *cldr.CLDR
	supp *cldr.SupplementalData // data.Supplemental(), cached by newBuilder

	// indices
	locale      stringSet // common locales
	lang        stringSet // canonical language ids (2 or 3 letter ISO codes) with data
	langNoIndex stringSet // 3-letter ISO codes with no associated data
	script      stringSet // 4-letter ISO codes
	region      stringSet // 2-letter ISO or 3-digit UN M49 codes
	variant     stringSet // 4-8-alphanumeric variant code.

	// Region codes that are groups with their corresponding group IDs.
	// Filled by computeRegionGroups; keys are positions in region.
	groups map[int]index

	// langInfo
	// registry maps a subtag or tag to its IANA registry record; filled by
	// parseRegistry.
	registry map[string]*ianaEntry
}
 | 
						|
 | 
						|
// index is the ID assigned to a region group (the value type of builder.groups).
type index uint
 | 
						|
 | 
						|
func newBuilder(w *gen.CodeWriter) *builder {
 | 
						|
	r := gen.OpenCLDRCoreZip()
 | 
						|
	defer r.Close()
 | 
						|
	d := &cldr.Decoder{}
 | 
						|
	data, err := d.DecodeZip(r)
 | 
						|
	failOnError(err)
 | 
						|
	b := builder{
 | 
						|
		w:    w,
 | 
						|
		hw:   io.MultiWriter(w, w.Hash),
 | 
						|
		data: data,
 | 
						|
		supp: data.Supplemental(),
 | 
						|
	}
 | 
						|
	b.parseRegistry()
 | 
						|
	return &b
 | 
						|
}
 | 
						|
 | 
						|
// parseRegistry reads the IANA language subtag registry and fills
// b.registry with one ianaEntry per subtag or tag.
//
// The file is consumed as a stream of whitespace-separated words that is
// interpreted as "Key: value" pairs. A "Type:" field starts a new record;
// the fields that follow are stored in that record. Scanner errors cause a
// panic after the loop.
func (b *builder) parseRegistry() {
	r := gen.OpenIANAFile("assignments/language-subtag-registry")
	defer r.Close()
	b.registry = make(map[string]*ianaEntry)

	scan := bufio.NewScanner(r)
	scan.Split(bufio.ScanWords)
	var record *ianaEntry
	// Invariant at the top of each iteration: the scanner's current token is
	// the next candidate key and more reports whether that token is valid.
	for more := scan.Scan(); more; {
		key := scan.Text()
		more = scan.Scan()
		value := scan.Text()
		switch key {
		case "Type:":
			record = &ianaEntry{typ: value}
		case "Subtag:", "Tag:":
			// A value of the form "x..y" denotes a range; every code from x
			// up to and including y is registered with the same record.
			if s := strings.SplitN(value, "..", 2); len(s) > 1 {
				for a := s[0]; a <= s[1]; a = inc(a) {
					b.addToRegistry(a, record)
				}
			} else {
				b.addToRegistry(value, record)
			}
		case "Suppress-Script:":
			record.suppressScript = value
		case "Added:":
			record.added = value
		case "Deprecated:":
			record.deprecated = value
		case "Macrolanguage:":
			record.macro = value
		case "Preferred-Value:":
			record.preferred = value
		case "Prefix:":
			record.prefix = append(record.prefix, value)
		case "Scope:":
			record.scope = value
		case "Description:":
			// Descriptions span several words: keep appending until a word
			// starts with '%' or ends with ':', which is taken as the start
			// of the next separator or field.
			buf := []byte(value)
			for more = scan.Scan(); more; more = scan.Scan() {
				// NOTE: this b shadows the builder receiver within the loop.
				b := scan.Bytes()
				if b[0] == '%' || b[len(b)-1] == ':' {
					break
				}
				buf = append(buf, ' ')
				buf = append(buf, b...)
			}
			record.description = append(record.description, string(buf))
			// The current token is already the next key; do not advance.
			continue
		default:
			// Unknown key: the word read as value becomes the next key
			// candidate, so the stream effectively advances by one word.
			continue
		}
		// Known single-value field: move on to the next key.
		more = scan.Scan()
	}
	if scan.Err() != nil {
		log.Panic(scan.Err())
	}
}
 | 
						|
 | 
						|
func (b *builder) addToRegistry(key string, entry *ianaEntry) {
 | 
						|
	if info, ok := b.registry[key]; ok {
 | 
						|
		if info.typ != "language" || entry.typ != "extlang" {
 | 
						|
			log.Fatalf("parseRegistry: tag %q already exists", key)
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		b.registry[key] = entry
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
var commentIndex = make(map[string]string)
 | 
						|
 | 
						|
func init() {
 | 
						|
	for _, s := range comment {
 | 
						|
		key := strings.TrimSpace(strings.SplitN(s, " ", 2)[0])
 | 
						|
		commentIndex[key] = s
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) comment(name string) {
 | 
						|
	if s := commentIndex[name]; len(s) > 0 {
 | 
						|
		b.w.WriteComment(s)
 | 
						|
	} else {
 | 
						|
		fmt.Fprintln(b.w)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) pf(f string, x ...interface{}) {
 | 
						|
	fmt.Fprintf(b.hw, f, x...)
 | 
						|
	fmt.Fprint(b.hw, "\n")
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) p(x ...interface{}) {
 | 
						|
	fmt.Fprintln(b.hw, x...)
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) addSize(s int) {
 | 
						|
	b.w.Size += s
 | 
						|
	b.pf("// Size: %d bytes", s)
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeConst(name string, x interface{}) {
 | 
						|
	b.comment(name)
 | 
						|
	b.w.WriteConst(name, x)
 | 
						|
}
 | 
						|
 | 
						|
// writeConsts computes f(v) for all v in values and writes the results
 | 
						|
// as constants named _v to a single constant block.
 | 
						|
func (b *builder) writeConsts(f func(string) int, values ...string) {
 | 
						|
	b.pf("const (")
 | 
						|
	for _, v := range values {
 | 
						|
		b.pf("\t_%s = %v", v, f(v))
 | 
						|
	}
 | 
						|
	b.pf(")")
 | 
						|
}
 | 
						|
 | 
						|
// writeType writes the type of the given value, which must be a struct.
 | 
						|
func (b *builder) writeType(value interface{}) {
 | 
						|
	b.comment(reflect.TypeOf(value).Name())
 | 
						|
	b.w.WriteType(value)
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeSlice(name string, ss interface{}) {
 | 
						|
	b.writeSliceAddSize(name, 0, ss)
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeSliceAddSize(name string, extraSize int, ss interface{}) {
 | 
						|
	b.comment(name)
 | 
						|
	b.w.Size += extraSize
 | 
						|
	v := reflect.ValueOf(ss)
 | 
						|
	t := v.Type().Elem()
 | 
						|
	b.pf("// Size: %d bytes, %d elements", v.Len()*int(t.Size())+extraSize, v.Len())
 | 
						|
 | 
						|
	fmt.Fprintf(b.w, "var %s = ", name)
 | 
						|
	b.w.WriteArray(ss)
 | 
						|
	b.p()
 | 
						|
}
 | 
						|
 | 
						|
// fromTo is one entry of a sorted map table written by writeSortedMap: the
// index of a key paired with the index of its replacement.
type fromTo struct {
	from uint16
	to   uint16
}
 | 
						|
 | 
						|
func (b *builder) writeSortedMap(name string, ss *stringSet, index func(s string) uint16) {
 | 
						|
	ss.sortFunc(func(a, b string) bool {
 | 
						|
		return index(a) < index(b)
 | 
						|
	})
 | 
						|
	m := []fromTo{}
 | 
						|
	for _, s := range ss.s {
 | 
						|
		m = append(m, fromTo{index(s), index(ss.update[s])})
 | 
						|
	}
 | 
						|
	b.writeSlice(name, m)
 | 
						|
}
 | 
						|
 | 
						|
// base is the number of letters in the range 'a'..'z'.
const base = 'z' - 'a' + 1

// strToInt interprets s as a base-26 number whose digits are the letters
// 'a' (0) through 'z' (25), most significant digit first. The input is not
// validated.
func strToInt(s string) uint {
	var v uint
	for _, c := range []byte(s) {
		v = v*base + uint(c-'a')
	}
	return v
}
 | 
						|
 | 
						|
// converts the given integer to the original ASCII string passed to strToInt.
 | 
						|
// len(s) must match the number of characters obtained.
 | 
						|
func intToStr(v uint, s []byte) {
 | 
						|
	for i := len(s) - 1; i >= 0; i-- {
 | 
						|
		s[i] = byte(v%base) + 'a'
 | 
						|
		v /= base
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeBitVector(name string, ss []string) {
 | 
						|
	vec := make([]uint8, int(math.Ceil(math.Pow(base, float64(len(ss[0])))/8)))
 | 
						|
	for _, s := range ss {
 | 
						|
		v := strToInt(s)
 | 
						|
		vec[v/8] |= 1 << (v % 8)
 | 
						|
	}
 | 
						|
	b.writeSlice(name, vec)
 | 
						|
}
 | 
						|
 | 
						|
// TODO: convert this type into a list or two-stage trie.
 | 
						|
func (b *builder) writeMapFunc(name string, m map[string]string, f func(string) uint16) {
 | 
						|
	b.comment(name)
 | 
						|
	v := reflect.ValueOf(m)
 | 
						|
	sz := v.Len() * (2 + int(v.Type().Key().Size()))
 | 
						|
	for _, k := range m {
 | 
						|
		sz += len(k)
 | 
						|
	}
 | 
						|
	b.addSize(sz)
 | 
						|
	keys := []string{}
 | 
						|
	b.pf(`var %s = map[string]uint16{`, name)
 | 
						|
	for k := range m {
 | 
						|
		keys = append(keys, k)
 | 
						|
	}
 | 
						|
	sort.Strings(keys)
 | 
						|
	for _, k := range keys {
 | 
						|
		b.pf("\t%q: %v,", k, f(m[k]))
 | 
						|
	}
 | 
						|
	b.p("}")
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeMap(name string, m interface{}) {
 | 
						|
	b.comment(name)
 | 
						|
	v := reflect.ValueOf(m)
 | 
						|
	sz := v.Len() * (2 + int(v.Type().Key().Size()) + int(v.Type().Elem().Size()))
 | 
						|
	b.addSize(sz)
 | 
						|
	f := strings.FieldsFunc(fmt.Sprintf("%#v", m), func(r rune) bool {
 | 
						|
		return strings.IndexRune("{}, ", r) != -1
 | 
						|
	})
 | 
						|
	sort.Strings(f[1:])
 | 
						|
	b.pf(`var %s = %s{`, name, f[0])
 | 
						|
	for _, kv := range f[1:] {
 | 
						|
		b.pf("\t%s,", kv)
 | 
						|
	}
 | 
						|
	b.p("}")
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) langIndex(s string) uint16 {
 | 
						|
	if s == "und" {
 | 
						|
		return 0
 | 
						|
	}
 | 
						|
	if i, ok := b.lang.find(s); ok {
 | 
						|
		return uint16(i)
 | 
						|
	}
 | 
						|
	return uint16(strToInt(s)) + uint16(len(b.lang.s))
 | 
						|
}
 | 
						|
 | 
						|
// inc advances the string to its lexicographical successor.
 | 
						|
func inc(s string) string {
 | 
						|
	const maxTagLength = 4
 | 
						|
	var buf [maxTagLength]byte
 | 
						|
	intToStr(strToInt(strings.ToLower(s))+1, buf[:len(s)])
 | 
						|
	for i := 0; i < len(s); i++ {
 | 
						|
		if s[i] <= 'Z' {
 | 
						|
			buf[i] -= 'a' - 'A'
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return string(buf[:len(s)])
 | 
						|
}
 | 
						|
 | 
						|
// parseIndices fills the language, script, region, variant and locale
// string sets of b from the IANA registry and the CLDR data, and writes the
// numLanguages, numScripts and numRegions constants.
//
// The order of the steps matters: the constants are written before the
// dummy "unspecified" codes are added, so these are not counted.
func (b *builder) parseIndices() {
	meta := b.supp.Metadata

	// Distribute the registry entries over the sets by entry type.
	for k, v := range b.registry {
		var ss *stringSet
		switch v.typ {
		case "language":
			// 2-letter codes, codes with a suppress-script and special
			// codes go to the indexed language list; all others go to
			// langNoIndex.
			if len(k) == 2 || v.suppressScript != "" || v.scope == "special" {
				b.lang.add(k)
				continue
			} else {
				ss = &b.langNoIndex
			}
		case "region":
			ss = &b.region
		case "script":
			ss = &b.script
		case "variant":
			ss = &b.variant
		default:
			continue
		}
		ss.add(k)
	}
	// Include any language for which there is data.
	for _, lang := range b.data.Locales() {
		if x := b.data.RawLDML(lang); false ||
			x.LocaleDisplayNames != nil ||
			x.Characters != nil ||
			x.Delimiters != nil ||
			x.Measurement != nil ||
			x.Dates != nil ||
			x.Numbers != nil ||
			x.Units != nil ||
			x.ListPatterns != nil ||
			x.Collations != nil ||
			x.Segmentations != nil ||
			x.Rbnf != nil ||
			x.Annotations != nil ||
			x.Metadata != nil {

			// Only the language part (before the first '_') is indexed.
			from := strings.Split(lang, "_")
			if lang := from[0]; lang != "root" {
				b.lang.add(lang)
			}
		}
	}
	// Include locales for plural rules, which uses a different structure.
	for _, plurals := range b.data.Supplemental().Plurals {
		for _, rules := range plurals.PluralRules {
			for _, lang := range strings.Split(rules.Locales, " ") {
				if lang = strings.Split(lang, "_")[0]; lang != "root" {
					b.lang.add(lang)
				}
			}
		}
	}
	// Include languages in likely subtags.
	for _, m := range b.supp.LikelySubtags.LikelySubtag {
		from := strings.Split(m.From, "_")
		b.lang.add(from[0])
	}
	// Include ISO-639 alpha-3 bibliographic entries.
	for _, a := range meta.Alias.LanguageAlias {
		if a.Reason == "bibliographic" {
			b.langNoIndex.add(a.Type)
		}
	}
	// Include regions in territoryAlias (not all are in the IANA registry!)
	for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
		if len(reg.Type) == 2 {
			b.region.add(reg.Type)
		}
	}

	// A 3-letter code that ended up in the indexed list must not also be in
	// langNoIndex.
	for _, s := range b.lang.s {
		if len(s) == 3 {
			b.langNoIndex.remove(s)
		}
	}
	b.writeConst("numLanguages", len(b.lang.slice())+len(b.langNoIndex.slice()))
	b.writeConst("numScripts", len(b.script.slice()))
	b.writeConst("numRegions", len(b.region.slice()))

	// Add dummy codes at the start of each list to represent "unspecified".
	b.lang.add("---")
	b.script.add("----")
	b.region.add("---")

	// common locales
	b.locale.parse(meta.DefaultContent.Locales)
}
 | 
						|
 | 
						|
// TODO: region inclusion data will probably not be used in future matchers.
 | 
						|
 | 
						|
func (b *builder) computeRegionGroups() {
 | 
						|
	b.groups = make(map[int]index)
 | 
						|
 | 
						|
	// Create group indices.
 | 
						|
	for i := 1; b.region.s[i][0] < 'A'; i++ { // Base M49 indices on regionID.
 | 
						|
		b.groups[i] = index(len(b.groups))
 | 
						|
	}
 | 
						|
	for _, g := range b.supp.TerritoryContainment.Group {
 | 
						|
		// Skip UN and EURO zone as they are flattening the containment
 | 
						|
		// relationship.
 | 
						|
		if g.Type == "EZ" || g.Type == "UN" {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		group := b.region.index(g.Type)
 | 
						|
		if _, ok := b.groups[group]; !ok {
 | 
						|
			b.groups[group] = index(len(b.groups))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if len(b.groups) > 32 {
 | 
						|
		log.Fatalf("only 32 groups supported, found %d", len(b.groups))
 | 
						|
	}
 | 
						|
	b.writeConst("nRegionGroups", len(b.groups))
 | 
						|
}
 | 
						|
 | 
						|
// langConsts lists the language codes for which writeLanguage emits a
// constant named _<code>.
var langConsts = []string{
	"af", "am", "ar", "az", "bg", "bn", "ca", "cs", "da", "de",
	"el", "en", "es", "et", "fa", "fi", "fil", "fr", "gu", "he",
	"hi", "hr", "hu", "hy", "id", "is", "it", "ja", "ka", "kk",
	"km", "kn", "ko", "ky", "lo", "lt", "lv", "mk", "ml", "mn",
	"mo", "mr", "ms", "mul", "my", "nb", "ne", "nl", "no", "pa",
	"pl", "pt", "ro", "ru", "sh", "si", "sk", "sl", "sq", "sr",
	"sv", "sw", "ta", "te", "th", "tl", "tn", "tr", "uk", "ur",
	"uz", "vi", "zh", "zu",

	// constants for grandfathered tags (if not already defined)
	"jbo", "ami", "bnn", "hak", "tlh", "lb", "nv", "pwn", "tao", "tay",
	"tsu", "nn", "sfb", "vgt", "sgg", "cmn", "nan", "hsn",
}
 | 
						|
 | 
						|
// writeLanguage generates all tables needed for language canonicalization.
//
// It writes, in this order: the nonCanonicalUnd constant, the _<code>
// language constants, langPrivateStart/langPrivateEnd, the lang index
// string, langNoIndexOffset, the langNoIndex bit vector, altLangISO3,
// altLangIndex, langAliasMap and langAliasTypes.
func (b *builder) writeLanguage() {
	meta := b.supp.Metadata

	b.writeConst("nonCanonicalUnd", b.lang.index("und"))
	b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
	b.writeConst("langPrivateStart", b.langIndex("qaa"))
	b.writeConst("langPrivateEnd", b.langIndex("qtz"))

	// Get language codes that need to be mapped (overlong 3-letter codes,
	// deprecated 2-letter codes, legacy and grandfathered tags.)
	langAliasMap := stringSet{}
	aliasTypeMap := map[string]langAliasType{}

	// altLangISO3 get the alternative ISO3 names that need to be mapped.
	altLangISO3 := stringSet{}
	// Add dummy start to avoid the use of index 0.
	altLangISO3.add("---")
	altLangISO3.updateLater("---", "aa")

	// Work on a copy of the language list; lang.update collects the
	// 3-letter form for 2-letter codes.
	lang := b.lang.clone()
	for _, a := range meta.Alias.LanguageAlias {
		if a.Replacement == "" {
			a.Replacement = "und"
		}
		// TODO: support mapping to tags
		repl := strings.SplitN(a.Replacement, "_", 2)[0]
		if a.Reason == "overlong" {
			// An overlong 3-letter code is the long form of its 2-letter
			// replacement.
			if len(a.Replacement) == 2 && len(a.Type) == 3 {
				lang.updateLater(a.Replacement, a.Type)
			}
		} else if len(a.Type) <= 3 {
			switch a.Reason {
			case "macrolanguage":
				aliasTypeMap[a.Type] = langMacro
			case "deprecated":
				// handled elsewhere
				continue
			case "bibliographic", "legacy":
				if a.Type == "no" {
					continue
				}
				aliasTypeMap[a.Type] = langLegacy
			default:
				log.Fatalf("new %s alias: %s", a.Reason, a.Type)
			}
			langAliasMap.add(a.Type)
			langAliasMap.updateLater(a.Type, repl)
		}
	}
	// Manually add the mapping of "nb" (Norwegian) to its macro language.
	// This can be removed if CLDR adopts this change.
	langAliasMap.add("nb")
	langAliasMap.updateLater("nb", "no")
	aliasTypeMap["nb"] = langMacro

	for k, v := range b.registry {
		// Also add deprecated values for 3-letter ISO codes, which CLDR omits.
		if v.typ == "language" && v.deprecated != "" && v.preferred != "" {
			langAliasMap.add(k)
			langAliasMap.updateLater(k, v.preferred)
			aliasTypeMap[k] = langDeprecated
		}
	}
	// Fix CLDR mappings.
	lang.updateLater("tl", "tgl")
	lang.updateLater("sh", "hbs")
	lang.updateLater("mo", "mol")
	lang.updateLater("no", "nor")
	lang.updateLater("tw", "twi")
	lang.updateLater("nb", "nob")
	lang.updateLater("ak", "aka")
	lang.updateLater("bh", "bih")

	// Ensure that each 2-letter code is matched with a 3-letter code.
	for _, v := range lang.s[1:] {
		s, ok := lang.update[v]
		if !ok {
			// Fall back to the long form of the code that v is an alias of.
			if s, ok = lang.update[langAliasMap.update[v]]; !ok {
				continue
			}
			lang.update[v] = s
		}
		// The long form cannot be derived from the short form when the first
		// letters differ; such codes go through altLangISO3.
		if v[0] != s[0] {
			altLangISO3.add(s)
			altLangISO3.updateLater(s, v)
		}
	}

	// Complete canonicalized language tags.
	lang.freeze()
	for i, v := range lang.s {
		// We can avoid these manual entries by using the IANA registry directly.
		// Seems easier to update the list manually, as changes are rare.
		// The panic in this loop will trigger if we miss an entry.
		add := ""
		if s, ok := lang.update[v]; ok {
			if s[0] == v[0] {
				// Same first letter: store letters two and three of the
				// long form.
				add = s[1:]
			} else {
				// Otherwise store a 0 byte followed by the position of the
				// long form in altLangISO3.
				add = string([]byte{0, byte(altLangISO3.index(s))})
			}
		} else if len(v) == 3 {
			// 3-letter entries are padded with a 0 byte.
			add = "\x00"
		} else {
			log.Panicf("no data for long form of %q", v)
		}
		lang.s[i] += add
	}
	b.writeConst("lang", tag.Index(lang.join()))

	b.writeConst("langNoIndexOffset", len(b.lang.s))

	// space of all valid 3-letter language identifiers.
	b.writeBitVector("langNoIndex", b.langNoIndex.slice())

	altLangIndex := []uint16{}
	for i, s := range altLangISO3.slice() {
		// Append to each 3-letter code a byte holding the number of
		// altLangIndex entries collected so far.
		altLangISO3.s[i] += string([]byte{byte(len(altLangIndex))})
		if i > 0 {
			idx := b.lang.index(altLangISO3.update[s])
			altLangIndex = append(altLangIndex, uint16(idx))
		}
	}
	b.writeConst("altLangISO3", tag.Index(altLangISO3.join()))
	b.writeSlice("altLangIndex", altLangIndex)

	b.writeSortedMap("langAliasMap", &langAliasMap, b.langIndex)
	// writeSortedMap leaves langAliasMap.s in index order, so types lines up
	// with the entries of the table just written.
	types := make([]langAliasType, len(langAliasMap.s))
	for i, s := range langAliasMap.s {
		types[i] = aliasTypeMap[s]
	}
	b.writeSlice("langAliasTypes", types)
}
 | 
						|
 | 
						|
// scriptConsts lists the script codes that writeScript hands to writeConsts
// together with the script index function.
var scriptConsts = []string{
	"Latn",
	"Hani", "Hans", "Hant",
	"Qaaa", "Qaai", "Qabx",
	"Zinh", "Zyyy", "Zzzz",
}
 | 
						|
 | 
						|
func (b *builder) writeScript() {
 | 
						|
	b.writeConsts(b.script.index, scriptConsts...)
 | 
						|
	b.writeConst("script", tag.Index(b.script.join()))
 | 
						|
 | 
						|
	supp := make([]uint8, len(b.lang.slice()))
 | 
						|
	for i, v := range b.lang.slice()[1:] {
 | 
						|
		if sc := b.registry[v].suppressScript; sc != "" {
 | 
						|
			supp[i+1] = uint8(b.script.index(sc))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	b.writeSlice("suppressScript", supp)
 | 
						|
 | 
						|
	// There is only one deprecated script in CLDR. This value is hard-coded.
 | 
						|
	// We check here if the code must be updated.
 | 
						|
	for _, a := range b.supp.Metadata.Alias.ScriptAlias {
 | 
						|
		if a.Type != "Qaai" {
 | 
						|
			log.Panicf("unexpected deprecated stript %q", a.Type)
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func parseM49(s string) int16 {
 | 
						|
	if len(s) == 0 {
 | 
						|
		return 0
 | 
						|
	}
 | 
						|
	v, err := strconv.ParseUint(s, 10, 10)
 | 
						|
	failOnError(err)
 | 
						|
	return int16(v)
 | 
						|
}
 | 
						|
 | 
						|
// regionConsts lists the region codes that writeRegion hands to writeConsts
// together with the region index function.
var regionConsts = []string{
	"001", "419",
	"BR", "CA", "ES", "GB", "MD", "PT", "UK", "US", "ZZ",
	"XA", "XC",
	"XK", // Unofficial tag for Kosovo.
}
 | 
						|
 | 
						|
func (b *builder) writeRegion() {
 | 
						|
	b.writeConsts(b.region.index, regionConsts...)
 | 
						|
 | 
						|
	isoOffset := b.region.index("AA")
 | 
						|
	m49map := make([]int16, len(b.region.slice()))
 | 
						|
	fromM49map := make(map[int16]int)
 | 
						|
	altRegionISO3 := ""
 | 
						|
	altRegionIDs := []uint16{}
 | 
						|
 | 
						|
	b.writeConst("isoRegionOffset", isoOffset)
 | 
						|
 | 
						|
	// 2-letter region lookup and mapping to numeric codes.
 | 
						|
	regionISO := b.region.clone()
 | 
						|
	regionISO.s = regionISO.s[isoOffset:]
 | 
						|
	regionISO.sorted = false
 | 
						|
 | 
						|
	regionTypes := make([]byte, len(b.region.s))
 | 
						|
 | 
						|
	// Is the region valid BCP 47?
 | 
						|
	for s, e := range b.registry {
 | 
						|
		if len(s) == 2 && s == strings.ToUpper(s) {
 | 
						|
			i := b.region.index(s)
 | 
						|
			for _, d := range e.description {
 | 
						|
				if strings.Contains(d, "Private use") {
 | 
						|
					regionTypes[i] = iso3166UserAssgined
 | 
						|
				}
 | 
						|
			}
 | 
						|
			regionTypes[i] |= bcp47Region
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// Is the region a valid ccTLD?
 | 
						|
	r := gen.OpenIANAFile("domains/root/db")
 | 
						|
	defer r.Close()
 | 
						|
 | 
						|
	buf, err := ioutil.ReadAll(r)
 | 
						|
	failOnError(err)
 | 
						|
	re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`)
 | 
						|
	for _, m := range re.FindAllSubmatch(buf, -1) {
 | 
						|
		i := b.region.index(strings.ToUpper(string(m[1])))
 | 
						|
		regionTypes[i] |= ccTLD
 | 
						|
	}
 | 
						|
 | 
						|
	b.writeSlice("regionTypes", regionTypes)
 | 
						|
 | 
						|
	iso3Set := make(map[string]int)
 | 
						|
	update := func(iso2, iso3 string) {
 | 
						|
		i := regionISO.index(iso2)
 | 
						|
		if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] {
 | 
						|
			regionISO.s[i] += iso3[1:]
 | 
						|
			iso3Set[iso3] = -1
 | 
						|
		} else {
 | 
						|
			if ok && j >= 0 {
 | 
						|
				regionISO.s[i] += string([]byte{0, byte(j)})
 | 
						|
			} else {
 | 
						|
				iso3Set[iso3] = len(altRegionISO3)
 | 
						|
				regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
 | 
						|
				altRegionISO3 += iso3
 | 
						|
				altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
 | 
						|
		i := regionISO.index(tc.Type) + isoOffset
 | 
						|
		if d := m49map[i]; d != 0 {
 | 
						|
			log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
 | 
						|
		}
 | 
						|
		m49 := parseM49(tc.Numeric)
 | 
						|
		m49map[i] = m49
 | 
						|
		if r := fromM49map[m49]; r == 0 {
 | 
						|
			fromM49map[m49] = i
 | 
						|
		} else if r != i {
 | 
						|
			dep := b.registry[regionISO.s[r-isoOffset]].deprecated
 | 
						|
			if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) {
 | 
						|
				fromM49map[m49] = i
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for _, ta := range b.supp.Metadata.Alias.TerritoryAlias {
 | 
						|
		if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 {
 | 
						|
			from := parseM49(ta.Type)
 | 
						|
			if r := fromM49map[from]; r == 0 {
 | 
						|
				fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
 | 
						|
		if len(tc.Alpha3) == 3 {
 | 
						|
			update(tc.Type, tc.Alpha3)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	// This entries are not included in territoryCodes. Mostly 3-letter variants
 | 
						|
	// of deleted codes and an entry for QU.
 | 
						|
	for _, m := range []struct{ iso2, iso3 string }{
 | 
						|
		{"CT", "CTE"},
 | 
						|
		{"DY", "DHY"},
 | 
						|
		{"HV", "HVO"},
 | 
						|
		{"JT", "JTN"},
 | 
						|
		{"MI", "MID"},
 | 
						|
		{"NH", "NHB"},
 | 
						|
		{"NQ", "ATN"},
 | 
						|
		{"PC", "PCI"},
 | 
						|
		{"PU", "PUS"},
 | 
						|
		{"PZ", "PCZ"},
 | 
						|
		{"RH", "RHO"},
 | 
						|
		{"VD", "VDR"},
 | 
						|
		{"WK", "WAK"},
 | 
						|
		// These three-letter codes are used for others as well.
 | 
						|
		{"FQ", "ATF"},
 | 
						|
	} {
 | 
						|
		update(m.iso2, m.iso3)
 | 
						|
	}
 | 
						|
	for i, s := range regionISO.s {
 | 
						|
		if len(s) != 4 {
 | 
						|
			regionISO.s[i] = s + "  "
 | 
						|
		}
 | 
						|
	}
 | 
						|
	b.writeConst("regionISO", tag.Index(regionISO.join()))
 | 
						|
	b.writeConst("altRegionISO3", altRegionISO3)
 | 
						|
	b.writeSlice("altRegionIDs", altRegionIDs)
 | 
						|
 | 
						|
	// Create list of deprecated regions.
 | 
						|
	// TODO: consider inserting SF -> FI. Not included by CLDR, but is the only
 | 
						|
	// Transitionally-reserved mapping not included.
 | 
						|
	regionOldMap := stringSet{}
 | 
						|
	// Include regions in territoryAlias (not all are in the IANA registry!)
 | 
						|
	for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
 | 
						|
		if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 {
 | 
						|
			regionOldMap.add(reg.Type)
 | 
						|
			regionOldMap.updateLater(reg.Type, reg.Replacement)
 | 
						|
			i, _ := regionISO.find(reg.Type)
 | 
						|
			j, _ := regionISO.find(reg.Replacement)
 | 
						|
			if k := m49map[i+isoOffset]; k == 0 {
 | 
						|
				m49map[i+isoOffset] = m49map[j+isoOffset]
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 {
 | 
						|
		return uint16(b.region.index(s))
 | 
						|
	})
 | 
						|
	// 3-digit region lookup, groupings.
 | 
						|
	for i := 1; i < isoOffset; i++ {
 | 
						|
		m := parseM49(b.region.s[i])
 | 
						|
		m49map[i] = m
 | 
						|
		fromM49map[m] = i
 | 
						|
	}
 | 
						|
	b.writeSlice("m49", m49map)
 | 
						|
 | 
						|
	const (
 | 
						|
		searchBits = 7
 | 
						|
		regionBits = 9
 | 
						|
	)
 | 
						|
	if len(m49map) >= 1<<regionBits {
 | 
						|
		log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits)
 | 
						|
	}
 | 
						|
	m49Index := [9]int16{}
 | 
						|
	fromM49 := []uint16{}
 | 
						|
	m49 := []int{}
 | 
						|
	for k, _ := range fromM49map {
 | 
						|
		m49 = append(m49, int(k))
 | 
						|
	}
 | 
						|
	sort.Ints(m49)
 | 
						|
	for _, k := range m49[1:] {
 | 
						|
		val := (k & (1<<searchBits - 1)) << regionBits
 | 
						|
		fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)]))
 | 
						|
		m49Index[1:][k>>searchBits] = int16(len(fromM49))
 | 
						|
	}
 | 
						|
	b.writeSlice("m49Index", m49Index)
 | 
						|
	b.writeSlice("fromM49", fromM49)
 | 
						|
}
 | 
						|
 | 
						|
// TODO: put these lists in regionTypes as user data? Could be used for
// various optimizations and refinements and could be exposed in the API.
const (
	// Space-separated region codes; judging by the name, the exceptionally
	// reserved ISO 3166 codes (TODO confirm).
	iso3166Except = "AC CP DG EA EU FX IC SU TA UK"

	// Space-separated region codes; judging by the name, the transitionally
	// reserved ISO 3166 codes (TODO confirm). SF is not in our set of Regions.
	iso3166Trans = "AN BU CS NT TP YU ZR"

	// Space-separated deleted region codes. DY and RH are actually not
	// deleted, but indeterminately reserved.
	iso3166DelCLDR = "CT DD DY FQ HV JT MI NH NQ PC PU PZ RH VD WK YD"
)
 | 
						|
 | 
						|
// Bit flags stored per region in the regionTypes table.
const (
	iso3166UserAssgined = 1 << iota // 1: registry description contains "Private use"
	ccTLD                           // 2: code appears in the IANA root zone listing
	bcp47Region                     // 4: 2-letter upper-case code present in the registry
)
 | 
						|
 | 
						|
// find returns the position of the first element of list that equals s, or
// -1 if list contains no such element.
func find(list []string, s string) int {
	for i := 0; i < len(list); i++ {
		if list[i] == s {
			return i
		}
	}
	return -1
}
 | 
						|
 | 
						|
// writeVariants generates per-variant information and creates a map from variant
 | 
						|
// name to index value. We assign index values such that sorting multiple
 | 
						|
// variants by index value will result in the correct order.
 | 
						|
// There are two types of variants: specialized and general. Specialized variants
 | 
						|
// are only applicable to certain language or language-script pairs. Generalized
 | 
						|
// variants apply to any language. Generalized variants always sort after
 | 
						|
// specialized variants.  We will therefore always assign a higher index value
 | 
						|
// to a generalized variant than any other variant. Generalized variants are
 | 
						|
// sorted alphabetically among themselves.
 | 
						|
// Specialized variants may also sort after other specialized variants. Such
 | 
						|
// variants will be ordered after any of the variants they may follow.
 | 
						|
// We assume that if a variant x is followed by a variant y, then for any prefix
 | 
						|
// p of x, p-x is a prefix of y. This allows us to order tags based on the
 | 
						|
// maximum of the length of any of its prefixes.
 | 
						|
// TODO: it is possible to define a set of Prefix values on variants such that
 | 
						|
// a total order cannot be defined to the point that this algorithm breaks.
 | 
						|
// In other words, we cannot guarantee the same order of variants for the
 | 
						|
// future using the same algorithm or for non-compliant combinations of
 | 
						|
// variants. For this reason, consider using simple alphabetic sorting
 | 
						|
// of variants and ignore Prefix restrictions altogether.
 | 
						|
func (b *builder) writeVariant() {
 | 
						|
	generalized := stringSet{}
 | 
						|
	specialized := stringSet{}
 | 
						|
	specializedExtend := stringSet{}
 | 
						|
	// Collate the variants by type and check assumptions.
 | 
						|
	for _, v := range b.variant.slice() {
 | 
						|
		e := b.registry[v]
 | 
						|
		if len(e.prefix) == 0 {
 | 
						|
			generalized.add(v)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		c := strings.Split(e.prefix[0], "-")
 | 
						|
		hasScriptOrRegion := false
 | 
						|
		if len(c) > 1 {
 | 
						|
			_, hasScriptOrRegion = b.script.find(c[1])
 | 
						|
			if !hasScriptOrRegion {
 | 
						|
				_, hasScriptOrRegion = b.region.find(c[1])
 | 
						|
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if len(c) == 1 || len(c) == 2 && hasScriptOrRegion {
 | 
						|
			// Variant is preceded by a language.
 | 
						|
			specialized.add(v)
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		// Variant is preceded by another variant.
 | 
						|
		specializedExtend.add(v)
 | 
						|
		prefix := c[0] + "-"
 | 
						|
		if hasScriptOrRegion {
 | 
						|
			prefix += c[1]
 | 
						|
		}
 | 
						|
		for _, p := range e.prefix {
 | 
						|
			// Verify that the prefix minus the last element is a prefix of the
 | 
						|
			// predecessor element.
 | 
						|
			i := strings.LastIndex(p, "-")
 | 
						|
			pred := b.registry[p[i+1:]]
 | 
						|
			if find(pred.prefix, p[:i]) < 0 {
 | 
						|
				log.Fatalf("prefix %q for variant %q not consistent with predecessor spec", p, v)
 | 
						|
			}
 | 
						|
			// The sorting used below does not work in the general case. It works
 | 
						|
			// if we assume that variants that may be followed by others only have
 | 
						|
			// prefixes of the same length. Verify this.
 | 
						|
			count := strings.Count(p[:i], "-")
 | 
						|
			for _, q := range pred.prefix {
 | 
						|
				if c := strings.Count(q, "-"); c != count {
 | 
						|
					log.Fatalf("variant %q preceding %q has a prefix %q of size %d; want %d", p[i+1:], v, q, c, count)
 | 
						|
				}
 | 
						|
			}
 | 
						|
			if !strings.HasPrefix(p, prefix) {
 | 
						|
				log.Fatalf("prefix %q of variant %q should start with %q", p, v, prefix)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	// Sort extended variants.
 | 
						|
	a := specializedExtend.s
 | 
						|
	less := func(v, w string) bool {
 | 
						|
		// Sort by the maximum number of elements.
 | 
						|
		maxCount := func(s string) (max int) {
 | 
						|
			for _, p := range b.registry[s].prefix {
 | 
						|
				if c := strings.Count(p, "-"); c > max {
 | 
						|
					max = c
 | 
						|
				}
 | 
						|
			}
 | 
						|
			return
 | 
						|
		}
 | 
						|
		if cv, cw := maxCount(v), maxCount(w); cv != cw {
 | 
						|
			return cv < cw
 | 
						|
		}
 | 
						|
		// Sort by name as tie breaker.
 | 
						|
		return v < w
 | 
						|
	}
 | 
						|
	sort.Sort(funcSorter{less, sort.StringSlice(a)})
 | 
						|
	specializedExtend.frozen = true
 | 
						|
 | 
						|
	// Create index from variant name to index.
 | 
						|
	variantIndex := make(map[string]uint8)
 | 
						|
	add := func(s []string) {
 | 
						|
		for _, v := range s {
 | 
						|
			variantIndex[v] = uint8(len(variantIndex))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	add(specialized.slice())
 | 
						|
	add(specializedExtend.s)
 | 
						|
	numSpecialized := len(variantIndex)
 | 
						|
	add(generalized.slice())
 | 
						|
	if n := len(variantIndex); n > 255 {
 | 
						|
		log.Fatalf("maximum number of variants exceeded: was %d; want <= 255", n)
 | 
						|
	}
 | 
						|
	b.writeMap("variantIndex", variantIndex)
 | 
						|
	b.writeConst("variantNumSpecialized", numSpecialized)
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeLanguageInfo() {
 | 
						|
}
 | 
						|
 | 
						|
// writeLikelyData writes tables that are used both for finding parent relations and for
 | 
						|
// language matching.  Each entry contains additional bits to indicate the status of the
 | 
						|
// data to know when it cannot be used for parent relations.
 | 
						|
func (b *builder) writeLikelyData() {
 | 
						|
	const (
 | 
						|
		isList = 1 << iota
 | 
						|
		scriptInFrom
 | 
						|
		regionInFrom
 | 
						|
	)
 | 
						|
	type ( // generated types
 | 
						|
		likelyScriptRegion struct {
 | 
						|
			region uint16
 | 
						|
			script uint8
 | 
						|
			flags  uint8
 | 
						|
		}
 | 
						|
		likelyLangScript struct {
 | 
						|
			lang   uint16
 | 
						|
			script uint8
 | 
						|
			flags  uint8
 | 
						|
		}
 | 
						|
		likelyLangRegion struct {
 | 
						|
			lang   uint16
 | 
						|
			region uint16
 | 
						|
		}
 | 
						|
		// likelyTag is used for getting likely tags for group regions, where
 | 
						|
		// the likely region might be a region contained in the group.
 | 
						|
		likelyTag struct {
 | 
						|
			lang   uint16
 | 
						|
			region uint16
 | 
						|
			script uint8
 | 
						|
		}
 | 
						|
	)
 | 
						|
	var ( // generated variables
 | 
						|
		likelyRegionGroup = make([]likelyTag, len(b.groups))
 | 
						|
		likelyLang        = make([]likelyScriptRegion, len(b.lang.s))
 | 
						|
		likelyRegion      = make([]likelyLangScript, len(b.region.s))
 | 
						|
		likelyScript      = make([]likelyLangRegion, len(b.script.s))
 | 
						|
		likelyLangList    = []likelyScriptRegion{}
 | 
						|
		likelyRegionList  = []likelyLangScript{}
 | 
						|
	)
 | 
						|
	type fromTo struct {
 | 
						|
		from, to []string
 | 
						|
	}
 | 
						|
	langToOther := map[int][]fromTo{}
 | 
						|
	regionToOther := map[int][]fromTo{}
 | 
						|
	for _, m := range b.supp.LikelySubtags.LikelySubtag {
 | 
						|
		from := strings.Split(m.From, "_")
 | 
						|
		to := strings.Split(m.To, "_")
 | 
						|
		if len(to) != 3 {
 | 
						|
			log.Fatalf("invalid number of subtags in %q: found %d, want 3", m.To, len(to))
 | 
						|
		}
 | 
						|
		if len(from) > 3 {
 | 
						|
			log.Fatalf("invalid number of subtags: found %d, want 1-3", len(from))
 | 
						|
		}
 | 
						|
		if from[0] != to[0] && from[0] != "und" {
 | 
						|
			log.Fatalf("unexpected language change in expansion: %s -> %s", from, to)
 | 
						|
		}
 | 
						|
		if len(from) == 3 {
 | 
						|
			if from[2] != to[2] {
 | 
						|
				log.Fatalf("unexpected region change in expansion: %s -> %s", from, to)
 | 
						|
			}
 | 
						|
			if from[0] != "und" {
 | 
						|
				log.Fatalf("unexpected fully specified from tag: %s -> %s", from, to)
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if len(from) == 1 || from[0] != "und" {
 | 
						|
			id := 0
 | 
						|
			if from[0] != "und" {
 | 
						|
				id = b.lang.index(from[0])
 | 
						|
			}
 | 
						|
			langToOther[id] = append(langToOther[id], fromTo{from, to})
 | 
						|
		} else if len(from) == 2 && len(from[1]) == 4 {
 | 
						|
			sid := b.script.index(from[1])
 | 
						|
			likelyScript[sid].lang = uint16(b.langIndex(to[0]))
 | 
						|
			likelyScript[sid].region = uint16(b.region.index(to[2]))
 | 
						|
		} else {
 | 
						|
			r := b.region.index(from[len(from)-1])
 | 
						|
			if id, ok := b.groups[r]; ok {
 | 
						|
				if from[0] != "und" {
 | 
						|
					log.Fatalf("region changed unexpectedly: %s -> %s", from, to)
 | 
						|
				}
 | 
						|
				likelyRegionGroup[id].lang = uint16(b.langIndex(to[0]))
 | 
						|
				likelyRegionGroup[id].script = uint8(b.script.index(to[1]))
 | 
						|
				likelyRegionGroup[id].region = uint16(b.region.index(to[2]))
 | 
						|
			} else {
 | 
						|
				regionToOther[r] = append(regionToOther[r], fromTo{from, to})
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	b.writeType(likelyLangRegion{})
 | 
						|
	b.writeSlice("likelyScript", likelyScript)
 | 
						|
 | 
						|
	for id := range b.lang.s {
 | 
						|
		list := langToOther[id]
 | 
						|
		if len(list) == 1 {
 | 
						|
			likelyLang[id].region = uint16(b.region.index(list[0].to[2]))
 | 
						|
			likelyLang[id].script = uint8(b.script.index(list[0].to[1]))
 | 
						|
		} else if len(list) > 1 {
 | 
						|
			likelyLang[id].flags = isList
 | 
						|
			likelyLang[id].region = uint16(len(likelyLangList))
 | 
						|
			likelyLang[id].script = uint8(len(list))
 | 
						|
			for _, x := range list {
 | 
						|
				flags := uint8(0)
 | 
						|
				if len(x.from) > 1 {
 | 
						|
					if x.from[1] == x.to[2] {
 | 
						|
						flags = regionInFrom
 | 
						|
					} else {
 | 
						|
						flags = scriptInFrom
 | 
						|
					}
 | 
						|
				}
 | 
						|
				likelyLangList = append(likelyLangList, likelyScriptRegion{
 | 
						|
					region: uint16(b.region.index(x.to[2])),
 | 
						|
					script: uint8(b.script.index(x.to[1])),
 | 
						|
					flags:  flags,
 | 
						|
				})
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	// TODO: merge suppressScript data with this table.
 | 
						|
	b.writeType(likelyScriptRegion{})
 | 
						|
	b.writeSlice("likelyLang", likelyLang)
 | 
						|
	b.writeSlice("likelyLangList", likelyLangList)
 | 
						|
 | 
						|
	for id := range b.region.s {
 | 
						|
		list := regionToOther[id]
 | 
						|
		if len(list) == 1 {
 | 
						|
			likelyRegion[id].lang = uint16(b.langIndex(list[0].to[0]))
 | 
						|
			likelyRegion[id].script = uint8(b.script.index(list[0].to[1]))
 | 
						|
			if len(list[0].from) > 2 {
 | 
						|
				likelyRegion[id].flags = scriptInFrom
 | 
						|
			}
 | 
						|
		} else if len(list) > 1 {
 | 
						|
			likelyRegion[id].flags = isList
 | 
						|
			likelyRegion[id].lang = uint16(len(likelyRegionList))
 | 
						|
			likelyRegion[id].script = uint8(len(list))
 | 
						|
			for i, x := range list {
 | 
						|
				if len(x.from) == 2 && i != 0 || i > 0 && len(x.from) != 3 {
 | 
						|
					log.Fatalf("unspecified script must be first in list: %v at %d", x.from, i)
 | 
						|
				}
 | 
						|
				x := likelyLangScript{
 | 
						|
					lang:   uint16(b.langIndex(x.to[0])),
 | 
						|
					script: uint8(b.script.index(x.to[1])),
 | 
						|
				}
 | 
						|
				if len(list[0].from) > 2 {
 | 
						|
					x.flags = scriptInFrom
 | 
						|
				}
 | 
						|
				likelyRegionList = append(likelyRegionList, x)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	b.writeType(likelyLangScript{})
 | 
						|
	b.writeSlice("likelyRegion", likelyRegion)
 | 
						|
	b.writeSlice("likelyRegionList", likelyRegionList)
 | 
						|
 | 
						|
	b.writeType(likelyTag{})
 | 
						|
	b.writeSlice("likelyRegionGroup", likelyRegionGroup)
 | 
						|
}
 | 
						|
 | 
						|
// mutualIntelligibility is one language-pair entry of the matchLang table
// written by writeMatchData.
type mutualIntelligibility struct {
	// want is the index of the desired language.
	want uint16
	// have is the index of the supported language.
	have uint16
	// conf holds the raw percentage while entries are collected and the
	// confidence class once writeMatchData has collapsed it with toConf.
	conf uint8
	// oneway is true when the source data marks the match as one-way.
	oneway bool
}

// scriptIntelligibility is one script-pair entry of the matchScript table
// written by writeMatchData.
type scriptIntelligibility struct {
	// langID or 0 if *
	lang uint16
	// want is the index of the desired script.
	want uint8
	// have is the index of the supported script.
	have uint8
	// conf is the confidence class computed by toConf.
	conf uint8
}

// sortByConf implements sort.Interface, ordering entries by descending conf.
type sortByConf []mutualIntelligibility

// Len returns the number of entries.
func (l sortByConf) Len() int { return len(l) }

// Swap exchanges the entries at positions i and j.
func (l sortByConf) Swap(i, j int) { l[i], l[j] = l[j], l[i] }

// Less reports whether entry i has a strictly higher conf than entry j.
func (l sortByConf) Less(i, j int) bool { return l[j].conf < l[i].conf }
 | 
						|
 | 
						|
// toConf maps a percentage value, nominally in [0, 100], to a confidence
// class: 3 (Exact) for exactly 100, 2 (High) for 90 and above, 1 (Low) for
// values above 50 and 0 (No) otherwise. Values above 100 are not treated
// specially and therefore land in the High class.
func toConf(pct uint8) uint8 {
	if pct == 100 {
		return 3 // Exact
	}
	if pct >= 90 {
		return 2 // High
	}
	if pct > 50 {
		return 1 // Low
	}
	return 0 // No
}
 | 
						|
 | 
						|
// writeMatchData writes tables with languages and scripts for which there is
 | 
						|
// mutual intelligibility. The data is based on CLDR's languageMatching data.
 | 
						|
// Note that we use a different algorithm than the one defined by CLDR and that
 | 
						|
// we slightly modify the data. For example, we convert scores to confidence levels.
 | 
						|
// We also drop all region-related data as we use a different algorithm to
 | 
						|
// determine region equivalence.
 | 
						|
func (b *builder) writeMatchData() {
 | 
						|
	b.writeType(mutualIntelligibility{})
 | 
						|
	b.writeType(scriptIntelligibility{})
 | 
						|
	lm := b.supp.LanguageMatching.LanguageMatches
 | 
						|
	cldr.MakeSlice(&lm).SelectAnyOf("type", "written")
 | 
						|
 | 
						|
	matchLang := []mutualIntelligibility{}
 | 
						|
	matchScript := []scriptIntelligibility{}
 | 
						|
	// Convert the languageMatch entries in lists keyed by desired language.
 | 
						|
	for _, m := range lm[0].LanguageMatch {
 | 
						|
		// Different versions of CLDR use different separators.
 | 
						|
		desired := strings.Replace(m.Desired, "-", "_", -1)
 | 
						|
		supported := strings.Replace(m.Supported, "-", "_", -1)
 | 
						|
		d := strings.Split(desired, "_")
 | 
						|
		s := strings.Split(supported, "_")
 | 
						|
		if len(d) != len(s) || len(d) > 2 {
 | 
						|
			// Skip all entries with regions and work around CLDR bug.
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		pct, _ := strconv.ParseInt(m.Percent, 10, 8)
 | 
						|
		if len(d) == 2 && d[0] == s[0] && len(d[1]) == 4 {
 | 
						|
			// language-script pair.
 | 
						|
			lang := uint16(0)
 | 
						|
			if d[0] != "*" {
 | 
						|
				lang = uint16(b.langIndex(d[0]))
 | 
						|
			}
 | 
						|
			matchScript = append(matchScript, scriptIntelligibility{
 | 
						|
				lang: lang,
 | 
						|
				want: uint8(b.script.index(d[1])),
 | 
						|
				have: uint8(b.script.index(s[1])),
 | 
						|
				conf: toConf(uint8(pct)),
 | 
						|
			})
 | 
						|
			if m.Oneway != "true" {
 | 
						|
				matchScript = append(matchScript, scriptIntelligibility{
 | 
						|
					lang: lang,
 | 
						|
					want: uint8(b.script.index(s[1])),
 | 
						|
					have: uint8(b.script.index(d[1])),
 | 
						|
					conf: toConf(uint8(pct)),
 | 
						|
				})
 | 
						|
			}
 | 
						|
		} else if len(d) == 1 && d[0] != "*" {
 | 
						|
			if pct == 100 {
 | 
						|
				// nb == no is already handled by macro mapping. Check there
 | 
						|
				// really is only this case.
 | 
						|
				if d[0] != "no" || s[0] != "nb" {
 | 
						|
					log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
 | 
						|
				}
 | 
						|
				continue
 | 
						|
			}
 | 
						|
			matchLang = append(matchLang, mutualIntelligibility{
 | 
						|
				want:   uint16(b.langIndex(d[0])),
 | 
						|
				have:   uint16(b.langIndex(s[0])),
 | 
						|
				conf:   uint8(pct),
 | 
						|
				oneway: m.Oneway == "true",
 | 
						|
			})
 | 
						|
		} else {
 | 
						|
			// TODO: Handle other mappings.
 | 
						|
			a := []string{"*;*", "*_*;*_*", "es_MX;es_419"}
 | 
						|
			s := strings.Join([]string{desired, supported}, ";")
 | 
						|
			if i := sort.SearchStrings(a, s); i == len(a) || a[i] != s {
 | 
						|
				log.Printf("%q not handled", s)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
	sort.Stable(sortByConf(matchLang))
 | 
						|
	// collapse percentage into confidence classes
 | 
						|
	for i, m := range matchLang {
 | 
						|
		matchLang[i].conf = toConf(m.conf)
 | 
						|
	}
 | 
						|
	b.writeSlice("matchLang", matchLang)
 | 
						|
	b.writeSlice("matchScript", matchScript)
 | 
						|
}
 | 
						|
 | 
						|
func (b *builder) writeRegionInclusionData() {
 | 
						|
	var (
 | 
						|
		// mm holds for each group the set of groups with a distance of 1.
 | 
						|
		mm = make(map[int][]index)
 | 
						|
 | 
						|
		// containment holds for each group the transitive closure of
 | 
						|
		// containment of other groups.
 | 
						|
		containment = make(map[index][]index)
 | 
						|
	)
 | 
						|
	for _, g := range b.supp.TerritoryContainment.Group {
 | 
						|
		// Skip UN and EURO zone as they are flattening the containment
 | 
						|
		// relationship.
 | 
						|
		if g.Type == "EZ" || g.Type == "UN" {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		group := b.region.index(g.Type)
 | 
						|
		groupIdx := b.groups[group]
 | 
						|
		for _, mem := range strings.Split(g.Contains, " ") {
 | 
						|
			r := b.region.index(mem)
 | 
						|
			mm[r] = append(mm[r], groupIdx)
 | 
						|
			if g, ok := b.groups[r]; ok {
 | 
						|
				mm[group] = append(mm[group], g)
 | 
						|
				containment[groupIdx] = append(containment[groupIdx], g)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	regionContainment := make([]uint32, len(b.groups))
 | 
						|
	for _, g := range b.groups {
 | 
						|
		l := containment[g]
 | 
						|
 | 
						|
		// Compute the transitive closure of containment.
 | 
						|
		for i := 0; i < len(l); i++ {
 | 
						|
			l = append(l, containment[l[i]]...)
 | 
						|
		}
 | 
						|
 | 
						|
		// Compute the bitmask.
 | 
						|
		regionContainment[g] = 1 << g
 | 
						|
		for _, v := range l {
 | 
						|
			regionContainment[g] |= 1 << v
 | 
						|
		}
 | 
						|
		// log.Printf("%d: %X", g, regionContainment[g])
 | 
						|
	}
 | 
						|
	b.writeSlice("regionContainment", regionContainment)
 | 
						|
 | 
						|
	regionInclusion := make([]uint8, len(b.region.s))
 | 
						|
	bvs := make(map[uint32]index)
 | 
						|
	// Make the first bitvector positions correspond with the groups.
 | 
						|
	for r, i := range b.groups {
 | 
						|
		bv := uint32(1 << i)
 | 
						|
		for _, g := range mm[r] {
 | 
						|
			bv |= 1 << g
 | 
						|
		}
 | 
						|
		bvs[bv] = i
 | 
						|
		regionInclusion[r] = uint8(bvs[bv])
 | 
						|
	}
 | 
						|
	for r := 1; r < len(b.region.s); r++ {
 | 
						|
		if _, ok := b.groups[r]; !ok {
 | 
						|
			bv := uint32(0)
 | 
						|
			for _, g := range mm[r] {
 | 
						|
				bv |= 1 << g
 | 
						|
			}
 | 
						|
			if bv == 0 {
 | 
						|
				// Pick the world for unspecified regions.
 | 
						|
				bv = 1 << b.groups[b.region.index("001")]
 | 
						|
			}
 | 
						|
			if _, ok := bvs[bv]; !ok {
 | 
						|
				bvs[bv] = index(len(bvs))
 | 
						|
			}
 | 
						|
			regionInclusion[r] = uint8(bvs[bv])
 | 
						|
		}
 | 
						|
	}
 | 
						|
	b.writeSlice("regionInclusion", regionInclusion)
 | 
						|
	regionInclusionBits := make([]uint32, len(bvs))
 | 
						|
	for k, v := range bvs {
 | 
						|
		regionInclusionBits[v] = uint32(k)
 | 
						|
	}
 | 
						|
	// Add bit vectors for increasingly large distances until a fixed point is reached.
 | 
						|
	regionInclusionNext := []uint8{}
 | 
						|
	for i := 0; i < len(regionInclusionBits); i++ {
 | 
						|
		bits := regionInclusionBits[i]
 | 
						|
		next := bits
 | 
						|
		for i := uint(0); i < uint(len(b.groups)); i++ {
 | 
						|
			if bits&(1<<i) != 0 {
 | 
						|
				next |= regionInclusionBits[i]
 | 
						|
			}
 | 
						|
		}
 | 
						|
		if _, ok := bvs[next]; !ok {
 | 
						|
			bvs[next] = index(len(bvs))
 | 
						|
			regionInclusionBits = append(regionInclusionBits, next)
 | 
						|
		}
 | 
						|
		regionInclusionNext = append(regionInclusionNext, uint8(bvs[next]))
 | 
						|
	}
 | 
						|
	b.writeSlice("regionInclusionBits", regionInclusionBits)
 | 
						|
	b.writeSlice("regionInclusionNext", regionInclusionNext)
 | 
						|
}
 | 
						|
 | 
						|
// parentRel is one entry of the parents table written by writeParents.
type parentRel struct {
	// lang is the language index of the parent locale.
	lang uint16
	// script is the script index of the parent locale; it stays 0 when the
	// parent locale names no script.
	script uint8
	// maxScript equals script when the parent names one, and is the index of
	// "Latn" otherwise.
	maxScript uint8
	// toRegion is the region index of the parent locale.
	toRegion uint16
	// fromRegion lists the region indices of the child locales.
	fromRegion []uint16
}
 | 
						|
 | 
						|
func (b *builder) writeParents() {
 | 
						|
	b.writeType(parentRel{})
 | 
						|
 | 
						|
	parents := []parentRel{}
 | 
						|
 | 
						|
	// Construct parent overrides.
 | 
						|
	n := 0
 | 
						|
	for _, p := range b.data.Supplemental().ParentLocales.ParentLocale {
 | 
						|
		// Skipping non-standard scripts to root is implemented using addTags.
 | 
						|
		if p.Parent == "root" {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		sub := strings.Split(p.Parent, "_")
 | 
						|
		parent := parentRel{lang: b.langIndex(sub[0])}
 | 
						|
		if len(sub) == 2 {
 | 
						|
			// TODO: check that all undefined scripts are indeed Latn in these
 | 
						|
			// cases.
 | 
						|
			parent.maxScript = uint8(b.script.index("Latn"))
 | 
						|
			parent.toRegion = uint16(b.region.index(sub[1]))
 | 
						|
		} else {
 | 
						|
			parent.script = uint8(b.script.index(sub[1]))
 | 
						|
			parent.maxScript = parent.script
 | 
						|
			parent.toRegion = uint16(b.region.index(sub[2]))
 | 
						|
		}
 | 
						|
		for _, c := range strings.Split(p.Locales, " ") {
 | 
						|
			region := b.region.index(c[strings.LastIndex(c, "_")+1:])
 | 
						|
			parent.fromRegion = append(parent.fromRegion, uint16(region))
 | 
						|
		}
 | 
						|
		parents = append(parents, parent)
 | 
						|
		n += len(parent.fromRegion)
 | 
						|
	}
 | 
						|
	b.writeSliceAddSize("parents", n*2, parents)
 | 
						|
}
 | 
						|
 | 
						|
func main() {
 | 
						|
	gen.Init()
 | 
						|
 | 
						|
	gen.Repackage("gen_common.go", "common.go", "language")
 | 
						|
 | 
						|
	w := gen.NewCodeWriter()
 | 
						|
	defer w.WriteGoFile("tables.go", "language")
 | 
						|
 | 
						|
	fmt.Fprintln(w, `import "golang.org/x/text/internal/tag"`)
 | 
						|
 | 
						|
	b := newBuilder(w)
 | 
						|
	gen.WriteCLDRVersion(w)
 | 
						|
 | 
						|
	b.parseIndices()
 | 
						|
	b.writeType(fromTo{})
 | 
						|
	b.writeLanguage()
 | 
						|
	b.writeScript()
 | 
						|
	b.writeRegion()
 | 
						|
	b.writeVariant()
 | 
						|
	// TODO: b.writeLocale()
 | 
						|
	b.computeRegionGroups()
 | 
						|
	b.writeLikelyData()
 | 
						|
	b.writeMatchData()
 | 
						|
	b.writeRegionInclusionData()
 | 
						|
	b.writeParents()
 | 
						|
}
 |