blob: 987fc169cc04a1b914770a475a619d26cbcf2c21 [file] [log] [blame]
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9import (
10 "flag"
11 "log"
12
13 "golang.org/x/text/internal/gen"
14 "golang.org/x/text/internal/triegen"
15 "golang.org/x/text/internal/ucd"
16)
17
18var outputFile = flag.String("out", "tables.go", "output file")
19
20func main() {
21 gen.Init()
22 gen.Repackage("gen_trieval.go", "trieval.go", "bidi")
23 gen.Repackage("gen_ranges.go", "ranges_test.go", "bidi")
24
25 genTables()
26}
27
28// bidiClass names and codes taken from class "bc" in
Don Newton7577f072020-01-06 12:41:11 -050029// https://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
Don Newton98fd8812019-09-23 15:15:02 -040030var bidiClass = map[string]Class{
31 "AL": AL, // ArabicLetter
32 "AN": AN, // ArabicNumber
33 "B": B, // ParagraphSeparator
34 "BN": BN, // BoundaryNeutral
35 "CS": CS, // CommonSeparator
36 "EN": EN, // EuropeanNumber
37 "ES": ES, // EuropeanSeparator
38 "ET": ET, // EuropeanTerminator
39 "L": L, // LeftToRight
40 "NSM": NSM, // NonspacingMark
41 "ON": ON, // OtherNeutral
42 "R": R, // RightToLeft
43 "S": S, // SegmentSeparator
44 "WS": WS, // WhiteSpace
45
46 "FSI": Control,
47 "PDF": Control,
48 "PDI": Control,
49 "LRE": Control,
50 "LRI": Control,
51 "LRO": Control,
52 "RLE": Control,
53 "RLI": Control,
54 "RLO": Control,
55}
56
57func genTables() {
58 if numClass > 0x0F {
59 log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
60 }
61 w := gen.NewCodeWriter()
62 defer w.WriteVersionedGoFile(*outputFile, "bidi")
63
64 gen.WriteUnicodeVersion(w)
65
66 t := triegen.NewTrie("bidi")
67
68 // Build data about bracket mapping. These bits need to be or-ed with
69 // any other bits.
70 orMask := map[rune]uint64{}
71
72 xorMap := map[rune]int{}
73 xorMasks := []rune{0} // First value is no-op.
74
75 ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
76 r1 := p.Rune(0)
77 r2 := p.Rune(1)
78 xor := r1 ^ r2
79 if _, ok := xorMap[xor]; !ok {
80 xorMap[xor] = len(xorMasks)
81 xorMasks = append(xorMasks, xor)
82 }
83 entry := uint64(xorMap[xor]) << xorMaskShift
84 switch p.String(2) {
85 case "o":
86 entry |= openMask
87 case "c", "n":
88 default:
89 log.Fatalf("Unknown bracket class %q.", p.String(2))
90 }
91 orMask[r1] = entry
92 })
93
94 w.WriteComment(`
95 xorMasks contains masks to be xor-ed with brackets to get the reverse
96 version.`)
97 w.WriteVar("xorMasks", xorMasks)
98
99 done := map[rune]bool{}
100
101 insert := func(r rune, c Class) {
102 if !done[r] {
103 t.Insert(r, orMask[r]|uint64(c))
104 done[r] = true
105 }
106 }
107
108 // Insert the derived BiDi properties.
109 ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
110 r := p.Rune(0)
111 class, ok := bidiClass[p.String(1)]
112 if !ok {
113 log.Fatalf("%U: Unknown BiDi class %q", r, p.String(1))
114 }
115 insert(r, class)
116 })
117 visitDefaults(insert)
118
119 // TODO: use sparse blocks. This would reduce table size considerably
120 // from the looks of it.
121
122 sz, err := t.Gen(w)
123 if err != nil {
124 log.Fatal(err)
125 }
126 w.Size += sz
127}
128
// xorMasks is a placeholder that lets the methods in gen_common compile as
// part of this generator. The real table is produced by this file and
// written to tables.go.
var xorMasks []rune