William Kurkian | ea86948 | 2019-04-09 15:16:11 -0400 | [diff] [blame] | 1 | // Copyright 2011 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // +build ignore |
| 6 | |
| 7 | // Trie table generator. |
| 8 | // Used by make*tables tools to generate a go file with trie data structures |
| 9 | // for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte |
| 10 | // sequence are used to lookup offsets in the index table to be used for the |
| 11 | // next byte. The last byte is used to index into a table with 16-bit values. |
| 12 | |
| 13 | package main |
| 14 | |
| 15 | import ( |
| 16 | "fmt" |
| 17 | "io" |
| 18 | ) |
| 19 | |
| 20 | const maxSparseEntries = 16 |
| 21 | |
| 22 | type normCompacter struct { |
| 23 | sparseBlocks [][]uint64 |
| 24 | sparseOffset []uint16 |
| 25 | sparseCount int |
| 26 | name string |
| 27 | } |
| 28 | |
| 29 | func mostFrequentStride(a []uint64) int { |
| 30 | counts := make(map[int]int) |
| 31 | var v int |
| 32 | for _, x := range a { |
| 33 | if stride := int(x) - v; v != 0 && stride >= 0 { |
| 34 | counts[stride]++ |
| 35 | } |
| 36 | v = int(x) |
| 37 | } |
| 38 | var maxs, maxc int |
| 39 | for stride, cnt := range counts { |
| 40 | if cnt > maxc || (cnt == maxc && stride < maxs) { |
| 41 | maxs, maxc = stride, cnt |
| 42 | } |
| 43 | } |
| 44 | return maxs |
| 45 | } |
| 46 | |
| 47 | func countSparseEntries(a []uint64) int { |
| 48 | stride := mostFrequentStride(a) |
| 49 | var v, count int |
| 50 | for _, tv := range a { |
| 51 | if int(tv)-v != stride { |
| 52 | if tv != 0 { |
| 53 | count++ |
| 54 | } |
| 55 | } |
| 56 | v = int(tv) |
| 57 | } |
| 58 | return count |
| 59 | } |
| 60 | |
| 61 | func (c *normCompacter) Size(v []uint64) (sz int, ok bool) { |
| 62 | if n := countSparseEntries(v); n <= maxSparseEntries { |
| 63 | return (n+1)*4 + 2, true |
| 64 | } |
| 65 | return 0, false |
| 66 | } |
| 67 | |
| 68 | func (c *normCompacter) Store(v []uint64) uint32 { |
| 69 | h := uint32(len(c.sparseOffset)) |
| 70 | c.sparseBlocks = append(c.sparseBlocks, v) |
| 71 | c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount)) |
| 72 | c.sparseCount += countSparseEntries(v) + 1 |
| 73 | return h |
| 74 | } |
| 75 | |
| 76 | func (c *normCompacter) Handler() string { |
| 77 | return c.name + "Sparse.lookup" |
| 78 | } |
| 79 | |
| 80 | func (c *normCompacter) Print(w io.Writer) (retErr error) { |
| 81 | p := func(f string, x ...interface{}) { |
| 82 | if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil { |
| 83 | retErr = err |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | ls := len(c.sparseBlocks) |
| 88 | p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2) |
| 89 | p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset) |
| 90 | |
| 91 | ns := c.sparseCount |
| 92 | p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4) |
| 93 | p("var %sSparseValues = [%d]valueRange {", c.name, ns) |
| 94 | for i, b := range c.sparseBlocks { |
| 95 | p("\n// Block %#x, offset %#x", i, c.sparseOffset[i]) |
| 96 | var v int |
| 97 | stride := mostFrequentStride(b) |
| 98 | n := countSparseEntries(b) |
| 99 | p("\n{value:%#04x,lo:%#02x},", stride, uint8(n)) |
| 100 | for i, nv := range b { |
| 101 | if int(nv)-v != stride { |
| 102 | if v != 0 { |
| 103 | p(",hi:%#02x},", 0x80+i-1) |
| 104 | } |
| 105 | if nv != 0 { |
| 106 | p("\n{value:%#04x,lo:%#02x", nv, 0x80+i) |
| 107 | } |
| 108 | } |
| 109 | v = int(nv) |
| 110 | } |
| 111 | if v != 0 { |
| 112 | p(",hi:%#02x},", 0x80+len(b)-1) |
| 113 | } |
| 114 | } |
| 115 | p("\n}\n\n") |
| 116 | return |
| 117 | } |