// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
| 4 | |
| 5 | package cldr |
| 6 | |
| 7 | import ( |
| 8 | "archive/zip" |
| 9 | "bytes" |
| 10 | "encoding/xml" |
| 11 | "fmt" |
| 12 | "io" |
| 13 | "io/ioutil" |
| 14 | "log" |
| 15 | "os" |
| 16 | "path/filepath" |
| 17 | "regexp" |
| 18 | ) |
| 19 | |
| 20 | // A Decoder loads an archive of CLDR data. |
| 21 | type Decoder struct { |
| 22 | dirFilter []string |
| 23 | sectionFilter []string |
| 24 | loader Loader |
| 25 | cldr *CLDR |
| 26 | curLocale string |
| 27 | } |
| 28 | |
| 29 | // SetSectionFilter takes a list top-level LDML element names to which |
| 30 | // evaluation of LDML should be limited. It automatically calls SetDirFilter. |
| 31 | func (d *Decoder) SetSectionFilter(filter ...string) { |
| 32 | d.sectionFilter = filter |
| 33 | // TODO: automatically set dir filter |
| 34 | } |
| 35 | |
| 36 | // SetDirFilter limits the loading of LDML XML files of the specied directories. |
| 37 | // Note that sections may be split across directories differently for different CLDR versions. |
| 38 | // For more robust code, use SetSectionFilter. |
| 39 | func (d *Decoder) SetDirFilter(dir ...string) { |
| 40 | d.dirFilter = dir |
| 41 | } |
| 42 | |
// Loader gives indexed access to the files of a CLDR archive. Valid indices
// are 0 through Len()-1.
type Loader interface {
	// Len reports how many files the archive contains.
	Len() int

	// Path returns the path of the file at index i.
	Path(i int) string

	// Reader opens the file at index i. The returned io.ReadCloser should
	// be closed once its contents have been consumed.
	Reader(i int) (io.ReadCloser, error)
}
| 49 | |
// fileRe matches paths of the form <prefix>/<dir>/<name>.xml, accepting either
// forward or backward slashes as separators. Submatch 1 is the name of the
// directory that directly contains the file and submatch 2 is the file name
// without its extension.
//
// The pattern is anchored at the end so that only paths that actually end in
// ".xml" match. Without the anchor, an ".xml" in the middle of a path (such
// as a parent directory called "cldr.xml.d", or a backup named "en.xml.bak")
// would match and yield the wrong directory and file name.
var fileRe = regexp.MustCompile(`.*[/\\](.*)[/\\](.*)\.xml$`)
| 51 | |
| 52 | // Decode loads and decodes the files represented by l. |
| 53 | func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error) { |
| 54 | d.cldr = makeCLDR() |
| 55 | for i := 0; i < l.Len(); i++ { |
| 56 | fname := l.Path(i) |
| 57 | if m := fileRe.FindStringSubmatch(fname); m != nil { |
| 58 | if len(d.dirFilter) > 0 && !in(d.dirFilter, m[1]) { |
| 59 | continue |
| 60 | } |
| 61 | var r io.Reader |
| 62 | if r, err = l.Reader(i); err == nil { |
| 63 | err = d.decode(m[1], m[2], r) |
| 64 | } |
| 65 | if err != nil { |
| 66 | return nil, err |
| 67 | } |
| 68 | } |
| 69 | } |
| 70 | d.cldr.finalize(d.sectionFilter) |
| 71 | return d.cldr, nil |
| 72 | } |
| 73 | |
| 74 | func (d *Decoder) decode(dir, id string, r io.Reader) error { |
| 75 | var v interface{} |
| 76 | var l *LDML |
| 77 | cldr := d.cldr |
| 78 | switch { |
| 79 | case dir == "supplemental": |
| 80 | v = cldr.supp |
| 81 | case dir == "transforms": |
| 82 | return nil |
| 83 | case dir == "bcp47": |
| 84 | v = cldr.bcp47 |
| 85 | case dir == "validity": |
| 86 | return nil |
| 87 | default: |
| 88 | ok := false |
| 89 | if v, ok = cldr.locale[id]; !ok { |
| 90 | l = &LDML{} |
| 91 | v, cldr.locale[id] = l, l |
| 92 | } |
| 93 | } |
| 94 | x := xml.NewDecoder(r) |
| 95 | if err := x.Decode(v); err != nil { |
| 96 | log.Printf("%s/%s: %v", dir, id, err) |
| 97 | return err |
| 98 | } |
| 99 | if l != nil { |
| 100 | if l.Identity == nil { |
| 101 | return fmt.Errorf("%s/%s: missing identity element", dir, id) |
| 102 | } |
| 103 | // TODO: verify when CLDR bug http://unicode.org/cldr/trac/ticket/8970 |
| 104 | // is resolved. |
| 105 | // path := strings.Split(id, "_") |
| 106 | // if lang := l.Identity.Language.Type; lang != path[0] { |
| 107 | // return fmt.Errorf("%s/%s: language was %s; want %s", dir, id, lang, path[0]) |
| 108 | // } |
| 109 | } |
| 110 | return nil |
| 111 | } |
| 112 | |
// pathLoader is a Loader backed by a list of file system paths.
type pathLoader []string

// makePathLoader walks the file tree rooted at path and returns a loader that
// lists every entry found that is not a directory, in the order in which
// filepath.Walk visits them.
//
// Directories are left out because Reader would otherwise hand out a handle
// that cannot be read as a file. If the walk reports an error, it is aborted
// and a nil loader is returned together with that error.
func makePathLoader(path string) (pl pathLoader, err error) {
	err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// info may be nil when err is set, so check err first.
			return err
		}
		if info.IsDir() {
			return nil
		}
		pl = append(pl, path)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return pl, nil
}

// Len reports the number of paths held by the loader.
func (pl pathLoader) Len() int {
	return len(pl)
}

// Path returns the i-th path.
func (pl pathLoader) Path(i int) string {
	return pl[i]
}

// Reader opens the file at the i-th path for reading.
func (pl pathLoader) Reader(i int) (io.ReadCloser, error) {
	return os.Open(pl[i])
}
| 134 | |
| 135 | // DecodePath loads CLDR data from the given path. |
| 136 | func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error) { |
| 137 | loader, err := makePathLoader(path) |
| 138 | if err != nil { |
| 139 | return nil, err |
| 140 | } |
| 141 | return d.Decode(loader) |
| 142 | } |
| 143 | |
// zipLoader is a Loader that serves the entries of a zip archive.
type zipLoader struct {
	r *zip.Reader
}

// Len reports the number of entries in the archive.
func (z zipLoader) Len() int {
	entries := z.r.File
	return len(entries)
}

// Path returns the name recorded in the archive for entry i.
func (z zipLoader) Path(i int) string {
	entry := z.r.File[i]
	return entry.Name
}

// Reader opens entry i for reading.
func (z zipLoader) Reader(i int) (io.ReadCloser, error) {
	entry := z.r.File[i]
	return entry.Open()
}
| 159 | |
| 160 | // DecodeZip loads CLDR data from the zip archive for which r is the source. |
| 161 | func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error) { |
| 162 | buffer, err := ioutil.ReadAll(r) |
| 163 | if err != nil { |
| 164 | return nil, err |
| 165 | } |
| 166 | archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer))) |
| 167 | if err != nil { |
| 168 | return nil, err |
| 169 | } |
| 170 | return d.Decode(zipLoader{archive}) |
| 171 | } |