blob: fd057601bd9178c5def359f002b06ef0769bd6f3 [file] [log] [blame]
khenaidooac637102019-01-14 15:44:34 -05001// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:generate go run gen.go gen_trieval.go gen_ranges.go
6
7// Package bidi contains functionality for bidirectional text support.
8//
Scott Baker8461e152019-10-01 14:44:30 -07009// See https://www.unicode.org/reports/tr9.
khenaidooac637102019-01-14 15:44:34 -050010//
11// NOTE: UNDER CONSTRUCTION. This API may change in backwards incompatible ways
12// and without notice.
13package bidi // import "golang.org/x/text/unicode/bidi"
14
khenaidood948f772021-08-11 17:49:24 -040015// TODO
khenaidooac637102019-01-14 15:44:34 -050016// - Transformer for reordering?
17// - Transformer (validator, really) for Bidi Rule.
18
khenaidood948f772021-08-11 17:49:24 -040019import (
20 "bytes"
21)
22
// This API tries to avoid dealing with embedding levels for now. Under the hood
// these will be computed, but the question is to what extent the user should
// know they exist. We should at some point allow the user to specify an
// embedding hierarchy, though.
27
// Direction describes the overall flow of a piece of text.
type Direction int

// The Direction values. LeftToRight is the zero value of Direction.
const (
	// LeftToRight means the text has no right-to-left characters and either
	// has some left-to-right characters or was given the option
	// DefaultDirection(LeftToRight).
	LeftToRight Direction = iota

	// RightToLeft means the text has no left-to-right characters and either
	// has some right-to-left characters or was given the option
	// DefaultDirection(RightToLeft).
	RightToLeft

	// Mixed means the text has both left-to-right and right-to-left
	// characters.
	Mixed

	// Neutral means the text has neither left-to-right nor right-to-left
	// characters and no default direction has been set.
	Neutral
)
50
khenaidood948f772021-08-11 17:49:24 -040051type options struct {
52 defaultDirection Direction
53}
khenaidooac637102019-01-14 15:44:34 -050054
55// An Option is an option for Bidi processing.
56type Option func(*options)
57
58// ICU allows the user to define embedding levels. This may be used, for example,
59// to use hierarchical structure of markup languages to define embeddings.
60// The following option may be a way to expose this functionality in this API.
61// // LevelFunc sets a function that associates nesting levels with the given text.
62// // The levels function will be called with monotonically increasing values for p.
63// func LevelFunc(levels func(p int) int) Option {
64// panic("unimplemented")
65// }
66
67// DefaultDirection sets the default direction for a Paragraph. The direction is
68// overridden if the text contains directional characters.
69func DefaultDirection(d Direction) Option {
khenaidood948f772021-08-11 17:49:24 -040070 return func(opts *options) {
71 opts.defaultDirection = d
72 }
khenaidooac637102019-01-14 15:44:34 -050073}
74
75// A Paragraph holds a single Paragraph for Bidi processing.
76type Paragraph struct {
khenaidood948f772021-08-11 17:49:24 -040077 p []byte
78 o Ordering
79 opts []Option
80 types []Class
81 pairTypes []bracketType
82 pairValues []rune
83 runes []rune
84 options options
85}
86
87// Initialize the p.pairTypes, p.pairValues and p.types from the input previously
88// set by p.SetBytes() or p.SetString(). Also limit the input up to (and including) a paragraph
89// separator (bidi class B).
90//
91// The function p.Order() needs these values to be set, so this preparation could be postponed.
92// But since the SetBytes and SetStrings functions return the length of the input up to the paragraph
93// separator, the whole input needs to be processed anyway and should not be done twice.
94//
95// The function has the same return values as SetBytes() / SetString()
96func (p *Paragraph) prepareInput() (n int, err error) {
97 p.runes = bytes.Runes(p.p)
98 bytecount := 0
99 // clear slices from previous SetString or SetBytes
100 p.pairTypes = nil
101 p.pairValues = nil
102 p.types = nil
103
104 for _, r := range p.runes {
105 props, i := LookupRune(r)
106 bytecount += i
107 cls := props.Class()
108 if cls == B {
109 return bytecount, nil
110 }
111 p.types = append(p.types, cls)
112 if props.IsOpeningBracket() {
113 p.pairTypes = append(p.pairTypes, bpOpen)
114 p.pairValues = append(p.pairValues, r)
115 } else if props.IsBracket() {
116 // this must be a closing bracket,
117 // since IsOpeningBracket is not true
118 p.pairTypes = append(p.pairTypes, bpClose)
119 p.pairValues = append(p.pairValues, r)
120 } else {
121 p.pairTypes = append(p.pairTypes, bpNone)
122 p.pairValues = append(p.pairValues, 0)
123 }
124 }
125 return bytecount, nil
khenaidooac637102019-01-14 15:44:34 -0500126}
127
128// SetBytes configures p for the given paragraph text. It replaces text
129// previously set by SetBytes or SetString. If b contains a paragraph separator
130// it will only process the first paragraph and report the number of bytes
131// consumed from b including this separator. Error may be non-nil if options are
132// given.
133func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error) {
khenaidood948f772021-08-11 17:49:24 -0400134 p.p = b
135 p.opts = opts
136 return p.prepareInput()
khenaidooac637102019-01-14 15:44:34 -0500137}
138
khenaidood948f772021-08-11 17:49:24 -0400139// SetString configures s for the given paragraph text. It replaces text
140// previously set by SetBytes or SetString. If s contains a paragraph separator
khenaidooac637102019-01-14 15:44:34 -0500141// it will only process the first paragraph and report the number of bytes
khenaidood948f772021-08-11 17:49:24 -0400142// consumed from s including this separator. Error may be non-nil if options are
khenaidooac637102019-01-14 15:44:34 -0500143// given.
144func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error) {
khenaidood948f772021-08-11 17:49:24 -0400145 p.p = []byte(s)
146 p.opts = opts
147 return p.prepareInput()
khenaidooac637102019-01-14 15:44:34 -0500148}
149
150// IsLeftToRight reports whether the principle direction of rendering for this
151// paragraphs is left-to-right. If this returns false, the principle direction
152// of rendering is right-to-left.
153func (p *Paragraph) IsLeftToRight() bool {
khenaidood948f772021-08-11 17:49:24 -0400154 return p.Direction() == LeftToRight
khenaidooac637102019-01-14 15:44:34 -0500155}
156
157// Direction returns the direction of the text of this paragraph.
158//
159// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
160func (p *Paragraph) Direction() Direction {
khenaidood948f772021-08-11 17:49:24 -0400161 return p.o.Direction()
khenaidooac637102019-01-14 15:44:34 -0500162}
163
khenaidood948f772021-08-11 17:49:24 -0400164// TODO: what happens if the position is > len(input)? This should return an error.
165
khenaidooac637102019-01-14 15:44:34 -0500166// RunAt reports the Run at the given position of the input text.
167//
168// This method can be used for computing line breaks on paragraphs.
169func (p *Paragraph) RunAt(pos int) Run {
khenaidood948f772021-08-11 17:49:24 -0400170 c := 0
171 runNumber := 0
172 for i, r := range p.o.runes {
173 c += len(r)
174 if pos < c {
175 runNumber = i
176 }
177 }
178 return p.o.Run(runNumber)
179}
180
181func calculateOrdering(levels []level, runes []rune) Ordering {
182 var curDir Direction
183
184 prevDir := Neutral
185 prevI := 0
186
187 o := Ordering{}
188 // lvl = 0,2,4,...: left to right
189 // lvl = 1,3,5,...: right to left
190 for i, lvl := range levels {
191 if lvl%2 == 0 {
192 curDir = LeftToRight
193 } else {
194 curDir = RightToLeft
195 }
196 if curDir != prevDir {
197 if i > 0 {
198 o.runes = append(o.runes, runes[prevI:i])
199 o.directions = append(o.directions, prevDir)
200 o.startpos = append(o.startpos, prevI)
201 }
202 prevI = i
203 prevDir = curDir
204 }
205 }
206 o.runes = append(o.runes, runes[prevI:])
207 o.directions = append(o.directions, prevDir)
208 o.startpos = append(o.startpos, prevI)
209 return o
khenaidooac637102019-01-14 15:44:34 -0500210}
211
212// Order computes the visual ordering of all the runs in a Paragraph.
213func (p *Paragraph) Order() (Ordering, error) {
khenaidood948f772021-08-11 17:49:24 -0400214 if len(p.types) == 0 {
215 return Ordering{}, nil
216 }
217
218 for _, fn := range p.opts {
219 fn(&p.options)
220 }
221 lvl := level(-1)
222 if p.options.defaultDirection == RightToLeft {
223 lvl = 1
224 }
225 para, err := newParagraph(p.types, p.pairTypes, p.pairValues, lvl)
226 if err != nil {
227 return Ordering{}, err
228 }
229
230 levels := para.getLevels([]int{len(p.types)})
231
232 p.o = calculateOrdering(levels, p.runes)
233 return p.o, nil
khenaidooac637102019-01-14 15:44:34 -0500234}
235
236// Line computes the visual ordering of runs for a single line starting and
237// ending at the given positions in the original text.
238func (p *Paragraph) Line(start, end int) (Ordering, error) {
khenaidood948f772021-08-11 17:49:24 -0400239 lineTypes := p.types[start:end]
240 para, err := newParagraph(lineTypes, p.pairTypes[start:end], p.pairValues[start:end], -1)
241 if err != nil {
242 return Ordering{}, err
243 }
244 levels := para.getLevels([]int{len(lineTypes)})
245 o := calculateOrdering(levels, p.runes[start:end])
246 return o, nil
khenaidooac637102019-01-14 15:44:34 -0500247}
248
249// An Ordering holds the computed visual order of runs of a Paragraph. Calling
250// SetBytes or SetString on the originating Paragraph invalidates an Ordering.
251// The methods of an Ordering should only be called by one goroutine at a time.
khenaidood948f772021-08-11 17:49:24 -0400252type Ordering struct {
253 runes [][]rune
254 directions []Direction
255 startpos []int
256}
khenaidooac637102019-01-14 15:44:34 -0500257
258// Direction reports the directionality of the runs.
259//
260// The direction may be LeftToRight, RightToLeft, Mixed, or Neutral.
261func (o *Ordering) Direction() Direction {
khenaidood948f772021-08-11 17:49:24 -0400262 return o.directions[0]
khenaidooac637102019-01-14 15:44:34 -0500263}
264
265// NumRuns returns the number of runs.
266func (o *Ordering) NumRuns() int {
khenaidood948f772021-08-11 17:49:24 -0400267 return len(o.runes)
khenaidooac637102019-01-14 15:44:34 -0500268}
269
270// Run returns the ith run within the ordering.
271func (o *Ordering) Run(i int) Run {
khenaidood948f772021-08-11 17:49:24 -0400272 r := Run{
273 runes: o.runes[i],
274 direction: o.directions[i],
275 startpos: o.startpos[i],
276 }
277 return r
khenaidooac637102019-01-14 15:44:34 -0500278}
279
280// TODO: perhaps with options.
281// // Reorder creates a reader that reads the runes in visual order per character.
282// // Modifiers remain after the runes they modify.
283// func (l *Runs) Reorder() io.Reader {
284// panic("unimplemented")
285// }
286
287// A Run is a continuous sequence of characters of a single direction.
288type Run struct {
khenaidood948f772021-08-11 17:49:24 -0400289 runes []rune
290 direction Direction
291 startpos int
khenaidooac637102019-01-14 15:44:34 -0500292}
293
294// String returns the text of the run in its original order.
295func (r *Run) String() string {
khenaidood948f772021-08-11 17:49:24 -0400296 return string(r.runes)
khenaidooac637102019-01-14 15:44:34 -0500297}
298
299// Bytes returns the text of the run in its original order.
300func (r *Run) Bytes() []byte {
khenaidood948f772021-08-11 17:49:24 -0400301 return []byte(r.String())
khenaidooac637102019-01-14 15:44:34 -0500302}
303
304// TODO: methods for
305// - Display order
306// - headers and footers
307// - bracket replacement.
308
309// Direction reports the direction of the run.
310func (r *Run) Direction() Direction {
khenaidood948f772021-08-11 17:49:24 -0400311 return r.direction
khenaidooac637102019-01-14 15:44:34 -0500312}
313
khenaidood948f772021-08-11 17:49:24 -0400314// Pos returns the position of the Run within the text passed to SetBytes or SetString of the
khenaidooac637102019-01-14 15:44:34 -0500315// originating Paragraph value.
316func (r *Run) Pos() (start, end int) {
khenaidood948f772021-08-11 17:49:24 -0400317 return r.startpos, r.startpos + len(r.runes) - 1
khenaidooac637102019-01-14 15:44:34 -0500318}
319
320// AppendReverse reverses the order of characters of in, appends them to out,
321// and returns the result. Modifiers will still follow the runes they modify.
322// Brackets are replaced with their counterparts.
323func AppendReverse(out, in []byte) []byte {
khenaidood948f772021-08-11 17:49:24 -0400324 ret := make([]byte, len(in)+len(out))
325 copy(ret, out)
326 inRunes := bytes.Runes(in)
327
328 for i, r := range inRunes {
329 prop, _ := LookupRune(r)
330 if prop.IsBracket() {
331 inRunes[i] = prop.reverseBracket(r)
332 }
333 }
334
335 for i, j := 0, len(inRunes)-1; i < j; i, j = i+1, j-1 {
336 inRunes[i], inRunes[j] = inRunes[j], inRunes[i]
337 }
338 copy(ret[len(out):], string(inRunes))
339
340 return ret
khenaidooac637102019-01-14 15:44:34 -0500341}
342
343// ReverseString reverses the order of characters in s and returns a new string.
344// Modifiers will still follow the runes they modify. Brackets are replaced with
345// their counterparts.
346func ReverseString(s string) string {
khenaidood948f772021-08-11 17:49:24 -0400347 input := []rune(s)
348 li := len(input)
349 ret := make([]rune, li)
350 for i, r := range input {
351 prop, _ := LookupRune(r)
352 if prop.IsBracket() {
353 ret[li-i-1] = prop.reverseBracket(r)
354 } else {
355 ret[li-i-1] = r
356 }
357 }
358 return string(ret)
khenaidooac637102019-01-14 15:44:34 -0500359}