Blame - vendor/golang.org/x/text/unicode/bidi/core.go - voltctl

blob: 48d144008aa9c3910e2c2724ef867a51fddd5248 [file] [log] [blame]

Zack Williams	e940c7a	2019-08-21 14:25:39 -0700	[diff] [blame]	1	// Copyright 2015 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package bidi
				6
				7	import "log"
				8
				9	// This implementation is a port based on the reference implementation found at:
				10	// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
				11	//
				12	// described in Unicode Bidirectional Algorithm (UAX #9).
				13	//
				14	// Input:
				15	// There are two levels of input to the algorithm, since clients may prefer to
				16	// supply some information from out-of-band sources rather than relying on the
				17	// default behavior.
				18	//
				19	// - Bidi class array
				20	// - Bidi class array, with externally supplied base line direction
				21	//
				22	// Output:
				23	// Output is separated into several stages:
				24	//
				25	// - levels array over entire paragraph
				26	// - reordering array over entire paragraph
				27	// - levels array over line
				28	// - reordering array over line
				29	//
				30	// Note that for conformance to the Unicode Bidirectional Algorithm,
				31	// implementations are only required to generate correct reordering and
				32	// character directionality (odd or even levels) over a line. Generating
				33	// identical level arrays over a line is not required. Bidi explicit format
				34	// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
				35	// positions as long as the rest of the input is properly reordered.
				36	//
				37	// As the algorithm is defined to operate on a single paragraph at a time, this
				38	// implementation is written to handle single paragraphs. Thus rule P1 is
				39	// presumed by this implementation-- the data provided to the implementation is
				40	// assumed to be a single paragraph, and either contains no 'B' codes, or a
				41	// single 'B' code at the end of the input. 'B' is allowed as input to
				42	// illustrate how the algorithm assigns it a level.
				43	//
				44	// Also note that rules L3 and L4 depend on the rendering engine that uses the
				45	// result of the bidi algorithm. This implementation assumes that the rendering
				46	// engine expects combining marks in visual order (e.g. to the left of their
				47	// base character in RTL runs) and that it adjusts the glyphs used to render
				48	// mirrored characters that are in RTL runs so that they render appropriately.
				49
				50	// level is the embedding level of a character. Even embedding levels indicate
				51	// left-to-right order and odd levels indicate right-to-left order. The special
				52	// level of -1 is reserved for undefined order.
				53	type level int8
				54
				55	const implicitLevel level = -1
				56
				57	// in returns if x is equal to any of the values in set.
				58	func (c Class) in(set ...Class) bool {
				59	for _, s := range set {
				60	if c == s {
				61	return true
				62	}
				63	}
				64	return false
				65	}
				66
				67	// A paragraph contains the state of a paragraph.
				68	type paragraph struct {
				69	initialTypes []Class
				70
				71	// Arrays of properties needed for paired bracket evaluation in N0
				72	pairTypes []bracketType // paired Bracket types for paragraph
				73	pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
				74
				75	embeddingLevel level // default: = implicitLevel;
				76
				77	// at the paragraph levels
				78	resultTypes []Class
				79	resultLevels []level
				80
				81	// Index of matching PDI for isolate initiator characters. For other
				82	// characters, the value of matchingPDI will be set to -1. For isolate
				83	// initiators with no matching PDI, matchingPDI will be set to the length of
				84	// the input string.
				85	matchingPDI []int
				86
				87	// Index of matching isolate initiator for PDI characters. For other
				88	// characters, and for PDIs with no matching isolate initiator, the value of
				89	// matchingIsolateInitiator will be set to -1.
				90	matchingIsolateInitiator []int
				91	}
				92
				93	// newParagraph initializes a paragraph. The user needs to supply a few arrays
				94	// corresponding to the preprocessed text input. The types correspond to the
				95	// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
				96	// each rune. pairValues provides a unique bracket class identifier for each
				97	// rune (suggested is the rune of the open bracket for opening and matching
				98	// close brackets, after normalization). The embedding levels are optional, but
				99	// may be supplied to encode embedding levels of styled text.
				100	//
				101	// TODO: return an error.
				102	func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) *paragraph {
				103	validateTypes(types)
				104	validatePbTypes(pairTypes)
				105	validatePbValues(pairValues, pairTypes)
				106	validateParagraphEmbeddingLevel(levels)
				107
				108	p := &paragraph{
				109	initialTypes: append([]Class(nil), types...),
				110	embeddingLevel: levels,
				111
				112	pairTypes: pairTypes,
				113	pairValues: pairValues,
				114
				115	resultTypes: append([]Class(nil), types...),
				116	}
				117	p.run()
				118	return p
				119	}
				120
				121	func (p *paragraph) Len() int { return len(p.initialTypes) }
				122
				123	// The algorithm. Does not include line-based processing (Rules L1, L2).
				124	// These are applied later in the line-based phase of the algorithm.
				125	func (p *paragraph) run() {
				126	p.determineMatchingIsolates()
				127
				128	// 1) determining the paragraph level
				129	// Rule P1 is the requirement for entering this algorithm.
				130	// Rules P2, P3.
				131	// If no externally supplied paragraph embedding level, use default.
				132	if p.embeddingLevel == implicitLevel {
				133	p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
				134	}
				135
				136	// Initialize result levels to paragraph embedding level.
				137	p.resultLevels = make([]level, p.Len())
				138	setLevels(p.resultLevels, p.embeddingLevel)
				139
				140	// 2) Explicit levels and directions
				141	// Rules X1-X8.
				142	p.determineExplicitEmbeddingLevels()
				143
				144	// Rule X9.
				145	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
				146	// from the string explicitly. But they are not copied into isolating run
				147	// sequences when they are created, so they are removed for all
				148	// practical purposes.
				149
				150	// Rule X10.
				151	// Run remainder of algorithm one isolating run sequence at a time
				152	for _, seq := range p.determineIsolatingRunSequences() {
				153	// 3) resolving weak types
				154	// Rules W1-W7.
				155	seq.resolveWeakTypes()
				156
				157	// 4a) resolving paired brackets
				158	// Rule N0
				159	resolvePairedBrackets(seq)
				160
				161	// 4b) resolving neutral types
				162	// Rules N1-N3.
				163	seq.resolveNeutralTypes()
				164
				165	// 5) resolving implicit embedding levels
				166	// Rules I1, I2.
				167	seq.resolveImplicitLevels()
				168
				169	// Apply the computed levels and types
				170	seq.applyLevelsAndTypes()
				171	}
				172
				173	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
				174	// BNs. This is for convenience, so the resulting level array will have
				175	// a value for every character.
				176	p.assignLevelsToCharactersRemovedByX9()
				177	}
				178
				179	// determineMatchingIsolates determines the matching PDI for each isolate
				180	// initiator and vice versa.
				181	//
				182	// Definition BD9.
				183	//
				184	// At the end of this function:
				185	//
				186	// - The member variable matchingPDI is set to point to the index of the
				187	// matching PDI character for each isolate initiator character. If there is
				188	// no matching PDI, it is set to the length of the input text. For other
				189	// characters, it is set to -1.
				190	// - The member variable matchingIsolateInitiator is set to point to the
				191	// index of the matching isolate initiator character for each PDI character.
				192	// If there is no matching isolate initiator, or the character is not a PDI,
				193	// it is set to -1.
				194	func (p *paragraph) determineMatchingIsolates() {
				195	p.matchingPDI = make([]int, p.Len())
				196	p.matchingIsolateInitiator = make([]int, p.Len())
				197
				198	for i := range p.matchingIsolateInitiator {
				199	p.matchingIsolateInitiator[i] = -1
				200	}
				201
				202	for i := range p.matchingPDI {
				203	p.matchingPDI[i] = -1
				204
				205	if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
				206	depthCounter := 1
				207	for j := i + 1; j < p.Len(); j++ {
				208	if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
				209	depthCounter++
				210	} else if u == PDI {
				211	if depthCounter--; depthCounter == 0 {
				212	p.matchingPDI[i] = j
				213	p.matchingIsolateInitiator[j] = i
				214	break
				215	}
				216	}
				217	}
				218	if p.matchingPDI[i] == -1 {
				219	p.matchingPDI[i] = p.Len()
				220	}
				221	}
				222	}
				223	}
				224
				225	// determineParagraphEmbeddingLevel reports the resolved paragraph direction of
				226	// the substring limited by the given range [start, end).
				227	//
				228	// Determines the paragraph level based on rules P2, P3. This is also used
				229	// in rule X5c to find if an FSI should resolve to LRI or RLI.
				230	func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
				231	var strongType Class = unknownClass
				232
				233	// Rule P2.
				234	for i := start; i < end; i++ {
				235	if t := p.resultTypes[i]; t.in(L, AL, R) {
				236	strongType = t
				237	break
				238	} else if t.in(FSI, LRI, RLI) {
				239	i = p.matchingPDI[i] // skip over to the matching PDI
				240	if i > end {
				241	log.Panic("assert (i <= end)")
				242	}
				243	}
				244	}
				245	// Rule P3.
				246	switch strongType {
				247	case unknownClass: // none found
				248	// default embedding level when no strong types found is 0.
				249	return 0
				250	case L:
				251	return 0
				252	default: // AL, R
				253	return 1
				254	}
				255	}
				256
				257	const maxDepth = 125
				258
				259	// This stack will store the embedding levels and override and isolated
				260	// statuses
				261	type directionalStatusStack struct {
				262	stackCounter int
				263	embeddingLevelStack [maxDepth + 1]level
				264	overrideStatusStack [maxDepth + 1]Class
				265	isolateStatusStack [maxDepth + 1]bool
				266	}
				267
				268	func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
				269	func (s *directionalStatusStack) pop() { s.stackCounter-- }
				270	func (s *directionalStatusStack) depth() int { return s.stackCounter }
				271
				272	func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
				273	s.embeddingLevelStack[s.stackCounter] = level
				274	s.overrideStatusStack[s.stackCounter] = overrideStatus
				275	s.isolateStatusStack[s.stackCounter] = isolateStatus
				276	s.stackCounter++
				277	}
				278
				279	func (s *directionalStatusStack) lastEmbeddingLevel() level {
				280	return s.embeddingLevelStack[s.stackCounter-1]
				281	}
				282
				283	func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
				284	return s.overrideStatusStack[s.stackCounter-1]
				285	}
				286
				287	func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
				288	return s.isolateStatusStack[s.stackCounter-1]
				289	}
				290
				291	// Determine explicit levels using rules X1 - X8
				292	func (p *paragraph) determineExplicitEmbeddingLevels() {
				293	var stack directionalStatusStack
				294	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
				295
				296	// Rule X1.
				297	stack.push(p.embeddingLevel, ON, false)
				298
				299	for i, t := range p.resultTypes {
				300	// Rules X2, X3, X4, X5, X5a, X5b, X5c
				301	switch t {
				302	case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
				303	isIsolate := t.in(RLI, LRI, FSI)
				304	isRTL := t.in(RLE, RLO, RLI)
				305
				306	// override if this is an FSI that resolves to RLI
				307	if t == FSI {
				308	isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
				309	}
				310	if isIsolate {
				311	p.resultLevels[i] = stack.lastEmbeddingLevel()
				312	if stack.lastDirectionalOverrideStatus() != ON {
				313	p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				314	}
				315	}
				316
				317	var newLevel level
				318	if isRTL {
				319	// least greater odd
				320	newLevel = (stack.lastEmbeddingLevel() + 1) \| 1
				321	} else {
				322	// least greater even
				323	newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
				324	}
				325
				326	if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
				327	if isIsolate {
				328	validIsolateCount++
				329	}
				330	// Push new embedding level, override status, and isolated
				331	// status.
				332	// No check for valid stack counter, since the level check
				333	// suffices.
				334	switch t {
				335	case LRO:
				336	stack.push(newLevel, L, isIsolate)
				337	case RLO:
				338	stack.push(newLevel, R, isIsolate)
				339	default:
				340	stack.push(newLevel, ON, isIsolate)
				341	}
				342	// Not really part of the spec
				343	if !isIsolate {
				344	p.resultLevels[i] = newLevel
				345	}
				346	} else {
				347	// This is an invalid explicit formatting character,
				348	// so apply the "Otherwise" part of rules X2-X5b.
				349	if isIsolate {
				350	overflowIsolateCount++
				351	} else { // !isIsolate
				352	if overflowIsolateCount == 0 {
				353	overflowEmbeddingCount++
				354	}
				355	}
				356	}
				357
				358	// Rule X6a
				359	case PDI:
				360	if overflowIsolateCount > 0 {
				361	overflowIsolateCount--
				362	} else if validIsolateCount == 0 {
				363	// do nothing
				364	} else {
				365	overflowEmbeddingCount = 0
				366	for !stack.lastDirectionalIsolateStatus() {
				367	stack.pop()
				368	}
				369	stack.pop()
				370	validIsolateCount--
				371	}
				372	p.resultLevels[i] = stack.lastEmbeddingLevel()
				373
				374	// Rule X7
				375	case PDF:
				376	// Not really part of the spec
				377	p.resultLevels[i] = stack.lastEmbeddingLevel()
				378
				379	if overflowIsolateCount > 0 {
				380	// do nothing
				381	} else if overflowEmbeddingCount > 0 {
				382	overflowEmbeddingCount--
				383	} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
				384	stack.pop()
				385	}
				386
				387	case B: // paragraph separator.
				388	// Rule X8.
				389
				390	// These values are reset for clarity, in this implementation B
				391	// can only occur as the last code in the array.
				392	stack.empty()
				393	overflowIsolateCount = 0
				394	overflowEmbeddingCount = 0
				395	validIsolateCount = 0
				396	p.resultLevels[i] = p.embeddingLevel
				397
				398	default:
				399	p.resultLevels[i] = stack.lastEmbeddingLevel()
				400	if stack.lastDirectionalOverrideStatus() != ON {
				401	p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				402	}
				403	}
				404	}
				405	}
				406
				407	type isolatingRunSequence struct {
				408	p *paragraph
				409
				410	indexes []int // indexes to the original string
				411
				412	types []Class // type of each character using the index
				413	resolvedLevels []level // resolved levels after application of rules
				414	level level
				415	sos, eos Class
				416	}
				417
				418	func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
				419
				420	func maxLevel(a, b level) level {
				421	if a > b {
				422	return a
				423	}
				424	return b
				425	}
				426
				427	// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
				428	// either L or R, for each isolating run sequence.
				429	func (p paragraph) isolatingRunSequence(indexes []int) isolatingRunSequence {
				430	length := len(indexes)
				431	types := make([]Class, length)
				432	for i, x := range indexes {
				433	types[i] = p.resultTypes[x]
				434	}
				435
				436	// assign level, sos and eos
				437	prevChar := indexes[0] - 1
				438	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
				439	prevChar--
				440	}
				441	prevLevel := p.embeddingLevel
				442	if prevChar >= 0 {
				443	prevLevel = p.resultLevels[prevChar]
				444	}
				445
				446	var succLevel level
				447	lastType := types[length-1]
				448	if lastType.in(LRI, RLI, FSI) {
				449	succLevel = p.embeddingLevel
				450	} else {
				451	// the first character after the end of run sequence
				452	limit := indexes[length-1] + 1
				453	for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
				454
				455	}
				456	succLevel = p.embeddingLevel
				457	if limit < p.Len() {
				458	succLevel = p.resultLevels[limit]
				459	}
				460	}
				461	level := p.resultLevels[indexes[0]]
				462	return &isolatingRunSequence{
				463	p: p,
				464	indexes: indexes,
				465	types: types,
				466	level: level,
				467	sos: typeForLevel(maxLevel(prevLevel, level)),
				468	eos: typeForLevel(maxLevel(succLevel, level)),
				469	}
				470	}
				471
				472	// Resolving weak types Rules W1-W7.
				473	//
				474	// Note that some weak types (EN, AN) remain after this processing is
				475	// complete.
				476	func (s *isolatingRunSequence) resolveWeakTypes() {
				477
				478	// on entry, only these types remain
				479	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
				480
				481	// Rule W1.
				482	// Changes all NSMs.
				483	preceedingCharacterType := s.sos
				484	for i, t := range s.types {
				485	if t == NSM {
				486	s.types[i] = preceedingCharacterType
				487	} else {
				488	if t.in(LRI, RLI, FSI, PDI) {
				489	preceedingCharacterType = ON
				490	}
				491	preceedingCharacterType = t
				492	}
				493	}
				494
				495	// Rule W2.
				496	// EN does not change at the start of the run, because sos != AL.
				497	for i, t := range s.types {
				498	if t == EN {
				499	for j := i - 1; j >= 0; j-- {
				500	if t := s.types[j]; t.in(L, R, AL) {
				501	if t == AL {
				502	s.types[i] = AN
				503	}
				504	break
				505	}
				506	}
				507	}
				508	}
				509
				510	// Rule W3.
				511	for i, t := range s.types {
				512	if t == AL {
				513	s.types[i] = R
				514	}
				515	}
				516
				517	// Rule W4.
				518	// Since there must be values on both sides for this rule to have an
				519	// effect, the scan skips the first and last value.
				520	//
				521	// Although the scan proceeds left to right, and changes the type
				522	// values in a way that would appear to affect the computations
				523	// later in the scan, there is actually no problem. A change in the
				524	// current value can only affect the value to its immediate right,
				525	// and only affect it if it is ES or CS. But the current value can
				526	// only change if the value to its right is not ES or CS. Thus
				527	// either the current value will not change, or its change will have
				528	// no effect on the remainder of the analysis.
				529
				530	for i := 1; i < s.Len()-1; i++ {
				531	t := s.types[i]
				532	if t == ES \|\| t == CS {
				533	prevSepType := s.types[i-1]
				534	succSepType := s.types[i+1]
				535	if prevSepType == EN && succSepType == EN {
				536	s.types[i] = EN
				537	} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
				538	s.types[i] = AN
				539	}
				540	}
				541	}
				542
				543	// Rule W5.
				544	for i, t := range s.types {
				545	if t == ET {
				546	// locate end of sequence
				547	runStart := i
				548	runEnd := s.findRunLimit(runStart, ET)
				549
				550	// check values at ends of sequence
				551	t := s.sos
				552	if runStart > 0 {
				553	t = s.types[runStart-1]
				554	}
				555	if t != EN {
				556	t = s.eos
				557	if runEnd < len(s.types) {
				558	t = s.types[runEnd]
				559	}
				560	}
				561	if t == EN {
				562	setTypes(s.types[runStart:runEnd], EN)
				563	}
				564	// continue at end of sequence
				565	i = runEnd
				566	}
				567	}
				568
				569	// Rule W6.
				570	for i, t := range s.types {
				571	if t.in(ES, ET, CS) {
				572	s.types[i] = ON
				573	}
				574	}
				575
				576	// Rule W7.
				577	for i, t := range s.types {
				578	if t == EN {
				579	// set default if we reach start of run
				580	prevStrongType := s.sos
				581	for j := i - 1; j >= 0; j-- {
				582	t = s.types[j]
				583	if t == L \|\| t == R { // AL's have been changed to R
				584	prevStrongType = t
				585	break
				586	}
				587	}
				588	if prevStrongType == L {
				589	s.types[i] = L
				590	}
				591	}
				592	}
				593	}
				594
				595	// 6) resolving neutral types Rules N1-N2.
				596	func (s *isolatingRunSequence) resolveNeutralTypes() {
				597
				598	// on entry, only these types can be in resultTypes
				599	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
				600
				601	for i, t := range s.types {
				602	switch t {
				603	case WS, ON, B, S, RLI, LRI, FSI, PDI:
				604	// find bounds of run of neutrals
				605	runStart := i
				606	runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
				607
				608	// determine effective types at ends of run
				609	var leadType, trailType Class
				610
				611	// Note that the character found can only be L, R, AN, or
				612	// EN.
				613	if runStart == 0 {
				614	leadType = s.sos
				615	} else {
				616	leadType = s.types[runStart-1]
				617	if leadType.in(AN, EN) {
				618	leadType = R
				619	}
				620	}
				621	if runEnd == len(s.types) {
				622	trailType = s.eos
				623	} else {
				624	trailType = s.types[runEnd]
				625	if trailType.in(AN, EN) {
				626	trailType = R
				627	}
				628	}
				629
				630	var resolvedType Class
				631	if leadType == trailType {
				632	// Rule N1.
				633	resolvedType = leadType
				634	} else {
				635	// Rule N2.
				636	// Notice the embedding level of the run is used, not
				637	// the paragraph embedding level.
				638	resolvedType = typeForLevel(s.level)
				639	}
				640
				641	setTypes(s.types[runStart:runEnd], resolvedType)
				642
				643	// skip over run of (former) neutrals
				644	i = runEnd
				645	}
				646	}
				647	}
				648
				649	func setLevels(levels []level, newLevel level) {
				650	for i := range levels {
				651	levels[i] = newLevel
				652	}
				653	}
				654
				655	func setTypes(types []Class, newType Class) {
				656	for i := range types {
				657	types[i] = newType
				658	}
				659	}
				660
				661	// 7) resolving implicit embedding levels Rules I1, I2.
				662	func (s *isolatingRunSequence) resolveImplicitLevels() {
				663
				664	// on entry, only these types can be in resultTypes
				665	s.assertOnly(L, R, EN, AN)
				666
				667	s.resolvedLevels = make([]level, len(s.types))
				668	setLevels(s.resolvedLevels, s.level)
				669
				670	if (s.level & 1) == 0 { // even level
				671	for i, t := range s.types {
				672	// Rule I1.
				673	if t == L {
				674	// no change
				675	} else if t == R {
				676	s.resolvedLevels[i] += 1
				677	} else { // t == AN \|\| t == EN
				678	s.resolvedLevels[i] += 2
				679	}
				680	}
				681	} else { // odd level
				682	for i, t := range s.types {
				683	// Rule I2.
				684	if t == R {
				685	// no change
				686	} else { // t == L \|\| t == AN \|\| t == EN
				687	s.resolvedLevels[i] += 1
				688	}
				689	}
				690	}
				691	}
				692
				693	// Applies the levels and types resolved in rules W1-I2 to the
				694	// resultLevels array.
				695	func (s *isolatingRunSequence) applyLevelsAndTypes() {
				696	for i, x := range s.indexes {
				697	s.p.resultTypes[x] = s.types[i]
				698	s.p.resultLevels[x] = s.resolvedLevels[i]
				699	}
				700	}
				701
				702	// Return the limit of the run consisting only of the types in validSet
				703	// starting at index. This checks the value at index, and will return
				704	// index if that value is not in validSet.
				705	func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
				706	loop:
				707	for ; index < len(s.types); index++ {
				708	t := s.types[index]
				709	for _, valid := range validSet {
				710	if t == valid {
				711	continue loop
				712	}
				713	}
				714	return index // didn't find a match in validSet
				715	}
				716	return len(s.types)
				717	}
				718
				719	// Algorithm validation. Assert that all values in types are in the
				720	// provided set.
				721	func (s *isolatingRunSequence) assertOnly(codes ...Class) {
				722	loop:
				723	for i, t := range s.types {
				724	for _, c := range codes {
				725	if t == c {
				726	continue loop
				727	}
				728	}
				729	log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
				730	}
				731	}
				732
				733	// determineLevelRuns returns an array of level runs. Each level run is
				734	// described as an array of indexes into the input string.
				735	//
				736	// Determines the level runs. Rule X9 will be applied in determining the
				737	// runs, in the way that makes sure the characters that are supposed to be
				738	// removed are not included in the runs.
				739	func (p *paragraph) determineLevelRuns() [][]int {
				740	run := []int{}
				741	allRuns := [][]int{}
				742	currentLevel := implicitLevel
				743
				744	for i := range p.initialTypes {
				745	if !isRemovedByX9(p.initialTypes[i]) {
				746	if p.resultLevels[i] != currentLevel {
				747	// we just encountered a new run; wrap up last run
				748	if currentLevel >= 0 { // only wrap it up if there was a run
				749	allRuns = append(allRuns, run)
				750	run = nil
				751	}
				752	// Start new run
				753	currentLevel = p.resultLevels[i]
				754	}
				755	run = append(run, i)
				756	}
				757	}
				758	// Wrap up the final run, if any
				759	if len(run) > 0 {
				760	allRuns = append(allRuns, run)
				761	}
				762	return allRuns
				763	}
				764
				765	// Definition BD13. Determine isolating run sequences.
				766	func (p paragraph) determineIsolatingRunSequences() []isolatingRunSequence {
				767	levelRuns := p.determineLevelRuns()
				768
				769	// Compute the run that each character belongs to
				770	runForCharacter := make([]int, p.Len())
				771	for i, run := range levelRuns {
				772	for _, index := range run {
				773	runForCharacter[index] = i
				774	}
				775	}
				776
				777	sequences := []*isolatingRunSequence{}
				778
				779	var currentRunSequence []int
				780
				781	for _, run := range levelRuns {
				782	first := run[0]
				783	if p.initialTypes[first] != PDI \|\| p.matchingIsolateInitiator[first] == -1 {
				784	currentRunSequence = nil
				785	// int run = i;
				786	for {
				787	// Copy this level run into currentRunSequence
				788	currentRunSequence = append(currentRunSequence, run...)
				789
				790	last := currentRunSequence[len(currentRunSequence)-1]
				791	lastT := p.initialTypes[last]
				792	if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
				793	run = levelRuns[runForCharacter[p.matchingPDI[last]]]
				794	} else {
				795	break
				796	}
				797	}
				798	sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
				799	}
				800	}
				801	return sequences
				802	}
				803
				804	// Assign level information to characters removed by rule X9. This is for
				805	// ease of relating the level information to the original input data. Note
				806	// that the levels assigned to these codes are arbitrary, they're chosen so
				807	// as to avoid breaking level runs.
				808	func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
				809	for i, t := range p.initialTypes {
				810	if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
				811	p.resultTypes[i] = t
				812	p.resultLevels[i] = -1
				813	}
				814	}
				815	// now propagate forward the levels information (could have
				816	// propagated backward, the main thing is not to introduce a level
				817	// break where one doesn't already exist).
				818
				819	if p.resultLevels[0] == -1 {
				820	p.resultLevels[0] = p.embeddingLevel
				821	}
				822	for i := 1; i < len(p.initialTypes); i++ {
				823	if p.resultLevels[i] == -1 {
				824	p.resultLevels[i] = p.resultLevels[i-1]
				825	}
				826	}
				827	// Embedding information is for informational purposes only so need not be
				828	// adjusted.
				829	}
				830
				831	//
				832	// Output
				833	//
				834
				835	// getLevels computes levels array breaking lines at offsets in linebreaks.
				836	// Rule L1.
				837	//
				838	// The linebreaks array must include at least one value. The values must be
				839	// in strictly increasing order (no duplicates) between 1 and the length of
				840	// the text, inclusive. The last value must be the length of the text.
				841	func (p *paragraph) getLevels(linebreaks []int) []level {
				842	// Note that since the previous processing has removed all
				843	// P, S, and WS values from resultTypes, the values referred to
				844	// in these rules are the initial types, before any processing
				845	// has been applied (including processing of overrides).
				846	//
				847	// This example implementation has reinserted explicit format codes
				848	// and BN, in order that the levels array correspond to the
				849	// initial text. Their final placement is not normative.
				850	// These codes are treated like WS in this implementation,
				851	// so they don't interrupt sequences of WS.
				852
				853	validateLineBreaks(linebreaks, p.Len())
				854
				855	result := append([]level(nil), p.resultLevels...)
				856
				857	// don't worry about linebreaks since if there is a break within
				858	// a series of WS values preceding S, the linebreak itself
				859	// causes the reset.
				860	for i, t := range p.initialTypes {
				861	if t.in(B, S) {
				862	// Rule L1, clauses one and two.
				863	result[i] = p.embeddingLevel
				864
				865	// Rule L1, clause three.
				866	for j := i - 1; j >= 0; j-- {
				867	if isWhitespace(p.initialTypes[j]) { // including format codes
				868	result[j] = p.embeddingLevel
				869	} else {
				870	break
				871	}
				872	}
				873	}
				874	}
				875
				876	// Rule L1, clause four.
				877	start := 0
				878	for _, limit := range linebreaks {
				879	for j := limit - 1; j >= start; j-- {
				880	if isWhitespace(p.initialTypes[j]) { // including format codes
				881	result[j] = p.embeddingLevel
				882	} else {
				883	break
				884	}
				885	}
				886	start = limit
				887	}
				888
				889	return result
				890	}
				891
				892	// getReordering returns the reordering of lines from a visual index to a
				893	// logical index for line breaks at the given offsets.
				894	//
				895	// Lines are concatenated from left to right. So for example, the fifth
				896	// character from the left on the third line is
				897	//
				898	// getReordering(linebreaks)[linebreaks[1] + 4]
				899	//
				900	// (linebreaks[1] is the position after the last character of the second
				901	// line, which is also the index of the first character on the third line,
				902	// and adding four gets the fifth character from the left).
				903	//
				904	// The linebreaks array must include at least one value. The values must be
				905	// in strictly increasing order (no duplicates) between 1 and the length of
				906	// the text, inclusive. The last value must be the length of the text.
				907	func (p *paragraph) getReordering(linebreaks []int) []int {
				908	validateLineBreaks(linebreaks, p.Len())
				909
				910	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
				911	}
				912
				913	// Return multiline reordering array for a given level array. Reordering
				914	// does not occur across a line break.
				915	func computeMultilineReordering(levels []level, linebreaks []int) []int {
				916	result := make([]int, len(levels))
				917
				918	start := 0
				919	for _, limit := range linebreaks {
				920	tempLevels := make([]level, limit-start)
				921	copy(tempLevels, levels[start:])
				922
				923	for j, order := range computeReordering(tempLevels) {
				924	result[start+j] = order + start
				925	}
				926	start = limit
				927	}
				928	return result
				929	}
				930
				931	// Return reordering array for a given level array. This reorders a single
				932	// line. The reordering is a visual to logical map. For example, the
				933	// leftmost char is string.charAt(order[0]). Rule L2.
				934	func computeReordering(levels []level) []int {
				935	result := make([]int, len(levels))
				936	// initialize order
				937	for i := range result {
				938	result[i] = i
				939	}
				940
				941	// locate highest level found on line.
				942	// Note the rules say text, but no reordering across line bounds is
				943	// performed, so this is sufficient.
				944	highestLevel := level(0)
				945	lowestOddLevel := level(maxDepth + 2)
				946	for _, level := range levels {
				947	if level > highestLevel {
				948	highestLevel = level
				949	}
				950	if level&1 != 0 && level < lowestOddLevel {
				951	lowestOddLevel = level
				952	}
				953	}
				954
				955	for level := highestLevel; level >= lowestOddLevel; level-- {
				956	for i := 0; i < len(levels); i++ {
				957	if levels[i] >= level {
				958	// find range of text at or above this level
				959	start := i
				960	limit := i + 1
				961	for limit < len(levels) && levels[limit] >= level {
				962	limit++
				963	}
				964
				965	for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
				966	result[j], result[k] = result[k], result[j]
				967	}
				968	// skip to end of level run
				969	i = limit
				970	}
				971	}
				972	}
				973
				974	return result
				975	}
				976
				977	// isWhitespace reports whether the type is considered a whitespace type for the
				978	// line break rules.
				979	func isWhitespace(c Class) bool {
				980	switch c {
				981	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
				982	return true
				983	}
				984	return false
				985	}
				986
				987	// isRemovedByX9 reports whether the type is one of the types removed in X9.
				988	func isRemovedByX9(c Class) bool {
				989	switch c {
				990	case LRE, RLE, LRO, RLO, PDF, BN:
				991	return true
				992	}
				993	return false
				994	}
				995
				996	// typeForLevel reports the strong type (L or R) corresponding to the level.
				997	func typeForLevel(level level) Class {
				998	if (level & 0x1) == 0 {
				999	return L
				1000	}
				1001	return R
				1002	}
				1003
				1004	// TODO: change validation to not panic
				1005
				1006	func validateTypes(types []Class) {
				1007	if len(types) == 0 {
				1008	log.Panic("types is null")
				1009	}
				1010	for i, t := range types[:len(types)-1] {
				1011	if t == B {
				1012	log.Panicf("B type before end of paragraph at index: %d", i)
				1013	}
				1014	}
				1015	}
				1016
				1017	func validateParagraphEmbeddingLevel(embeddingLevel level) {
				1018	if embeddingLevel != implicitLevel &&
				1019	embeddingLevel != 0 &&
				1020	embeddingLevel != 1 {
				1021	log.Panicf("illegal paragraph embedding level: %d", embeddingLevel)
				1022	}
				1023	}
				1024
				1025	func validateLineBreaks(linebreaks []int, textLength int) {
				1026	prev := 0
				1027	for i, next := range linebreaks {
				1028	if next <= prev {
				1029	log.Panicf("bad linebreak: %d at index: %d", next, i)
				1030	}
				1031	prev = next
				1032	}
				1033	if prev != textLength {
				1034	log.Panicf("last linebreak was %d, want %d", prev, textLength)
				1035	}
				1036	}
				1037
				1038	func validatePbTypes(pairTypes []bracketType) {
				1039	if len(pairTypes) == 0 {
				1040	log.Panic("pairTypes is null")
				1041	}
				1042	for i, pt := range pairTypes {
				1043	switch pt {
				1044	case bpNone, bpOpen, bpClose:
				1045	default:
				1046	log.Panicf("illegal pairType value at %d: %v", i, pairTypes[i])
				1047	}
				1048	}
				1049	}
				1050
				1051	func validatePbValues(pairValues []rune, pairTypes []bracketType) {
				1052	if pairValues == nil {
				1053	log.Panic("pairValues is null")
				1054	}
				1055	if len(pairTypes) != len(pairValues) {
				1056	log.Panic("pairTypes is different length from pairValues")
				1057	}
				1058	}