Blame - vendor/golang.org/x/text/unicode/bidi/core.go - voltha-protos

blob: 9d2ae547b5ed4d91ba20783bd00eec10fc2d0247 [file] [log] [blame]

khenaidoo	5fc5cea	2021-08-11 17:39:16 -0400	[diff] [blame]	1	// Copyright 2015 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package bidi
				6
				7	import (
				8	"fmt"
				9	"log"
				10	)
				11
				12	// This implementation is a port based on the reference implementation found at:
				13	// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/
				14	//
				15	// described in Unicode Bidirectional Algorithm (UAX #9).
				16	//
				17	// Input:
				18	// There are two levels of input to the algorithm, since clients may prefer to
				19	// supply some information from out-of-band sources rather than relying on the
				20	// default behavior.
				21	//
				22	// - Bidi class array
				23	// - Bidi class array, with externally supplied base line direction
				24	//
				25	// Output:
				26	// Output is separated into several stages:
				27	//
				28	// - levels array over entire paragraph
				29	// - reordering array over entire paragraph
				30	// - levels array over line
				31	// - reordering array over line
				32	//
				33	// Note that for conformance to the Unicode Bidirectional Algorithm,
				34	// implementations are only required to generate correct reordering and
				35	// character directionality (odd or even levels) over a line. Generating
				36	// identical level arrays over a line is not required. Bidi explicit format
				37	// codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned arbitrary levels and
				38	// positions as long as the rest of the input is properly reordered.
				39	//
				40	// As the algorithm is defined to operate on a single paragraph at a time, this
				41	// implementation is written to handle single paragraphs. Thus rule P1 is
				42	// presumed by this implementation-- the data provided to the implementation is
				43	// assumed to be a single paragraph, and either contains no 'B' codes, or a
				44	// single 'B' code at the end of the input. 'B' is allowed as input to
				45	// illustrate how the algorithm assigns it a level.
				46	//
				47	// Also note that rules L3 and L4 depend on the rendering engine that uses the
				48	// result of the bidi algorithm. This implementation assumes that the rendering
				49	// engine expects combining marks in visual order (e.g. to the left of their
				50	// base character in RTL runs) and that it adjusts the glyphs used to render
				51	// mirrored characters that are in RTL runs so that they render appropriately.
				52
				53	// level is the embedding level of a character. Even embedding levels indicate
				54	// left-to-right order and odd levels indicate right-to-left order. The special
				55	// level of -1 is reserved for undefined order.
				56	type level int8
				57
				58	const implicitLevel level = -1
				59
				60	// in returns if x is equal to any of the values in set.
				61	func (c Class) in(set ...Class) bool {
				62	for _, s := range set {
				63	if c == s {
				64	return true
				65	}
				66	}
				67	return false
				68	}
				69
				70	// A paragraph contains the state of a paragraph.
				71	type paragraph struct {
				72	initialTypes []Class
				73
				74	// Arrays of properties needed for paired bracket evaluation in N0
				75	pairTypes []bracketType // paired Bracket types for paragraph
				76	pairValues []rune // rune for opening bracket or pbOpen and pbClose; 0 for pbNone
				77
				78	embeddingLevel level // default: = implicitLevel;
				79
				80	// at the paragraph levels
				81	resultTypes []Class
				82	resultLevels []level
				83
				84	// Index of matching PDI for isolate initiator characters. For other
				85	// characters, the value of matchingPDI will be set to -1. For isolate
				86	// initiators with no matching PDI, matchingPDI will be set to the length of
				87	// the input string.
				88	matchingPDI []int
				89
				90	// Index of matching isolate initiator for PDI characters. For other
				91	// characters, and for PDIs with no matching isolate initiator, the value of
				92	// matchingIsolateInitiator will be set to -1.
				93	matchingIsolateInitiator []int
				94	}
				95
				96	// newParagraph initializes a paragraph. The user needs to supply a few arrays
				97	// corresponding to the preprocessed text input. The types correspond to the
				98	// Unicode BiDi classes for each rune. pairTypes indicates the bracket type for
				99	// each rune. pairValues provides a unique bracket class identifier for each
				100	// rune (suggested is the rune of the open bracket for opening and matching
				101	// close brackets, after normalization). The embedding levels are optional, but
				102	// may be supplied to encode embedding levels of styled text.
				103	func newParagraph(types []Class, pairTypes []bracketType, pairValues []rune, levels level) (*paragraph, error) {
				104	var err error
				105	if err = validateTypes(types); err != nil {
				106	return nil, err
				107	}
				108	if err = validatePbTypes(pairTypes); err != nil {
				109	return nil, err
				110	}
				111	if err = validatePbValues(pairValues, pairTypes); err != nil {
				112	return nil, err
				113	}
				114	if err = validateParagraphEmbeddingLevel(levels); err != nil {
				115	return nil, err
				116	}
				117
				118	p := &paragraph{
				119	initialTypes: append([]Class(nil), types...),
				120	embeddingLevel: levels,
				121
				122	pairTypes: pairTypes,
				123	pairValues: pairValues,
				124
				125	resultTypes: append([]Class(nil), types...),
				126	}
				127	p.run()
				128	return p, nil
				129	}
				130
				131	func (p *paragraph) Len() int { return len(p.initialTypes) }
				132
				133	// The algorithm. Does not include line-based processing (Rules L1, L2).
				134	// These are applied later in the line-based phase of the algorithm.
				135	func (p *paragraph) run() {
				136	p.determineMatchingIsolates()
				137
				138	// 1) determining the paragraph level
				139	// Rule P1 is the requirement for entering this algorithm.
				140	// Rules P2, P3.
				141	// If no externally supplied paragraph embedding level, use default.
				142	if p.embeddingLevel == implicitLevel {
				143	p.embeddingLevel = p.determineParagraphEmbeddingLevel(0, p.Len())
				144	}
				145
				146	// Initialize result levels to paragraph embedding level.
				147	p.resultLevels = make([]level, p.Len())
				148	setLevels(p.resultLevels, p.embeddingLevel)
				149
				150	// 2) Explicit levels and directions
				151	// Rules X1-X8.
				152	p.determineExplicitEmbeddingLevels()
				153
				154	// Rule X9.
				155	// We do not remove the embeddings, the overrides, the PDFs, and the BNs
				156	// from the string explicitly. But they are not copied into isolating run
				157	// sequences when they are created, so they are removed for all
				158	// practical purposes.
				159
				160	// Rule X10.
				161	// Run remainder of algorithm one isolating run sequence at a time
				162	for _, seq := range p.determineIsolatingRunSequences() {
				163	// 3) resolving weak types
				164	// Rules W1-W7.
				165	seq.resolveWeakTypes()
				166
				167	// 4a) resolving paired brackets
				168	// Rule N0
				169	resolvePairedBrackets(seq)
				170
				171	// 4b) resolving neutral types
				172	// Rules N1-N3.
				173	seq.resolveNeutralTypes()
				174
				175	// 5) resolving implicit embedding levels
				176	// Rules I1, I2.
				177	seq.resolveImplicitLevels()
				178
				179	// Apply the computed levels and types
				180	seq.applyLevelsAndTypes()
				181	}
				182
				183	// Assign appropriate levels to 'hide' LREs, RLEs, LROs, RLOs, PDFs, and
				184	// BNs. This is for convenience, so the resulting level array will have
				185	// a value for every character.
				186	p.assignLevelsToCharactersRemovedByX9()
				187	}
				188
				189	// determineMatchingIsolates determines the matching PDI for each isolate
				190	// initiator and vice versa.
				191	//
				192	// Definition BD9.
				193	//
				194	// At the end of this function:
				195	//
Joey Armstrong	ba3d9d1	2024-01-15 14:22:11 -0500	[diff] [blame]	196	// - The member variable matchingPDI is set to point to the index of the
				197	// matching PDI character for each isolate initiator character. If there is
				198	// no matching PDI, it is set to the length of the input text. For other
				199	// characters, it is set to -1.
				200	// - The member variable matchingIsolateInitiator is set to point to the
				201	// index of the matching isolate initiator character for each PDI character.
				202	// If there is no matching isolate initiator, or the character is not a PDI,
				203	// it is set to -1.
khenaidoo	5fc5cea	2021-08-11 17:39:16 -0400	[diff] [blame]	204	func (p *paragraph) determineMatchingIsolates() {
				205	p.matchingPDI = make([]int, p.Len())
				206	p.matchingIsolateInitiator = make([]int, p.Len())
				207
				208	for i := range p.matchingIsolateInitiator {
				209	p.matchingIsolateInitiator[i] = -1
				210	}
				211
				212	for i := range p.matchingPDI {
				213	p.matchingPDI[i] = -1
				214
				215	if t := p.resultTypes[i]; t.in(LRI, RLI, FSI) {
				216	depthCounter := 1
				217	for j := i + 1; j < p.Len(); j++ {
				218	if u := p.resultTypes[j]; u.in(LRI, RLI, FSI) {
				219	depthCounter++
				220	} else if u == PDI {
				221	if depthCounter--; depthCounter == 0 {
				222	p.matchingPDI[i] = j
				223	p.matchingIsolateInitiator[j] = i
				224	break
				225	}
				226	}
				227	}
				228	if p.matchingPDI[i] == -1 {
				229	p.matchingPDI[i] = p.Len()
				230	}
				231	}
				232	}
				233	}
				234
				235	// determineParagraphEmbeddingLevel reports the resolved paragraph direction of
				236	// the substring limited by the given range [start, end).
				237	//
				238	// Determines the paragraph level based on rules P2, P3. This is also used
				239	// in rule X5c to find if an FSI should resolve to LRI or RLI.
				240	func (p *paragraph) determineParagraphEmbeddingLevel(start, end int) level {
				241	var strongType Class = unknownClass
				242
				243	// Rule P2.
				244	for i := start; i < end; i++ {
				245	if t := p.resultTypes[i]; t.in(L, AL, R) {
				246	strongType = t
				247	break
				248	} else if t.in(FSI, LRI, RLI) {
				249	i = p.matchingPDI[i] // skip over to the matching PDI
				250	if i > end {
				251	log.Panic("assert (i <= end)")
				252	}
				253	}
				254	}
				255	// Rule P3.
				256	switch strongType {
				257	case unknownClass: // none found
				258	// default embedding level when no strong types found is 0.
				259	return 0
				260	case L:
				261	return 0
				262	default: // AL, R
				263	return 1
				264	}
				265	}
				266
				267	const maxDepth = 125
				268
				269	// This stack will store the embedding levels and override and isolated
				270	// statuses
				271	type directionalStatusStack struct {
				272	stackCounter int
				273	embeddingLevelStack [maxDepth + 1]level
				274	overrideStatusStack [maxDepth + 1]Class
				275	isolateStatusStack [maxDepth + 1]bool
				276	}
				277
				278	func (s *directionalStatusStack) empty() { s.stackCounter = 0 }
				279	func (s *directionalStatusStack) pop() { s.stackCounter-- }
				280	func (s *directionalStatusStack) depth() int { return s.stackCounter }
				281
				282	func (s *directionalStatusStack) push(level level, overrideStatus Class, isolateStatus bool) {
				283	s.embeddingLevelStack[s.stackCounter] = level
				284	s.overrideStatusStack[s.stackCounter] = overrideStatus
				285	s.isolateStatusStack[s.stackCounter] = isolateStatus
				286	s.stackCounter++
				287	}
				288
				289	func (s *directionalStatusStack) lastEmbeddingLevel() level {
				290	return s.embeddingLevelStack[s.stackCounter-1]
				291	}
				292
				293	func (s *directionalStatusStack) lastDirectionalOverrideStatus() Class {
				294	return s.overrideStatusStack[s.stackCounter-1]
				295	}
				296
				297	func (s *directionalStatusStack) lastDirectionalIsolateStatus() bool {
				298	return s.isolateStatusStack[s.stackCounter-1]
				299	}
				300
				301	// Determine explicit levels using rules X1 - X8
				302	func (p *paragraph) determineExplicitEmbeddingLevels() {
				303	var stack directionalStatusStack
				304	var overflowIsolateCount, overflowEmbeddingCount, validIsolateCount int
				305
				306	// Rule X1.
				307	stack.push(p.embeddingLevel, ON, false)
				308
				309	for i, t := range p.resultTypes {
				310	// Rules X2, X3, X4, X5, X5a, X5b, X5c
				311	switch t {
				312	case RLE, LRE, RLO, LRO, RLI, LRI, FSI:
				313	isIsolate := t.in(RLI, LRI, FSI)
				314	isRTL := t.in(RLE, RLO, RLI)
				315
				316	// override if this is an FSI that resolves to RLI
				317	if t == FSI {
				318	isRTL = (p.determineParagraphEmbeddingLevel(i+1, p.matchingPDI[i]) == 1)
				319	}
				320	if isIsolate {
				321	p.resultLevels[i] = stack.lastEmbeddingLevel()
				322	if stack.lastDirectionalOverrideStatus() != ON {
				323	p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				324	}
				325	}
				326
				327	var newLevel level
				328	if isRTL {
				329	// least greater odd
				330	newLevel = (stack.lastEmbeddingLevel() + 1) \| 1
				331	} else {
				332	// least greater even
				333	newLevel = (stack.lastEmbeddingLevel() + 2) &^ 1
				334	}
				335
				336	if newLevel <= maxDepth && overflowIsolateCount == 0 && overflowEmbeddingCount == 0 {
				337	if isIsolate {
				338	validIsolateCount++
				339	}
				340	// Push new embedding level, override status, and isolated
				341	// status.
				342	// No check for valid stack counter, since the level check
				343	// suffices.
				344	switch t {
				345	case LRO:
				346	stack.push(newLevel, L, isIsolate)
				347	case RLO:
				348	stack.push(newLevel, R, isIsolate)
				349	default:
				350	stack.push(newLevel, ON, isIsolate)
				351	}
				352	// Not really part of the spec
				353	if !isIsolate {
				354	p.resultLevels[i] = newLevel
				355	}
				356	} else {
				357	// This is an invalid explicit formatting character,
				358	// so apply the "Otherwise" part of rules X2-X5b.
				359	if isIsolate {
				360	overflowIsolateCount++
				361	} else { // !isIsolate
				362	if overflowIsolateCount == 0 {
				363	overflowEmbeddingCount++
				364	}
				365	}
				366	}
				367
				368	// Rule X6a
				369	case PDI:
				370	if overflowIsolateCount > 0 {
				371	overflowIsolateCount--
				372	} else if validIsolateCount == 0 {
				373	// do nothing
				374	} else {
				375	overflowEmbeddingCount = 0
				376	for !stack.lastDirectionalIsolateStatus() {
				377	stack.pop()
				378	}
				379	stack.pop()
				380	validIsolateCount--
				381	}
				382	p.resultLevels[i] = stack.lastEmbeddingLevel()
				383
				384	// Rule X7
				385	case PDF:
				386	// Not really part of the spec
				387	p.resultLevels[i] = stack.lastEmbeddingLevel()
				388
				389	if overflowIsolateCount > 0 {
				390	// do nothing
				391	} else if overflowEmbeddingCount > 0 {
				392	overflowEmbeddingCount--
				393	} else if !stack.lastDirectionalIsolateStatus() && stack.depth() >= 2 {
				394	stack.pop()
				395	}
				396
				397	case B: // paragraph separator.
				398	// Rule X8.
				399
				400	// These values are reset for clarity, in this implementation B
				401	// can only occur as the last code in the array.
				402	stack.empty()
				403	overflowIsolateCount = 0
				404	overflowEmbeddingCount = 0
				405	validIsolateCount = 0
				406	p.resultLevels[i] = p.embeddingLevel
				407
				408	default:
				409	p.resultLevels[i] = stack.lastEmbeddingLevel()
				410	if stack.lastDirectionalOverrideStatus() != ON {
				411	p.resultTypes[i] = stack.lastDirectionalOverrideStatus()
				412	}
				413	}
				414	}
				415	}
				416
				417	type isolatingRunSequence struct {
				418	p *paragraph
				419
				420	indexes []int // indexes to the original string
				421
				422	types []Class // type of each character using the index
				423	resolvedLevels []level // resolved levels after application of rules
				424	level level
				425	sos, eos Class
				426	}
				427
				428	func (i *isolatingRunSequence) Len() int { return len(i.indexes) }
				429
				430	func maxLevel(a, b level) level {
				431	if a > b {
				432	return a
				433	}
				434	return b
				435	}
				436
				437	// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
Akash Kankanala	761955c	2024-02-21 19:32:20 +0530	[diff] [blame]	438	// either L or R, for each isolating run sequence.
khenaidoo	5fc5cea	2021-08-11 17:39:16 -0400	[diff] [blame]	439	func (p paragraph) isolatingRunSequence(indexes []int) isolatingRunSequence {
				440	length := len(indexes)
				441	types := make([]Class, length)
				442	for i, x := range indexes {
				443	types[i] = p.resultTypes[x]
				444	}
				445
				446	// assign level, sos and eos
				447	prevChar := indexes[0] - 1
				448	for prevChar >= 0 && isRemovedByX9(p.initialTypes[prevChar]) {
				449	prevChar--
				450	}
				451	prevLevel := p.embeddingLevel
				452	if prevChar >= 0 {
				453	prevLevel = p.resultLevels[prevChar]
				454	}
				455
				456	var succLevel level
				457	lastType := types[length-1]
				458	if lastType.in(LRI, RLI, FSI) {
				459	succLevel = p.embeddingLevel
				460	} else {
				461	// the first character after the end of run sequence
				462	limit := indexes[length-1] + 1
				463	for ; limit < p.Len() && isRemovedByX9(p.initialTypes[limit]); limit++ {
				464
				465	}
				466	succLevel = p.embeddingLevel
				467	if limit < p.Len() {
				468	succLevel = p.resultLevels[limit]
				469	}
				470	}
				471	level := p.resultLevels[indexes[0]]
				472	return &isolatingRunSequence{
				473	p: p,
				474	indexes: indexes,
				475	types: types,
				476	level: level,
				477	sos: typeForLevel(maxLevel(prevLevel, level)),
				478	eos: typeForLevel(maxLevel(succLevel, level)),
				479	}
				480	}
				481
				482	// Resolving weak types Rules W1-W7.
				483	//
				484	// Note that some weak types (EN, AN) remain after this processing is
				485	// complete.
				486	func (s *isolatingRunSequence) resolveWeakTypes() {
				487
				488	// on entry, only these types remain
				489	s.assertOnly(L, R, AL, EN, ES, ET, AN, CS, B, S, WS, ON, NSM, LRI, RLI, FSI, PDI)
				490
				491	// Rule W1.
				492	// Changes all NSMs.
				493	precedingCharacterType := s.sos
				494	for i, t := range s.types {
				495	if t == NSM {
				496	s.types[i] = precedingCharacterType
				497	} else {
Akash Kankanala	761955c	2024-02-21 19:32:20 +0530	[diff] [blame]	498	// if t.in(LRI, RLI, FSI, PDI) {
				499	// precedingCharacterType = ON
				500	// }
khenaidoo	5fc5cea	2021-08-11 17:39:16 -0400	[diff] [blame]	501	precedingCharacterType = t
				502	}
				503	}
				504
				505	// Rule W2.
				506	// EN does not change at the start of the run, because sos != AL.
				507	for i, t := range s.types {
				508	if t == EN {
				509	for j := i - 1; j >= 0; j-- {
				510	if t := s.types[j]; t.in(L, R, AL) {
				511	if t == AL {
				512	s.types[i] = AN
				513	}
				514	break
				515	}
				516	}
				517	}
				518	}
				519
				520	// Rule W3.
				521	for i, t := range s.types {
				522	if t == AL {
				523	s.types[i] = R
				524	}
				525	}
				526
				527	// Rule W4.
				528	// Since there must be values on both sides for this rule to have an
				529	// effect, the scan skips the first and last value.
				530	//
				531	// Although the scan proceeds left to right, and changes the type
				532	// values in a way that would appear to affect the computations
				533	// later in the scan, there is actually no problem. A change in the
				534	// current value can only affect the value to its immediate right,
				535	// and only affect it if it is ES or CS. But the current value can
				536	// only change if the value to its right is not ES or CS. Thus
				537	// either the current value will not change, or its change will have
				538	// no effect on the remainder of the analysis.
				539
				540	for i := 1; i < s.Len()-1; i++ {
				541	t := s.types[i]
				542	if t == ES \|\| t == CS {
				543	prevSepType := s.types[i-1]
				544	succSepType := s.types[i+1]
				545	if prevSepType == EN && succSepType == EN {
				546	s.types[i] = EN
				547	} else if s.types[i] == CS && prevSepType == AN && succSepType == AN {
				548	s.types[i] = AN
				549	}
				550	}
				551	}
				552
				553	// Rule W5.
				554	for i, t := range s.types {
				555	if t == ET {
				556	// locate end of sequence
				557	runStart := i
				558	runEnd := s.findRunLimit(runStart, ET)
				559
				560	// check values at ends of sequence
				561	t := s.sos
				562	if runStart > 0 {
				563	t = s.types[runStart-1]
				564	}
				565	if t != EN {
				566	t = s.eos
				567	if runEnd < len(s.types) {
				568	t = s.types[runEnd]
				569	}
				570	}
				571	if t == EN {
				572	setTypes(s.types[runStart:runEnd], EN)
				573	}
				574	// continue at end of sequence
				575	i = runEnd
				576	}
				577	}
				578
				579	// Rule W6.
				580	for i, t := range s.types {
				581	if t.in(ES, ET, CS) {
				582	s.types[i] = ON
				583	}
				584	}
				585
				586	// Rule W7.
				587	for i, t := range s.types {
				588	if t == EN {
				589	// set default if we reach start of run
				590	prevStrongType := s.sos
				591	for j := i - 1; j >= 0; j-- {
				592	t = s.types[j]
				593	if t == L \|\| t == R { // AL's have been changed to R
				594	prevStrongType = t
				595	break
				596	}
				597	}
				598	if prevStrongType == L {
				599	s.types[i] = L
				600	}
				601	}
				602	}
				603	}
				604
				605	// 6) resolving neutral types Rules N1-N2.
				606	func (s *isolatingRunSequence) resolveNeutralTypes() {
				607
				608	// on entry, only these types can be in resultTypes
				609	s.assertOnly(L, R, EN, AN, B, S, WS, ON, RLI, LRI, FSI, PDI)
				610
				611	for i, t := range s.types {
				612	switch t {
				613	case WS, ON, B, S, RLI, LRI, FSI, PDI:
				614	// find bounds of run of neutrals
				615	runStart := i
				616	runEnd := s.findRunLimit(runStart, B, S, WS, ON, RLI, LRI, FSI, PDI)
				617
				618	// determine effective types at ends of run
				619	var leadType, trailType Class
				620
				621	// Note that the character found can only be L, R, AN, or
				622	// EN.
				623	if runStart == 0 {
				624	leadType = s.sos
				625	} else {
				626	leadType = s.types[runStart-1]
				627	if leadType.in(AN, EN) {
				628	leadType = R
				629	}
				630	}
				631	if runEnd == len(s.types) {
				632	trailType = s.eos
				633	} else {
				634	trailType = s.types[runEnd]
				635	if trailType.in(AN, EN) {
				636	trailType = R
				637	}
				638	}
				639
				640	var resolvedType Class
				641	if leadType == trailType {
				642	// Rule N1.
				643	resolvedType = leadType
				644	} else {
				645	// Rule N2.
				646	// Notice the embedding level of the run is used, not
				647	// the paragraph embedding level.
				648	resolvedType = typeForLevel(s.level)
				649	}
				650
				651	setTypes(s.types[runStart:runEnd], resolvedType)
				652
				653	// skip over run of (former) neutrals
				654	i = runEnd
				655	}
				656	}
				657	}
				658
				659	func setLevels(levels []level, newLevel level) {
				660	for i := range levels {
				661	levels[i] = newLevel
				662	}
				663	}
				664
				665	func setTypes(types []Class, newType Class) {
				666	for i := range types {
				667	types[i] = newType
				668	}
				669	}
				670
				671	// 7) resolving implicit embedding levels Rules I1, I2.
				672	func (s *isolatingRunSequence) resolveImplicitLevels() {
				673
				674	// on entry, only these types can be in resultTypes
				675	s.assertOnly(L, R, EN, AN)
				676
				677	s.resolvedLevels = make([]level, len(s.types))
				678	setLevels(s.resolvedLevels, s.level)
				679
				680	if (s.level & 1) == 0 { // even level
				681	for i, t := range s.types {
				682	// Rule I1.
				683	if t == L {
				684	// no change
				685	} else if t == R {
				686	s.resolvedLevels[i] += 1
				687	} else { // t == AN \|\| t == EN
				688	s.resolvedLevels[i] += 2
				689	}
				690	}
				691	} else { // odd level
				692	for i, t := range s.types {
				693	// Rule I2.
				694	if t == R {
				695	// no change
				696	} else { // t == L \|\| t == AN \|\| t == EN
				697	s.resolvedLevels[i] += 1
				698	}
				699	}
				700	}
				701	}
				702
				703	// Applies the levels and types resolved in rules W1-I2 to the
				704	// resultLevels array.
				705	func (s *isolatingRunSequence) applyLevelsAndTypes() {
				706	for i, x := range s.indexes {
				707	s.p.resultTypes[x] = s.types[i]
				708	s.p.resultLevels[x] = s.resolvedLevels[i]
				709	}
				710	}
				711
				712	// Return the limit of the run consisting only of the types in validSet
				713	// starting at index. This checks the value at index, and will return
				714	// index if that value is not in validSet.
				715	func (s *isolatingRunSequence) findRunLimit(index int, validSet ...Class) int {
				716	loop:
				717	for ; index < len(s.types); index++ {
				718	t := s.types[index]
				719	for _, valid := range validSet {
				720	if t == valid {
				721	continue loop
				722	}
				723	}
				724	return index // didn't find a match in validSet
				725	}
				726	return len(s.types)
				727	}
				728
				729	// Algorithm validation. Assert that all values in types are in the
				730	// provided set.
				731	func (s *isolatingRunSequence) assertOnly(codes ...Class) {
				732	loop:
				733	for i, t := range s.types {
				734	for _, c := range codes {
				735	if t == c {
				736	continue loop
				737	}
				738	}
				739	log.Panicf("invalid bidi code %v present in assertOnly at position %d", t, s.indexes[i])
				740	}
				741	}
				742
				743	// determineLevelRuns returns an array of level runs. Each level run is
				744	// described as an array of indexes into the input string.
				745	//
				746	// Determines the level runs. Rule X9 will be applied in determining the
				747	// runs, in the way that makes sure the characters that are supposed to be
				748	// removed are not included in the runs.
				749	func (p *paragraph) determineLevelRuns() [][]int {
				750	run := []int{}
				751	allRuns := [][]int{}
				752	currentLevel := implicitLevel
				753
				754	for i := range p.initialTypes {
				755	if !isRemovedByX9(p.initialTypes[i]) {
				756	if p.resultLevels[i] != currentLevel {
				757	// we just encountered a new run; wrap up last run
				758	if currentLevel >= 0 { // only wrap it up if there was a run
				759	allRuns = append(allRuns, run)
				760	run = nil
				761	}
				762	// Start new run
				763	currentLevel = p.resultLevels[i]
				764	}
				765	run = append(run, i)
				766	}
				767	}
				768	// Wrap up the final run, if any
				769	if len(run) > 0 {
				770	allRuns = append(allRuns, run)
				771	}
				772	return allRuns
				773	}
				774
				775	// Definition BD13. Determine isolating run sequences.
				776	func (p paragraph) determineIsolatingRunSequences() []isolatingRunSequence {
				777	levelRuns := p.determineLevelRuns()
				778
				779	// Compute the run that each character belongs to
				780	runForCharacter := make([]int, p.Len())
				781	for i, run := range levelRuns {
				782	for _, index := range run {
				783	runForCharacter[index] = i
				784	}
				785	}
				786
				787	sequences := []*isolatingRunSequence{}
				788
				789	var currentRunSequence []int
				790
				791	for _, run := range levelRuns {
				792	first := run[0]
				793	if p.initialTypes[first] != PDI \|\| p.matchingIsolateInitiator[first] == -1 {
				794	currentRunSequence = nil
				795	// int run = i;
				796	for {
				797	// Copy this level run into currentRunSequence
				798	currentRunSequence = append(currentRunSequence, run...)
				799
				800	last := currentRunSequence[len(currentRunSequence)-1]
				801	lastT := p.initialTypes[last]
				802	if lastT.in(LRI, RLI, FSI) && p.matchingPDI[last] != p.Len() {
				803	run = levelRuns[runForCharacter[p.matchingPDI[last]]]
				804	} else {
				805	break
				806	}
				807	}
				808	sequences = append(sequences, p.isolatingRunSequence(currentRunSequence))
				809	}
				810	}
				811	return sequences
				812	}
				813
				814	// Assign level information to characters removed by rule X9. This is for
				815	// ease of relating the level information to the original input data. Note
				816	// that the levels assigned to these codes are arbitrary, they're chosen so
				817	// as to avoid breaking level runs.
				818	func (p *paragraph) assignLevelsToCharactersRemovedByX9() {
				819	for i, t := range p.initialTypes {
				820	if t.in(LRE, RLE, LRO, RLO, PDF, BN) {
				821	p.resultTypes[i] = t
				822	p.resultLevels[i] = -1
				823	}
				824	}
				825	// now propagate forward the levels information (could have
				826	// propagated backward, the main thing is not to introduce a level
				827	// break where one doesn't already exist).
				828
				829	if p.resultLevels[0] == -1 {
				830	p.resultLevels[0] = p.embeddingLevel
				831	}
				832	for i := 1; i < len(p.initialTypes); i++ {
				833	if p.resultLevels[i] == -1 {
				834	p.resultLevels[i] = p.resultLevels[i-1]
				835	}
				836	}
				837	// Embedding information is for informational purposes only so need not be
				838	// adjusted.
				839	}
				840
				841	//
				842	// Output
				843	//
				844
				845	// getLevels computes levels array breaking lines at offsets in linebreaks.
				846	// Rule L1.
				847	//
				848	// The linebreaks array must include at least one value. The values must be
				849	// in strictly increasing order (no duplicates) between 1 and the length of
				850	// the text, inclusive. The last value must be the length of the text.
				851	func (p *paragraph) getLevels(linebreaks []int) []level {
				852	// Note that since the previous processing has removed all
				853	// P, S, and WS values from resultTypes, the values referred to
				854	// in these rules are the initial types, before any processing
				855	// has been applied (including processing of overrides).
				856	//
				857	// This example implementation has reinserted explicit format codes
				858	// and BN, in order that the levels array correspond to the
				859	// initial text. Their final placement is not normative.
				860	// These codes are treated like WS in this implementation,
				861	// so they don't interrupt sequences of WS.
				862
				863	validateLineBreaks(linebreaks, p.Len())
				864
				865	result := append([]level(nil), p.resultLevels...)
				866
				867	// don't worry about linebreaks since if there is a break within
				868	// a series of WS values preceding S, the linebreak itself
				869	// causes the reset.
				870	for i, t := range p.initialTypes {
				871	if t.in(B, S) {
				872	// Rule L1, clauses one and two.
				873	result[i] = p.embeddingLevel
				874
				875	// Rule L1, clause three.
				876	for j := i - 1; j >= 0; j-- {
				877	if isWhitespace(p.initialTypes[j]) { // including format codes
				878	result[j] = p.embeddingLevel
				879	} else {
				880	break
				881	}
				882	}
				883	}
				884	}
				885
				886	// Rule L1, clause four.
				887	start := 0
				888	for _, limit := range linebreaks {
				889	for j := limit - 1; j >= start; j-- {
				890	if isWhitespace(p.initialTypes[j]) { // including format codes
				891	result[j] = p.embeddingLevel
				892	} else {
				893	break
				894	}
				895	}
				896	start = limit
				897	}
				898
				899	return result
				900	}
				901
				902	// getReordering returns the reordering of lines from a visual index to a
				903	// logical index for line breaks at the given offsets.
				904	//
				905	// Lines are concatenated from left to right. So for example, the fifth
				906	// character from the left on the third line is
				907	//
Joey Armstrong	ba3d9d1	2024-01-15 14:22:11 -0500	[diff] [blame]	908	// getReordering(linebreaks)[linebreaks[1] + 4]
khenaidoo	5fc5cea	2021-08-11 17:39:16 -0400	[diff] [blame]	909	//
				910	// (linebreaks[1] is the position after the last character of the second
				911	// line, which is also the index of the first character on the third line,
				912	// and adding four gets the fifth character from the left).
				913	//
				914	// The linebreaks array must include at least one value. The values must be
				915	// in strictly increasing order (no duplicates) between 1 and the length of
				916	// the text, inclusive. The last value must be the length of the text.
				917	func (p *paragraph) getReordering(linebreaks []int) []int {
				918	validateLineBreaks(linebreaks, p.Len())
				919
				920	return computeMultilineReordering(p.getLevels(linebreaks), linebreaks)
				921	}
				922
				923	// Return multiline reordering array for a given level array. Reordering
				924	// does not occur across a line break.
				925	func computeMultilineReordering(levels []level, linebreaks []int) []int {
				926	result := make([]int, len(levels))
				927
				928	start := 0
				929	for _, limit := range linebreaks {
				930	tempLevels := make([]level, limit-start)
				931	copy(tempLevels, levels[start:])
				932
				933	for j, order := range computeReordering(tempLevels) {
				934	result[start+j] = order + start
				935	}
				936	start = limit
				937	}
				938	return result
				939	}
				940
				941	// Return reordering array for a given level array. This reorders a single
				942	// line. The reordering is a visual to logical map. For example, the
				943	// leftmost char is string.charAt(order[0]). Rule L2.
				944	func computeReordering(levels []level) []int {
				945	result := make([]int, len(levels))
				946	// initialize order
				947	for i := range result {
				948	result[i] = i
				949	}
				950
				951	// locate highest level found on line.
				952	// Note the rules say text, but no reordering across line bounds is
				953	// performed, so this is sufficient.
				954	highestLevel := level(0)
				955	lowestOddLevel := level(maxDepth + 2)
				956	for _, level := range levels {
				957	if level > highestLevel {
				958	highestLevel = level
				959	}
				960	if level&1 != 0 && level < lowestOddLevel {
				961	lowestOddLevel = level
				962	}
				963	}
				964
				965	for level := highestLevel; level >= lowestOddLevel; level-- {
				966	for i := 0; i < len(levels); i++ {
				967	if levels[i] >= level {
				968	// find range of text at or above this level
				969	start := i
				970	limit := i + 1
				971	for limit < len(levels) && levels[limit] >= level {
				972	limit++
				973	}
				974
				975	for j, k := start, limit-1; j < k; j, k = j+1, k-1 {
				976	result[j], result[k] = result[k], result[j]
				977	}
				978	// skip to end of level run
				979	i = limit
				980	}
				981	}
				982	}
				983
				984	return result
				985	}
				986
				987	// isWhitespace reports whether the type is considered a whitespace type for the
				988	// line break rules.
				989	func isWhitespace(c Class) bool {
				990	switch c {
				991	case LRE, RLE, LRO, RLO, PDF, LRI, RLI, FSI, PDI, BN, WS:
				992	return true
				993	}
				994	return false
				995	}
				996
				997	// isRemovedByX9 reports whether the type is one of the types removed in X9.
				998	func isRemovedByX9(c Class) bool {
				999	switch c {
				1000	case LRE, RLE, LRO, RLO, PDF, BN:
				1001	return true
				1002	}
				1003	return false
				1004	}
				1005
				1006	// typeForLevel reports the strong type (L or R) corresponding to the level.
				1007	func typeForLevel(level level) Class {
				1008	if (level & 0x1) == 0 {
				1009	return L
				1010	}
				1011	return R
				1012	}
				1013
				1014	func validateTypes(types []Class) error {
				1015	if len(types) == 0 {
				1016	return fmt.Errorf("types is null")
				1017	}
				1018	for i, t := range types[:len(types)-1] {
				1019	if t == B {
				1020	return fmt.Errorf("B type before end of paragraph at index: %d", i)
				1021	}
				1022	}
				1023	return nil
				1024	}
				1025
				1026	func validateParagraphEmbeddingLevel(embeddingLevel level) error {
				1027	if embeddingLevel != implicitLevel &&
				1028	embeddingLevel != 0 &&
				1029	embeddingLevel != 1 {
				1030	return fmt.Errorf("illegal paragraph embedding level: %d", embeddingLevel)
				1031	}
				1032	return nil
				1033	}
				1034
				1035	func validateLineBreaks(linebreaks []int, textLength int) error {
				1036	prev := 0
				1037	for i, next := range linebreaks {
				1038	if next <= prev {
				1039	return fmt.Errorf("bad linebreak: %d at index: %d", next, i)
				1040	}
				1041	prev = next
				1042	}
				1043	if prev != textLength {
				1044	return fmt.Errorf("last linebreak was %d, want %d", prev, textLength)
				1045	}
				1046	return nil
				1047	}
				1048
				1049	func validatePbTypes(pairTypes []bracketType) error {
				1050	if len(pairTypes) == 0 {
				1051	return fmt.Errorf("pairTypes is null")
				1052	}
				1053	for i, pt := range pairTypes {
				1054	switch pt {
				1055	case bpNone, bpOpen, bpClose:
				1056	default:
				1057	return fmt.Errorf("illegal pairType value at %d: %v", i, pairTypes[i])
				1058	}
				1059	}
				1060	return nil
				1061	}
				1062
				1063	func validatePbValues(pairValues []rune, pairTypes []bracketType) error {
				1064	if pairValues == nil {
				1065	return fmt.Errorf("pairValues is null")
				1066	}
				1067	if len(pairTypes) != len(pairValues) {
				1068	return fmt.Errorf("pairTypes is different length from pairValues")
				1069	}
				1070	return nil
				1071	}