blob: 98949d036a337498750342733efb02cd13d548c1 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5// This file is mostly a copy of the go standard library text/tabwriter. With
6// the additional stripping of ansi control characters for width calculations.
7
8// Package tabwriter implements a write filter (tabwriter.Writer) that
9// translates tabbed columns in input into properly aligned text.
10//
11// The package is using the Elastic Tabstops algorithm described at
12// http://nickgravgaard.com/elastictabstops/index.html.
13//
14package tabwriter
15
16import (
17 "bytes"
18 "io"
19 "unicode/utf8"
20
21 "github.com/lunixbochs/vtclean"
22)
23
24// ----------------------------------------------------------------------------
25// Filter implementation
26
// cell describes one segment of text that ends at a tab or a line break.
// The text is kept in a separate buffer; the cell records only how many
// bytes the segment occupies, how many runes wide it is, and whether the
// terminator was a horizontal tab ('\t').
type cell struct {
	size  int  // number of bytes of text belonging to the cell
	width int  // width of the cell, counted in runes
	htab  bool // set when the cell was terminated by an htab ('\t')
}
37
38// A Writer is a filter that inserts padding around tab-delimited
39// columns in its input to align them in the output.
40//
41// The Writer treats incoming bytes as UTF-8 encoded text consisting
42// of cells terminated by (horizontal or vertical) tabs or line
43// breaks (newline or formfeed characters). Cells in adjacent lines
44// constitute a column. The Writer inserts padding as needed to
45// make all cells in a column have the same width, effectively
46// aligning the columns. It assumes that all characters have the
47// same width except for tabs for which a tabwidth must be specified.
48// Note that cells are tab-terminated, not tab-separated: trailing
49// non-tab text at the end of a line does not form a column cell.
50//
51// The Writer assumes that all Unicode code points have the same width;
52// this may not be true in some fonts.
53//
54// If DiscardEmptyColumns is set, empty columns that are terminated
55// entirely by vertical (or "soft") tabs are discarded. Columns
56// terminated by horizontal (or "hard") tabs are not affected by
57// this flag.
58//
59// If a Writer is configured to filter HTML, HTML tags and entities
60// are passed through. The widths of tags and entities are
61// assumed to be zero (tags) and one (entities) for formatting purposes.
62//
63// A segment of text may be escaped by bracketing it with Escape
64// characters. The tabwriter passes escaped text segments through
65// unchanged. In particular, it does not interpret any tabs or line
66// breaks within the segment. If the StripEscape flag is set, the
67// Escape characters are stripped from the output; otherwise they
68// are passed through as well. For the purpose of formatting, the
69// width of the escaped text is always computed excluding the Escape
70// characters.
71//
72// The formfeed character ('\f') acts like a newline but it also
73// terminates all columns in the current line (effectively calling
74// Flush). Cells in the next line start new columns. Unless found
75// inside an HTML tag or inside an escaped text segment, formfeed
76// characters appear as newlines in the output.
77//
78// The Writer must buffer input internally, because proper spacing
79// of one line may depend on the cells in future lines. Clients must
80// call Flush when done calling Write.
81//
82type Writer struct {
83 // configuration
84 output io.Writer
85 minwidth int
86 tabwidth int
87 padding int
88 padbytes [8]byte
89 flags uint
90
91 // current state
92 buf bytes.Buffer // collected text excluding tabs or line breaks
93 pos int // buffer position up to which cell.width of incomplete cell has been computed
94 cell cell // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
95 endChar byte // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
96 lines [][]cell // list of lines; each line is a list of cells
97 widths []int // list of column widths in runes - re-used during formatting
98 alignment map[int]uint // column alignment
99}
100
101func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
102
103// Reset the current state.
104func (b *Writer) reset() {
105 b.buf.Reset()
106 b.pos = 0
107 b.cell = cell{}
108 b.endChar = 0
109 b.lines = b.lines[0:0]
110 b.widths = b.widths[0:0]
111 b.alignment = make(map[int]uint)
112 b.addLine()
113}
114
// Internal representation (current state):
//
// - all text written is appended to buf; tabs and line breaks are stripped away
// - at any given time there is a (possibly empty) incomplete cell at the end
//   (the cell starts after a tab or line break)
// - cell.size is the number of bytes belonging to the cell so far
// - cell.width is text width in runes of that cell from the start of the cell to
//   position pos; html tags and entities are excluded from this width if html
//   filtering is enabled
// - the sizes and widths of processed text are kept in the lines list
//   which contains a list of cells for each line
// - the widths list is a temporary list with current widths used during
//   formatting; it is kept in Writer because it's re-used
//
//                    |<---------- size ---------->|
//                    |                            |
//                    |<- width ->|<- ignored ->|  |
//                    |           |             |  |
// [---processed---tab------------<tag>...</tag>...]
// ^                  ^                         ^
// |                  |                         |
// buf                start of incomplete cell  pos
137
// Flags that control formatting. They are bit values and may be combined.
const (
	// FilterHTML ignores html tags and counts each entity (text that
	// starts with '&' and ends in ';') as a single character of width 1.
	FilterHTML uint = 1 << iota

	// StripEscape removes the Escape characters that bracket escaped text
	// segments instead of passing them through with the text.
	StripEscape

	// AlignRight forces right-alignment of cell content.
	// Without it cells are left-aligned.
	AlignRight

	// DiscardEmptyColumns handles empty columns as if they had not been
	// present in the input in the first place.
	DiscardEmptyColumns

	// TabIndent always uses tabs for indentation columns (that is, for
	// padding leading empty cells on the left), independent of padchar.
	TabIndent

	// Debug prints a vertical bar ('|') between columns after formatting.
	// Discarded columns show up as zero-width columns ("||").
	Debug
)
164
165// A Writer must be initialized with a call to Init. The first parameter (output)
166// specifies the filter output. The remaining parameters control the formatting:
167//
168// minwidth minimal cell width including any padding
169// tabwidth width of tab characters (equivalent number of spaces)
170// padding padding added to a cell before computing its width
171// padchar ASCII char used for padding
172// if padchar == '\t', the Writer will assume that the
173// width of a '\t' in the formatted output is tabwidth,
174// and cells are left-aligned independent of align_left
175// (for correct-looking results, tabwidth must correspond
176// to the tab width in the viewer displaying the result)
177// flags formatting control
178//
179func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
180 if minwidth < 0 || tabwidth < 0 || padding < 0 {
181 panic("negative minwidth, tabwidth, or padding")
182 }
183 b.output = output
184 b.minwidth = minwidth
185 b.tabwidth = tabwidth
186 b.padding = padding
187 for i := range b.padbytes {
188 b.padbytes[i] = padchar
189 }
190 if padchar == '\t' {
191 // tab padding enforces left-alignment
192 flags &^= AlignRight
193 }
194 b.flags = flags
195
196 b.reset()
197
198 return b
199}
200
201// debugging support (keep code around)
202func (b *Writer) dump() {
203 pos := 0
204 for i, line := range b.lines {
205 print("(", i, ") ")
206 for _, c := range line {
207 print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]")
208 pos += c.size
209 }
210 print("\n")
211 }
212 print("\n")
213}
214
// osError wraps an error that should be returned to the caller as an
// error. Panicking with this type lets the recovery code tell such errors
// apart from genuine panics, which must not be turned into errors.
type osError struct {
	err error
}
220
221func (b *Writer) write0(buf []byte) {
222 n, err := b.output.Write(buf)
223 if n != len(buf) && err == nil {
224 err = io.ErrShortWrite
225 }
226 if err != nil {
227 panic(osError{err})
228 }
229}
230
231func (b *Writer) writeN(src []byte, n int) {
232 for n > len(src) {
233 b.write0(src)
234 n -= len(src)
235 }
236 b.write0(src[0:n])
237}
238
// Byte sequences written for line ends and for tab padding.
var (
	newline = []byte("\n")
	tabs    = []byte{'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'}
)
243
244func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
245 if b.padbytes[0] == '\t' || useTabs {
246 // padding is done with tabs
247 if b.tabwidth == 0 {
248 return // tabs have no width - can't do any padding
249 }
250 // make cellw the smallest multiple of b.tabwidth
251 cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
252 n := cellw - textw // amount of padding
253 if n < 0 {
254 panic("internal error")
255 }
256 b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
257 return
258 }
259
260 // padding is done with non-tab characters
261 b.writeN(b.padbytes[0:], cellw-textw)
262}
263
// vbar is the column separator written between cells in Debug mode.
var vbar = []byte("|")
265
266func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
267 pos = pos0
268 for i := line0; i < line1; i++ {
269 line := b.lines[i]
270
271 // if TabIndent is set, use tabs to pad leading empty cells
272 useTabs := b.flags&TabIndent != 0
273
274 for j, c := range line {
275 if j > 0 && b.flags&Debug != 0 {
276 // indicate column break
277 b.write0(vbar)
278 }
279
280 if c.size == 0 {
281 // empty cell
282 if j < len(b.widths) {
283 b.writePadding(c.width, b.widths[j], useTabs)
284 }
285 } else {
286 // non-empty cell
287 useTabs = false
288 alignColumnRight := b.alignment[j] == AlignRight
289 if (b.flags&AlignRight == 0) && !alignColumnRight { // align left
290 b.write0(b.buf.Bytes()[pos : pos+c.size])
291 pos += c.size
292 if j < len(b.widths) {
293 b.writePadding(c.width, b.widths[j], false)
294 }
295 } else if alignColumnRight && j < len(b.widths) {
296 // just this column
297 internalSize := b.widths[j] - b.padding
298 if j < len(b.widths) {
299 b.writePadding(c.width, internalSize, false)
300 }
301 b.write0(b.buf.Bytes()[pos : pos+c.size])
302 if b.padding > 0 {
303 b.writePadding(0, b.padding, false)
304 }
305 pos += c.size
306 } else { // align right
307 if j < len(b.widths) {
308 b.writePadding(c.width, b.widths[j], false)
309 }
310 b.write0(b.buf.Bytes()[pos : pos+c.size])
311 pos += c.size
312 }
313 }
314 }
315
316 if i+1 == len(b.lines) {
317 // last buffered line - we don't have a newline, so just write
318 // any outstanding buffered data
319 b.write0(b.buf.Bytes()[pos : pos+b.cell.size])
320 pos += b.cell.size
321 } else {
322 // not the last line - write newline
323 b.write0(newline)
324 }
325 }
326 return
327}
328
329// Format the text between line0 and line1 (excluding line1); pos
330// is the buffer position corresponding to the beginning of line0.
331// Returns the buffer position corresponding to the beginning of
332// line1 and an error, if any.
333//
334func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
335 pos = pos0
336 column := len(b.widths)
337 for this := line0; this < line1; this++ {
338 line := b.lines[this]
339
340 if column < len(line)-1 {
341 // cell exists in this column => this line
342 // has more cells than the previous line
343 // (the last cell per line is ignored because cells are
344 // tab-terminated; the last cell per line describes the
345 // text before the newline/formfeed and does not belong
346 // to a column)
347
348 // print unprinted lines until beginning of block
349 pos = b.writeLines(pos, line0, this)
350 line0 = this
351
352 // column block begin
353 width := b.minwidth // minimal column width
354 discardable := true // true if all cells in this column are empty and "soft"
355 for ; this < line1; this++ {
356 line = b.lines[this]
357 if column < len(line)-1 {
358 // cell exists in this column
359 c := line[column]
360 // update width
361 if w := c.width + b.padding; w > width {
362 width = w
363 }
364 // update discardable
365 if c.width > 0 || c.htab {
366 discardable = false
367 }
368 } else {
369 break
370 }
371 }
372 // column block end
373
374 // discard empty columns if necessary
375 if discardable && b.flags&DiscardEmptyColumns != 0 {
376 width = 0
377 }
378
379 // format and print all columns to the right of this column
380 // (we know the widths of this column and all columns to the left)
381 b.widths = append(b.widths, width) // push width
382 pos = b.format(pos, line0, this)
383 b.widths = b.widths[0 : len(b.widths)-1] // pop width
384 line0 = this
385 }
386 }
387
388 // print unprinted lines until end
389 return b.writeLines(pos, line0, line1)
390}
391
392// Append text to current cell.
393func (b *Writer) append(text []byte) {
394 b.buf.Write(text)
395 b.cell.size += len(text)
396}
397
398// Update the cell width.
399func (b *Writer) updateWidth() {
400 // ---- Changes here -----
401 newChars := b.buf.Bytes()[b.pos:b.buf.Len()]
402 cleaned := vtclean.Clean(string(newChars), false) // false to strip colors
403 b.cell.width += utf8.RuneCount([]byte(cleaned))
404 // --- end of changes ----
405 b.pos = b.buf.Len()
406}
407
// Escape is the character used to bracket an escaped text segment.
// For instance, the tab in the string "Ignore this tab: \xff\t\xff" does
// not terminate a cell; it counts as a single character of width one for
// formatting purposes.
//
// The value 0xff was chosen because it cannot appear in a valid UTF-8
// sequence.
const Escape = '\xff'
416
417// Start escaped mode.
418func (b *Writer) startEscape(ch byte) {
419 switch ch {
420 case Escape:
421 b.endChar = Escape
422 case '<':
423 b.endChar = '>'
424 case '&':
425 b.endChar = ';'
426 }
427}
428
429// Terminate escaped mode. If the escaped text was an HTML tag, its width
430// is assumed to be zero for formatting purposes; if it was an HTML entity,
431// its width is assumed to be one. In all other cases, the width is the
432// unicode width of the text.
433//
434func (b *Writer) endEscape() {
435 switch b.endChar {
436 case Escape:
437 b.updateWidth()
438 if b.flags&StripEscape == 0 {
439 b.cell.width -= 2 // don't count the Escape chars
440 }
441 case '>': // tag of zero width
442 case ';':
443 b.cell.width++ // entity, count as one rune
444 }
445 b.pos = b.buf.Len()
446 b.endChar = 0
447}
448
449// Terminate the current cell by adding it to the list of cells of the
450// current line. Returns the number of cells in that line.
451//
452func (b *Writer) terminateCell(htab bool) int {
453 b.cell.htab = htab
454 line := &b.lines[len(b.lines)-1]
455 *line = append(*line, b.cell)
456 b.cell = cell{}
457 return len(*line)
458}
459
460func handlePanic(err *error, op string) {
461 if e := recover(); e != nil {
462 if nerr, ok := e.(osError); ok {
463 *err = nerr.err
464 return
465 }
466 panic("tabwriter: panic during " + op)
467 }
468}
469
470// Flush should be called after the last call to Write to ensure
471// that any data buffered in the Writer is written to output. Any
472// incomplete escape sequence at the end is considered
473// complete for formatting purposes.
474//
475func (b *Writer) Flush() (err error) {
476 defer b.reset() // even in the presence of errors
477 defer handlePanic(&err, "Flush")
478
479 // add current cell if not empty
480 if b.cell.size > 0 {
481 if b.endChar != 0 {
482 // inside escape - terminate it even if incomplete
483 b.endEscape()
484 }
485 b.terminateCell(false)
486 }
487
488 // format contents of buffer
489 b.format(0, 0, len(b.lines))
490
491 return
492}
493
// hbar is the section break written after a formfeed in Debug mode.
var hbar = []byte{'-', '-', '-', '\n'}
495
496// SetColumnAlignRight will mark a particular column as align right.
497// This is reset on the next flush.
498func (b *Writer) SetColumnAlignRight(column int) {
499 b.alignment[column] = AlignRight
500}
501
502// Write writes buf to the writer b.
503// The only errors returned are ones encountered
504// while writing to the underlying output stream.
505//
506func (b *Writer) Write(buf []byte) (n int, err error) {
507 defer handlePanic(&err, "Write")
508
509 // split text into cells
510 n = 0
511 for i, ch := range buf {
512 if b.endChar == 0 {
513 // outside escape
514 switch ch {
515 case '\t', '\v', '\n', '\f':
516 // end of cell
517 b.append(buf[n:i])
518 b.updateWidth()
519 n = i + 1 // ch consumed
520 ncells := b.terminateCell(ch == '\t')
521 if ch == '\n' || ch == '\f' {
522 // terminate line
523 b.addLine()
524 if ch == '\f' || ncells == 1 {
525 // A '\f' always forces a flush. Otherwise, if the previous
526 // line has only one cell which does not have an impact on
527 // the formatting of the following lines (the last cell per
528 // line is ignored by format()), thus we can flush the
529 // Writer contents.
530 if err = b.Flush(); err != nil {
531 return
532 }
533 if ch == '\f' && b.flags&Debug != 0 {
534 // indicate section break
535 b.write0(hbar)
536 }
537 }
538 }
539
540 case Escape:
541 // start of escaped sequence
542 b.append(buf[n:i])
543 b.updateWidth()
544 n = i
545 if b.flags&StripEscape != 0 {
546 n++ // strip Escape
547 }
548 b.startEscape(Escape)
549
550 case '<', '&':
551 // possibly an html tag/entity
552 if b.flags&FilterHTML != 0 {
553 // begin of tag/entity
554 b.append(buf[n:i])
555 b.updateWidth()
556 n = i
557 b.startEscape(ch)
558 }
559 }
560
561 } else {
562 // inside escape
563 if ch == b.endChar {
564 // end of tag/entity
565 j := i + 1
566 if ch == Escape && b.flags&StripEscape != 0 {
567 j = i // strip Escape
568 }
569 b.append(buf[n:j])
570 n = i + 1 // ch consumed
571 b.endEscape()
572 }
573 }
574 }
575
576 // append leftover text
577 b.append(buf[n:])
578 n = len(buf)
579 return
580}
581
582// NewWriter allocates and initializes a new tabwriter.Writer.
583// The parameters are the same as for the Init function.
584//
585func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
586 return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
587}