| // Copyright 2016 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build !appengine |
| // +build gc |
| // +build !noasm |
| |
| #include "textflag.h" |
| |
| // The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a |
| // Go toolchain regression. See https://github.com/golang/go/issues/15426 and |
| // https://github.com/golang/snappy/issues/29 |
| // |
| // As a workaround, the package was built with a known good assembler, and |
| // those instructions were disassembled by "objdump -d" to yield the |
| // 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 |
| // style comments, in AT&T asm syntax. Note that rsp here is a physical |
| // register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). |
| // The instructions were then encoded as "BYTE $0x.." sequences, which assemble |
| // fine on Go 1.6. |
| |
| // The asm code generally follows the pure Go code in encode_other.go, except |
| // where marked with a "!!!". |
| |
| // ---------------------------------------------------------------------------- |
| |
| // func emitLiteral(dst, lit []byte) int |
| // |
| // All local variables fit into registers. The register allocation: |
| // - AX len(lit) |
| // - BX n |
| // - DX return value |
| // - DI &dst[i] |
| // - R10 &lit[0] |
| // |
| // The 24 bytes of stack space is to call runtime·memmove. |
| // |
| // The unusual register allocation of local variables, such as R10 for the |
| // source pointer, matches the allocation used at the call site in encodeBlock, |
| // which makes it easier to manually inline this function. |
| TEXT ·emitLiteral(SB), NOSPLIT, $24-56 |
| MOVQ dst_base+0(FP), DI |
| MOVQ lit_base+24(FP), R10 |
| MOVQ lit_len+32(FP), AX |
| MOVQ AX, DX |
| MOVL AX, BX |
| SUBL $1, BX |
| |
| CMPL BX, $60 |
| JLT oneByte |
| CMPL BX, $256 |
| JLT twoBytes |
| |
| threeBytes: |
| MOVB $0xf4, 0(DI) |
| MOVW BX, 1(DI) |
| ADDQ $3, DI |
| ADDQ $3, DX |
| JMP memmove |
| |
| twoBytes: |
| MOVB $0xf0, 0(DI) |
| MOVB BX, 1(DI) |
| ADDQ $2, DI |
| ADDQ $2, DX |
| JMP memmove |
| |
| oneByte: |
| SHLB $2, BX |
| MOVB BX, 0(DI) |
| ADDQ $1, DI |
| ADDQ $1, DX |
| |
| memmove: |
| MOVQ DX, ret+48(FP) |
| |
| // copy(dst[i:], lit) |
| // |
| // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push |
| // DI, R10 and AX as arguments. |
| MOVQ DI, 0(SP) |
| MOVQ R10, 8(SP) |
| MOVQ AX, 16(SP) |
| CALL runtime·memmove(SB) |
| RET |
| |
| // ---------------------------------------------------------------------------- |
| |
| // func emitCopy(dst []byte, offset, length int) int |
| // |
| // All local variables fit into registers. The register allocation: |
| // - AX length |
| // - SI &dst[0] |
| // - DI &dst[i] |
| // - R11 offset |
| // |
| // The unusual register allocation of local variables, such as R11 for the |
| // offset, matches the allocation used at the call site in encodeBlock, which |
| // makes it easier to manually inline this function. |
| TEXT ·emitCopy(SB), NOSPLIT, $0-48 |
| MOVQ dst_base+0(FP), DI |
| MOVQ DI, SI |
| MOVQ offset+24(FP), R11 |
| MOVQ length+32(FP), AX |
| |
| loop0: |
| // for length >= 68 { etc } |
| CMPL AX, $68 |
| JLT step1 |
| |
| // Emit a length 64 copy, encoded as 3 bytes. |
| MOVB $0xfe, 0(DI) |
| MOVW R11, 1(DI) |
| ADDQ $3, DI |
| SUBL $64, AX |
| JMP loop0 |
| |
| step1: |
| // if length > 64 { etc } |
| CMPL AX, $64 |
| JLE step2 |
| |
| // Emit a length 60 copy, encoded as 3 bytes. |
| MOVB $0xee, 0(DI) |
| MOVW R11, 1(DI) |
| ADDQ $3, DI |
| SUBL $60, AX |
| |
| step2: |
| // if length >= 12 || offset >= 2048 { goto step3 } |
| CMPL AX, $12 |
| JGE step3 |
| CMPL R11, $2048 |
| JGE step3 |
| |
| // Emit the remaining copy, encoded as 2 bytes. |
| MOVB R11, 1(DI) |
| SHRL $8, R11 |
| SHLB $5, R11 |
| SUBB $4, AX |
| SHLB $2, AX |
| ORB AX, R11 |
| ORB $1, R11 |
| MOVB R11, 0(DI) |
| ADDQ $2, DI |
| |
| // Return the number of bytes written. |
| SUBQ SI, DI |
| MOVQ DI, ret+40(FP) |
| RET |
| |
| step3: |
| // Emit the remaining copy, encoded as 3 bytes. |
| SUBL $1, AX |
| SHLB $2, AX |
| ORB $2, AX |
| MOVB AX, 0(DI) |
| MOVW R11, 1(DI) |
| ADDQ $3, DI |
| |
| // Return the number of bytes written. |
| SUBQ SI, DI |
| MOVQ DI, ret+40(FP) |
| RET |
| |
| // ---------------------------------------------------------------------------- |
| |
| // func extendMatch(src []byte, i, j int) int |
| // |
| // All local variables fit into registers. The register allocation: |
| // - DX &src[0] |
| // - SI &src[j] |
| // - R13 &src[len(src) - 8] |
| // - R14 &src[len(src)] |
| // - R15 &src[i] |
| // |
| // The unusual register allocation of local variables, such as R15 for a source |
| // pointer, matches the allocation used at the call site in encodeBlock, which |
| // makes it easier to manually inline this function. |
| TEXT ·extendMatch(SB), NOSPLIT, $0-48 |
| MOVQ src_base+0(FP), DX |
| MOVQ src_len+8(FP), R14 |
| MOVQ i+24(FP), R15 |
| MOVQ j+32(FP), SI |
| ADDQ DX, R14 |
| ADDQ DX, R15 |
| ADDQ DX, SI |
| MOVQ R14, R13 |
| SUBQ $8, R13 |
| |
| cmp8: |
| // As long as we are 8 or more bytes before the end of src, we can load and |
| // compare 8 bytes at a time. If those 8 bytes are equal, repeat. |
| CMPQ SI, R13 |
| JA cmp1 |
| MOVQ (R15), AX |
| MOVQ (SI), BX |
| CMPQ AX, BX |
| JNE bsf |
| ADDQ $8, R15 |
| ADDQ $8, SI |
| JMP cmp8 |
| |
| bsf: |
| // If those 8 bytes were not equal, XOR the two 8 byte values, and return |
| // the index of the first byte that differs. The BSF instruction finds the |
| // least significant 1 bit, the amd64 architecture is little-endian, and |
| // the shift by 3 converts a bit index to a byte index. |
| XORQ AX, BX |
| BSFQ BX, BX |
| SHRQ $3, BX |
| ADDQ BX, SI |
| |
| // Convert from &src[ret] to ret. |
| SUBQ DX, SI |
| MOVQ SI, ret+40(FP) |
| RET |
| |
| cmp1: |
| // In src's tail, compare 1 byte at a time. |
| CMPQ SI, R14 |
| JAE extendMatchEnd |
| MOVB (R15), AX |
| MOVB (SI), BX |
| CMPB AX, BX |
| JNE extendMatchEnd |
| ADDQ $1, R15 |
| ADDQ $1, SI |
| JMP cmp1 |
| |
| extendMatchEnd: |
| // Convert from &src[ret] to ret. |
| SUBQ DX, SI |
| MOVQ SI, ret+40(FP) |
| RET |
| |
| // ---------------------------------------------------------------------------- |
| |
| // func encodeBlock(dst, src []byte) (d int) |
| // |
| // All local variables fit into registers, other than "var table". The register |
| // allocation: |
| // - AX . . |
| // - BX . . |
| // - CX 56 shift (note that amd64 shifts by non-immediates must use CX). |
| // - DX 64 &src[0], tableSize |
| // - SI 72 &src[s] |
| // - DI 80 &dst[d] |
| // - R9 88 sLimit |
| // - R10 . &src[nextEmit] |
| // - R11 96 prevHash, currHash, nextHash, offset |
| // - R12 104 &src[base], skip |
| // - R13 . &src[nextS], &src[len(src) - 8] |
| // - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x |
| // - R15 112 candidate |
| // |
| // The second column (56, 64, etc) is the stack offset to spill the registers |
| // when calling other functions. We could pack this slightly tighter, but it's |
| // simpler to have a dedicated spill map independent of the function called. |
| // |
| // "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An |
| // extra 56 bytes, to call other functions, and an extra 64 bytes, to spill |
| // local variables (registers) during calls gives 32768 + 56 + 64 = 32888. |
| TEXT ·encodeBlock(SB), 0, $32888-56 |
| MOVQ dst_base+0(FP), DI |
| MOVQ src_base+24(FP), SI |
| MOVQ src_len+32(FP), R14 |
| |
| // shift, tableSize := uint32(32-8), 1<<8 |
| MOVQ $24, CX |
| MOVQ $256, DX |
| |
| calcShift: |
| // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { |
| // shift-- |
| // } |
| CMPQ DX, $16384 |
| JGE varTable |
| CMPQ DX, R14 |
| JGE varTable |
| SUBQ $1, CX |
| SHLQ $1, DX |
| JMP calcShift |
| |
| varTable: |
| // var table [maxTableSize]uint16 |
| // |
| // In the asm code, unlike the Go code, we can zero-initialize only the |
| // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU |
| // writes 16 bytes, so we can do only tableSize/8 writes instead of the |
| // 2048 writes that would zero-initialize all of table's 32768 bytes. |
| SHRQ $3, DX |
| LEAQ table-32768(SP), BX |
| PXOR X0, X0 |
| |
| memclr: |
| MOVOU X0, 0(BX) |
| ADDQ $16, BX |
| SUBQ $1, DX |
| JNZ memclr |
| |
| // !!! DX = &src[0] |
| MOVQ SI, DX |
| |
| // sLimit := len(src) - inputMargin |
| MOVQ R14, R9 |
| SUBQ $15, R9 |
| |
| // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't |
| // change for the rest of the function. |
| MOVQ CX, 56(SP) |
| MOVQ DX, 64(SP) |
| MOVQ R9, 88(SP) |
| |
| // nextEmit := 0 |
| MOVQ DX, R10 |
| |
| // s := 1 |
| ADDQ $1, SI |
| |
| // nextHash := hash(load32(src, s), shift) |
| MOVL 0(SI), R11 |
| IMULL $0x1e35a7bd, R11 |
| SHRL CX, R11 |
| |
| outer: |
| // for { etc } |
| |
| // skip := 32 |
| MOVQ $32, R12 |
| |
| // nextS := s |
| MOVQ SI, R13 |
| |
| // candidate := 0 |
| MOVQ $0, R15 |
| |
| inner0: |
| // for { etc } |
| |
| // s := nextS |
| MOVQ R13, SI |
| |
| // bytesBetweenHashLookups := skip >> 5 |
| MOVQ R12, R14 |
| SHRQ $5, R14 |
| |
| // nextS = s + bytesBetweenHashLookups |
| ADDQ R14, R13 |
| |
| // skip += bytesBetweenHashLookups |
| ADDQ R14, R12 |
| |
| // if nextS > sLimit { goto emitRemainder } |
| MOVQ R13, AX |
| SUBQ DX, AX |
| CMPQ AX, R9 |
| JA emitRemainder |
| |
| // candidate = int(table[nextHash]) |
| // XXX: MOVWQZX table-32768(SP)(R11*2), R15 |
| // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 |
| BYTE $0x4e |
| BYTE $0x0f |
| BYTE $0xb7 |
| BYTE $0x7c |
| BYTE $0x5c |
| BYTE $0x78 |
| |
| // table[nextHash] = uint16(s) |
| MOVQ SI, AX |
| SUBQ DX, AX |
| |
| // XXX: MOVW AX, table-32768(SP)(R11*2) |
| // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) |
| BYTE $0x66 |
| BYTE $0x42 |
| BYTE $0x89 |
| BYTE $0x44 |
| BYTE $0x5c |
| BYTE $0x78 |
| |
| // nextHash = hash(load32(src, nextS), shift) |
| MOVL 0(R13), R11 |
| IMULL $0x1e35a7bd, R11 |
| SHRL CX, R11 |
| |
| // if load32(src, s) != load32(src, candidate) { continue } break |
| MOVL 0(SI), AX |
| MOVL (DX)(R15*1), BX |
| CMPL AX, BX |
| JNE inner0 |
| |
| fourByteMatch: |
| // As per the encode_other.go code: |
| // |
| // A 4-byte match has been found. We'll later see etc. |
| |
| // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment |
| // on inputMargin in encode.go. |
| MOVQ SI, AX |
| SUBQ R10, AX |
| CMPQ AX, $16 |
| JLE emitLiteralFastPath |
| |
| // ---------------------------------------- |
| // Begin inline of the emitLiteral call. |
| // |
| // d += emitLiteral(dst[d:], src[nextEmit:s]) |
| |
| MOVL AX, BX |
| SUBL $1, BX |
| |
| CMPL BX, $60 |
| JLT inlineEmitLiteralOneByte |
| CMPL BX, $256 |
| JLT inlineEmitLiteralTwoBytes |
| |
| inlineEmitLiteralThreeBytes: |
| MOVB $0xf4, 0(DI) |
| MOVW BX, 1(DI) |
| ADDQ $3, DI |
| JMP inlineEmitLiteralMemmove |
| |
| inlineEmitLiteralTwoBytes: |
| MOVB $0xf0, 0(DI) |
| MOVB BX, 1(DI) |
| ADDQ $2, DI |
| JMP inlineEmitLiteralMemmove |
| |
| inlineEmitLiteralOneByte: |
| SHLB $2, BX |
| MOVB BX, 0(DI) |
| ADDQ $1, DI |
| |
| inlineEmitLiteralMemmove: |
| // Spill local variables (registers) onto the stack; call; unspill. |
| // |
| // copy(dst[i:], lit) |
| // |
| // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push |
| // DI, R10 and AX as arguments. |
| MOVQ DI, 0(SP) |
| MOVQ R10, 8(SP) |
| MOVQ AX, 16(SP) |
| ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". |
| MOVQ SI, 72(SP) |
| MOVQ DI, 80(SP) |
| MOVQ R15, 112(SP) |
| CALL runtime·memmove(SB) |
| MOVQ 56(SP), CX |
| MOVQ 64(SP), DX |
| MOVQ 72(SP), SI |
| MOVQ 80(SP), DI |
| MOVQ 88(SP), R9 |
| MOVQ 112(SP), R15 |
| JMP inner1 |
| |
| inlineEmitLiteralEnd: |
| // End inline of the emitLiteral call. |
| // ---------------------------------------- |
| |
| emitLiteralFastPath: |
| // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". |
| MOVB AX, BX |
| SUBB $1, BX |
| SHLB $2, BX |
| MOVB BX, (DI) |
| ADDQ $1, DI |
| |
| // !!! Implement the copy from lit to dst as a 16-byte load and store. |
| // (Encode's documentation says that dst and src must not overlap.) |
| // |
| // This always copies 16 bytes, instead of only len(lit) bytes, but that's |
| // OK. Subsequent iterations will fix up the overrun. |
| // |
| // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or |
| // 16-byte loads and stores. This technique probably wouldn't be as |
| // effective on architectures that are fussier about alignment. |
| MOVOU 0(R10), X0 |
| MOVOU X0, 0(DI) |
| ADDQ AX, DI |
| |
| inner1: |
| // for { etc } |
| |
| // base := s |
| MOVQ SI, R12 |
| |
| // !!! offset := base - candidate |
| MOVQ R12, R11 |
| SUBQ R15, R11 |
| SUBQ DX, R11 |
| |
| // ---------------------------------------- |
| // Begin inline of the extendMatch call. |
| // |
| // s = extendMatch(src, candidate+4, s+4) |
| |
| // !!! R14 = &src[len(src)] |
| MOVQ src_len+32(FP), R14 |
| ADDQ DX, R14 |
| |
| // !!! R13 = &src[len(src) - 8] |
| MOVQ R14, R13 |
| SUBQ $8, R13 |
| |
| // !!! R15 = &src[candidate + 4] |
| ADDQ $4, R15 |
| ADDQ DX, R15 |
| |
| // !!! s += 4 |
| ADDQ $4, SI |
| |
| inlineExtendMatchCmp8: |
| // As long as we are 8 or more bytes before the end of src, we can load and |
| // compare 8 bytes at a time. If those 8 bytes are equal, repeat. |
| CMPQ SI, R13 |
| JA inlineExtendMatchCmp1 |
| MOVQ (R15), AX |
| MOVQ (SI), BX |
| CMPQ AX, BX |
| JNE inlineExtendMatchBSF |
| ADDQ $8, R15 |
| ADDQ $8, SI |
| JMP inlineExtendMatchCmp8 |
| |
| inlineExtendMatchBSF: |
| // If those 8 bytes were not equal, XOR the two 8 byte values, and return |
| // the index of the first byte that differs. The BSF instruction finds the |
| // least significant 1 bit, the amd64 architecture is little-endian, and |
| // the shift by 3 converts a bit index to a byte index. |
| XORQ AX, BX |
| BSFQ BX, BX |
| SHRQ $3, BX |
| ADDQ BX, SI |
| JMP inlineExtendMatchEnd |
| |
| inlineExtendMatchCmp1: |
| // In src's tail, compare 1 byte at a time. |
| CMPQ SI, R14 |
| JAE inlineExtendMatchEnd |
| MOVB (R15), AX |
| MOVB (SI), BX |
| CMPB AX, BX |
| JNE inlineExtendMatchEnd |
| ADDQ $1, R15 |
| ADDQ $1, SI |
| JMP inlineExtendMatchCmp1 |
| |
| inlineExtendMatchEnd: |
| // End inline of the extendMatch call. |
| // ---------------------------------------- |
| |
| // ---------------------------------------- |
| // Begin inline of the emitCopy call. |
| // |
| // d += emitCopy(dst[d:], base-candidate, s-base) |
| |
| // !!! length := s - base |
| MOVQ SI, AX |
| SUBQ R12, AX |
| |
| inlineEmitCopyLoop0: |
| // for length >= 68 { etc } |
| CMPL AX, $68 |
| JLT inlineEmitCopyStep1 |
| |
| // Emit a length 64 copy, encoded as 3 bytes. |
| MOVB $0xfe, 0(DI) |
| MOVW R11, 1(DI) |
| ADDQ $3, DI |
| SUBL $64, AX |
| JMP inlineEmitCopyLoop0 |
| |
| inlineEmitCopyStep1: |
| // if length > 64 { etc } |
| CMPL AX, $64 |
| JLE inlineEmitCopyStep2 |
| |
| // Emit a length 60 copy, encoded as 3 bytes. |
| MOVB $0xee, 0(DI) |
| MOVW R11, 1(DI) |
| ADDQ $3, DI |
| SUBL $60, AX |
| |
| inlineEmitCopyStep2: |
| // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } |
| CMPL AX, $12 |
| JGE inlineEmitCopyStep3 |
| CMPL R11, $2048 |
| JGE inlineEmitCopyStep3 |
| |
| // Emit the remaining copy, encoded as 2 bytes. |
| MOVB R11, 1(DI) |
| SHRL $8, R11 |
| SHLB $5, R11 |
| SUBB $4, AX |
| SHLB $2, AX |
| ORB AX, R11 |
| ORB $1, R11 |
| MOVB R11, 0(DI) |
| ADDQ $2, DI |
| JMP inlineEmitCopyEnd |
| |
| inlineEmitCopyStep3: |
| // Emit the remaining copy, encoded as 3 bytes. |
| SUBL $1, AX |
| SHLB $2, AX |
| ORB $2, AX |
| MOVB AX, 0(DI) |
| MOVW R11, 1(DI) |
| ADDQ $3, DI |
| |
| inlineEmitCopyEnd: |
| // End inline of the emitCopy call. |
| // ---------------------------------------- |
| |
| // nextEmit = s |
| MOVQ SI, R10 |
| |
| // if s >= sLimit { goto emitRemainder } |
| MOVQ SI, AX |
| SUBQ DX, AX |
| CMPQ AX, R9 |
| JAE emitRemainder |
| |
| // As per the encode_other.go code: |
| // |
| // We could immediately etc. |
| |
| // x := load64(src, s-1) |
| MOVQ -1(SI), R14 |
| |
| // prevHash := hash(uint32(x>>0), shift) |
| MOVL R14, R11 |
| IMULL $0x1e35a7bd, R11 |
| SHRL CX, R11 |
| |
| // table[prevHash] = uint16(s-1) |
| MOVQ SI, AX |
| SUBQ DX, AX |
| SUBQ $1, AX |
| |
| // XXX: MOVW AX, table-32768(SP)(R11*2) |
| // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) |
| BYTE $0x66 |
| BYTE $0x42 |
| BYTE $0x89 |
| BYTE $0x44 |
| BYTE $0x5c |
| BYTE $0x78 |
| |
| // currHash := hash(uint32(x>>8), shift) |
| SHRQ $8, R14 |
| MOVL R14, R11 |
| IMULL $0x1e35a7bd, R11 |
| SHRL CX, R11 |
| |
| // candidate = int(table[currHash]) |
| // XXX: MOVWQZX table-32768(SP)(R11*2), R15 |
| // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 |
| BYTE $0x4e |
| BYTE $0x0f |
| BYTE $0xb7 |
| BYTE $0x7c |
| BYTE $0x5c |
| BYTE $0x78 |
| |
| // table[currHash] = uint16(s) |
| ADDQ $1, AX |
| |
| // XXX: MOVW AX, table-32768(SP)(R11*2) |
| // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) |
| BYTE $0x66 |
| BYTE $0x42 |
| BYTE $0x89 |
| BYTE $0x44 |
| BYTE $0x5c |
| BYTE $0x78 |
| |
| // if uint32(x>>8) == load32(src, candidate) { continue } |
| MOVL (DX)(R15*1), BX |
| CMPL R14, BX |
| JEQ inner1 |
| |
| // nextHash = hash(uint32(x>>16), shift) |
| SHRQ $8, R14 |
| MOVL R14, R11 |
| IMULL $0x1e35a7bd, R11 |
| SHRL CX, R11 |
| |
| // s++ |
| ADDQ $1, SI |
| |
| // break out of the inner1 for loop, i.e. continue the outer loop. |
| JMP outer |
| |
| emitRemainder: |
| // if nextEmit < len(src) { etc } |
| MOVQ src_len+32(FP), AX |
| ADDQ DX, AX |
| CMPQ R10, AX |
| JEQ encodeBlockEnd |
| |
| // d += emitLiteral(dst[d:], src[nextEmit:]) |
| // |
| // Push args. |
| MOVQ DI, 0(SP) |
| MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. |
| MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. |
| MOVQ R10, 24(SP) |
| SUBQ R10, AX |
| MOVQ AX, 32(SP) |
| MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. |
| |
| // Spill local variables (registers) onto the stack; call; unspill. |
| MOVQ DI, 80(SP) |
| CALL ·emitLiteral(SB) |
| MOVQ 80(SP), DI |
| |
| // Finish the "d +=" part of "d += emitLiteral(etc)". |
| ADDQ 48(SP), DI |
| |
| encodeBlockEnd: |
| MOVQ dst_base+0(FP), AX |
| SUBQ AX, DI |
| MOVQ DI, d+48(FP) |
| RET |