You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
2.1 KiB
88 lines
2.1 KiB
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "textflag.h"

// func xorBytesVSX(dst, a, b *byte, n int)
//
// xorBytesVSX writes a[i] ^ b[i] to dst[i] for every i in [0, n).
//
// Arguments (read from the caller's frame):
//	dst+0(FP)  destination pointer
//	a+8(FP)    first source pointer
//	b+16(FP)   second source pointer
//	n+24(FP)   byte count; compared as an unsigned value (CMPU)
//
// Register roles:
//	R3, R4, R5  dst, a, b base pointers (never modified)
//	R6          bytes still to process
//	R8          running byte index shared by all three buffers
//	R10         R8 + 16, index of the second vector in the 32-byte loop
//	R9          n & 31, the tail left over after the 32-byte loop
//	R14-R16     scalar scratch; VS32-VS35 vector scratch
//
// The code relies on R0 containing zero (it is used both to clear R8 and
// as the zero operand of CMP).
//
// Strategy: 32 bytes per iteration with VSX while n ≥ 32, then at most
// one 16-byte vector step, at most one 8-byte scalar step, and finally a
// byte-at-a-time loop for the remaining 0..7 bytes.
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
	MOVD	dst+0(FP), R3	// R3 = dst
	MOVD	a+8(FP), R4	// R4 = a
	MOVD	b+16(FP), R5	// R5 = b
	MOVD	n+24(FP), R6	// R6 = n

	// Dispatch on n. Both compares are issued up front into separate
	// CR fields so the two branches below can test them independently.
	CMPU	R6, $32, CR7	// CR7: n vs 32
	MOVD	R0, R8		// R8 = index = 0
	CMPU	R6, $8, CR6	// CR6: n vs 8
	BLT	CR6, small	// n < 8: byte loop only
	BLT	CR7, xor16	// 8 ≤ n < 32: skip the 32-byte loop

	// Case for n ≥ 32 bytes.
	SRD	$5, R6, R7	// R7 = n / 32 = iteration count
	MOVD	R7, CTR
	MOVD	$16, R10	// R10 = index of the second 16-byte half
	// R9 = n % 32. ANDCC also records "R9 == 0" in CR0; nothing inside
	// loop32 touches CR0, so the BEQ after the loop still sees it.
	ANDCC	$31, R6, R9

loop32:
	LXVD2X	(R4)(R8), VS32		// VS32 = a[i,...,i+15]
	LXVD2X	(R4)(R10), VS34		// VS34 = a[i+16,...,i+31]
	LXVD2X	(R5)(R8), VS33		// VS33 = b[i,...,i+15]
	LXVD2X	(R5)(R10), VS35		// VS35 = b[i+16,...,i+31]
	XXLXOR	VS32, VS33, VS32	// VS32 = a[i..i+15] ^ b[i..i+15]
	XXLXOR	VS34, VS35, VS34	// VS34 = a[i+16..i+31] ^ b[i+16..i+31]
	STXVD2X	VS32, (R3)(R8)		// Store low half to dst
	STXVD2X	VS34, (R3)(R10)		// Store high half to dst
	ADD	$32, R8			// Advance both indices by 32
	ADD	$32, R10
	BC	16, 0, loop32		// bdnz loop32

	BEQ	CR0, done		// n was a multiple of 32: nothing left

	MOVD	R9, R6			// R6 = remaining tail, 1..31 bytes
	CMP	R6, $8
	BLT	small

	// Here 8 ≤ R6 < 32.
xor16:
	CMP	R6, $16
	BLT	xor8
	LXVD2X	(R4)(R8), VS32		// VS32 = a[i,...,i+15]
	LXVD2X	(R5)(R8), VS33		// VS33 = b[i,...,i+15]
	XXLXOR	VS32, VS33, VS32	// VS32 = a[] ^ b[]
	STXVD2X	VS32, (R3)(R8)		// Store to dst
	ADD	$16, R8
	ADD	$-16, R6		// R6 is now 0..15
	CMP	R6, $8
	BLT	small

	// Here 8 ≤ R6 < 16.
xor8:
	MOVD	(R4)(R8), R14	// R14 = a[i,...,i+7]
	MOVD	(R5)(R8), R15	// R15 = b[i,...,i+7]
	XOR	R14, R15, R16	// R16 = a[] ^ b[]
	SUB	$8, R6		// n = n - 8, leaving 0..7
	MOVD	R16, (R3)(R8)	// Store to dst
	ADD	$8, R8
	// Fall through: small returns immediately when R6 == 0.

	// Case for n < 8 bytes and trailing bytes from the previous cases.
small:
	CMP	R6, R0
	BEQ	done		// Must not enter the CTR loop with a zero count
	MOVD	R6, CTR		// Setup loop counter

loop:
	MOVBZ	(R4)(R8), R14	// R14 = a[i]
	MOVBZ	(R5)(R8), R15	// R15 = b[i]
	XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
	MOVB	R16, (R3)(R8)	// Store to dst
	ADD	$1, R8
	BC	16, 0, loop	// bdnz loop

done:
	RET