You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.3 KiB
54 lines
1.3 KiB
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
#include "textflag.h" |
|
|
|
// func xorBytesSSE2(dst, a, b *byte, n int)
//
// Stores a[i] ^ b[i] into dst[i] for every i in [0, n).
// A count of n <= 0 is a no-op: no memory is read or written.
//
// Strategy: the tail is consumed backwards (single bytes, then at most one
// 8-byte word) until the remaining length is a multiple of 16; whatever is
// left, [0, DX), is then processed forwards in 16-byte SSE2 chunks. All
// vector accesses use MOVOU (unaligned), so "aligned" below refers to the
// remaining *length* being a multiple of 16, not to pointer alignment.
//
// Register roles:
//   BX = dst, SI = a, CX = b
//   DX = number of bytes still to process (always the prefix [0, DX))
//   AX = forward byte offset in the 16-byte loop; scratch in the tail code
//   DI = scratch in the tail code
//   X0, X1 = 16-byte operands
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
	MOVQ	dst+0(FP), BX
	MOVQ	a+8(FP), SI
	MOVQ	b+16(FP), CX
	MOVQ	n+24(FP), DX
	TESTQ	DX, DX
	JLE	ret			// n <= 0: nothing to do; without this guard the 16-byte loop would run on empty input
	TESTQ	$15, DX			// is n a multiple of 16?
	JNZ	not_aligned		// no: trim the tail first

aligned:
	// Invariant: DX > 0 and DX is a multiple of 16.
	XORL	AX, AX			// AX = 0, forward offset into the buffers

loop16b:
	MOVOU	(SI)(AX*1), X0		// X0 = a[AX : AX+16]
	MOVOU	(CX)(AX*1), X1		// X1 = b[AX : AX+16]
	PXOR	X1, X0
	MOVOU	X0, (BX)(AX*1)		// dst[AX : AX+16] = X0
	ADDQ	$16, AX
	CMPQ	DX, AX
	JNE	loop16b			// continue until offset reaches the remaining length
	RET

loop_1b:
	// Invariant on entry: DX > 0 and DX is not a multiple of 8.
	SUBQ	$1, DX			// step back one byte
	MOVB	(SI)(DX*1), DI
	MOVB	(CX)(DX*1), AX
	XORB	AX, DI
	MOVB	DI, (BX)(DX*1)		// dst[DX] = a[DX] ^ b[DX]
	TESTQ	$7, DX
	JNZ	loop_1b			// keep trimming until DX is a multiple of 8
	TESTQ	DX, DX
	JZ	ret			// everything consumed
	TESTQ	$15, DX
	JZ	aligned			// multiple of 16: hand the rest to the SSE loop
	// Otherwise DX is an odd multiple of 8: fall through to the 8-byte step.

not_aligned:
	TESTQ	$7, DX
	JNE	loop_1b			// not a multiple of 8: trim single bytes first
	SUBQ	$8, DX			// DX was an odd multiple of 8; step back one 8-byte word
	MOVQ	(SI)(DX*1), DI
	MOVQ	(CX)(DX*1), AX
	XORQ	AX, DI
	MOVQ	DI, (BX)(DX*1)		// dst[DX : DX+8] = a[DX : DX+8] ^ b[DX : DX+8]
	CMPQ	DX, $16			// DX is now a multiple of 16: either 0 or >= 16
	JGE	aligned

ret:
	RET
|
|
|