diff options
Diffstat (limited to 'vendor/github.com/templexxx/xorsimd/xorbytes_amd64.s')
-rw-r--r-- | vendor/github.com/templexxx/xorsimd/xorbytes_amd64.s | 72 |
1 files changed, 72 insertions, 0 deletions
#include "textflag.h"

// func bytesN(dst, a, b *byte, n int)
//
// bytesN stores a[i] XOR b[i] into dst[i] for every i in [0, n).
//
// "Aligned" below refers to the remaining LENGTH being a multiple of 16,
// not to pointer alignment: all 16-byte accesses use MOVOU (unaligned moves).
//
// Strategy:
//   1. n <= 0: return immediately.
//   2. n is a multiple of 16: XOR 16 bytes at a time, walking forwards.
//   3. Otherwise trim the tail, walking backwards from the end:
//        - single bytes until the remaining length is a multiple of 8,
//        - then one 8-byte word if that is needed to reach a multiple of 16,
//      and finish the remaining prefix with the 16-byte forward loop.
//
// Registers used: AX, BX, CX, DX, SI, DI, X0, X1.
TEXT ·bytesN(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX   // BX = dst
	MOVQ  a+8(FP), SI     // SI = a
	MOVQ  b+16(FP), CX    // CX = b
	MOVQ  n+24(FP), DX    // DX = remaining length
	CMPQ  DX, $0          // n <= 0: nothing to do. Without this guard the
	JLE   ret             // 16-byte loop below would run before any length check.
	TESTQ $15, DX         // AND 15 & len, if not zero jump to not_aligned.
	JNZ   not_aligned

aligned:
	MOVQ  $0, AX          // position in slices

loop16b:
	MOVOU (SI)(AX*1), X0  // XOR 16 bytes forwards.
	MOVOU (CX)(AX*1), X1
	PXOR  X1, X0
	MOVOU X0, (BX)(AX*1)
	ADDQ  $16, AX
	CMPQ  DX, AX          // stop once position == len (len is a positive multiple of 16 here).
	JNE   loop16b
	RET

loop_1b:
	SUBQ  $1, DX          // XOR 1 byte backwards.
	MOVB  (SI)(DX*1), DI
	MOVB  (CX)(DX*1), AX
	XORB  AX, DI
	MOVB  DI, (BX)(DX*1)
	TESTQ $7, DX          // AND 7 & len, if not zero jump to loop_1b.
	JNZ   loop_1b
	CMPQ  DX, $0          // if len is 0, ret.
	JE    ret
	TESTQ $15, DX         // AND 15 & len, if zero jump to aligned.
	JZ    aligned
	                      // otherwise len % 16 == 8: fall through to the 8-byte step.

not_aligned:
	TESTQ $7, DX          // AND 7 & len, if not zero jump to loop_1b.
	JNE   loop_1b
	SUBQ  $8, DX          // XOR 8 bytes backwards.
	MOVQ  (SI)(DX*1), DI
	MOVQ  (CX)(DX*1), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)(DX*1)
	CMPQ  DX, $16         // if len is greater or equal 16 here, it must be aligned.
	JGE   aligned

ret:
	RET

// func bytes8(dst, a, b *byte)
//
// bytes8 XORs exactly 8 bytes: it loads one 64-bit word from a and one from b,
// XORs them, and stores the result at dst.
TEXT ·bytes8(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVQ  (SI), DI
	MOVQ  (CX), AX
	XORQ  AX, DI
	MOVQ  DI, (BX)
	RET

// func bytes16(dst, a, b *byte)
//
// bytes16 XORs exactly 16 bytes using one unaligned 128-bit load from each of
// a and b, a PXOR, and one unaligned 128-bit store to dst.
TEXT ·bytes16(SB), NOSPLIT, $0
	MOVQ  dst+0(FP), BX
	MOVQ  a+8(FP), SI
	MOVQ  b+16(FP), CX
	MOVOU (SI), X0
	MOVOU (CX), X1
	PXOR  X1, X0
	MOVOU X0, (BX)
	RET