summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/klauspost/reedsolomon/galois_amd64.go')
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_amd64.go138
1 files changed, 138 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
new file mode 100644
index 0000000..f757f9d
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
@@ -0,0 +1,138 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+// galMulSSSE3 is declared without a body; the comment further down in this
+// file describes it as an assembler routine. According to the reference code
+// there, it computes out[n] = low[in[n]&0xf] ^ high[in[n]>>4], working in
+// blocks of 16 bytes. The caller in this file finishes the trailing
+// len(in)%16 bytes itself.
+//
+//go:noescape
+func galMulSSSE3(low, high, in, out []byte)
+
+// galMulSSSE3Xor is declared without a body; the comment further down in
+// this file describes it as an assembler routine. According to the reference
+// code there, it computes out[n] ^= low[in[n]&0xf] ^ high[in[n]>>4], working
+// in blocks of 16 bytes. The caller in this file finishes the trailing
+// len(in)%16 bytes itself.
+//
+//go:noescape
+func galMulSSSE3Xor(low, high, in, out []byte)
+
+// galMulAVX2Xor is declared without a body (implemented outside this file).
+// NOTE(review): presumably the AVX2 counterpart of galMulSSSE3Xor, XORing the
+// low/high table lookups into out. The caller treats every whole 32-byte
+// block of in as handled after the call; confirm against the assembly.
+//
+//go:noescape
+func galMulAVX2Xor(low, high, in, out []byte)
+
+// galMulAVX2 is declared without a body (implemented outside this file).
+// NOTE(review): presumably the AVX2 counterpart of galMulSSSE3, storing the
+// low/high table lookups into out. The caller treats every whole 32-byte
+// block of in as handled after the call; confirm against the assembly.
+//
+//go:noescape
+func galMulAVX2(low, high, in, out []byte)
+
+// sSE2XorSlice is declared without a body (implemented outside this file).
+// NOTE(review): presumably XORs in into out (out[i] ^= in[i]), matching the
+// scalar tail loop in sliceXor. The caller treats every whole 16-byte block
+// of in as handled after the call; confirm against the assembly.
+//
+//go:noescape
+func sSE2XorSlice(in, out []byte)
+
+// galMulAVX2Xor_64 is declared without a body (implemented outside this
+// file). It is only called for inputs of at least bigSwitchover bytes.
+// NOTE(review): presumably the same operation as galMulAVX2Xor. The caller
+// treats every whole 64-byte block of in as handled after the call; confirm
+// against the assembly.
+//
+//go:noescape
+func galMulAVX2Xor_64(low, high, in, out []byte)
+
+// galMulAVX2_64 is declared without a body (implemented outside this file).
+// It is only called for inputs of at least bigSwitchover bytes.
+// NOTE(review): presumably the same operation as galMulAVX2. The caller
+// treats every whole 64-byte block of in as handled after the call; confirm
+// against the assembly.
+//
+//go:noescape
+func galMulAVX2_64(low, high, in, out []byte)
+
+// sSE2XorSlice_64 is declared without a body (implemented outside this
+// file). It is only called for inputs of at least bigSwitchover bytes.
+// NOTE(review): presumably the same operation as sSE2XorSlice. The caller
+// treats every whole 64-byte block of in as handled after the call; confirm
+// against the assembly.
+//
+//go:noescape
+func sSE2XorSlice_64(in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulSSSE3(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] = low[l] ^ high[h]
+ }
+}
+
+func galMulSSSE3Xor(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] ^= low[l] ^ high[h]
+ }
+}
+*/
+
+// bigSwitchover is the input length, in bytes, from which the routines that
+// process 64 bytes per loop (the *_64 variants) are used instead of going
+// straight to the smaller-block routines.
+const bigSwitchover = 128
+
+// galMulSlice writes, for every byte of in, the table product for the
+// constant c into out: out[i] = mulTable[c][in[i]].
+//
+// c == 1 is handled as a plain copy of in into out. Otherwise the bulk of the
+// data is handed to the assembly routines selected by the flags in o (AVX2
+// first, SSSE3 as the alternative), using the split low/high nibble tables
+// for c. Whatever those routines leave over is finished with a byte-wise
+// lookup in mulTable[c].
+//
+// out is expected to be at least as long as in; the tail step reslices out to
+// len(in).
+func galMulSlice(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		copy(out, in)
+		return
+	}
+	if o.useAVX2 {
+		if len(in) >= bigSwitchover {
+			galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			// Advance past the whole 64-byte blocks handed to the routine.
+			done := (len(in) >> 6) << 6
+			in = in[done:]
+			out = out[done:]
+		}
+		// Use >= rather than > so that a remainder of exactly one 32-byte
+		// block also takes the AVX2 path, consistent with galMulSliceXor.
+		if len(in) >= 32 {
+			galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			// Advance past the whole 32-byte blocks handed to the routine.
+			done := (len(in) >> 5) << 5
+			in = in[done:]
+			out = out[done:]
+		}
+	} else if o.useSSSE3 {
+		galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		// Advance past the whole 16-byte blocks handed to the routine.
+		done := (len(in) >> 4) << 4
+		in = in[done:]
+		out = out[done:]
+	}
+	// Reslice once so the compiler can drop per-element bounds checks below.
+	out = out[:len(in)]
+	mt := mulTable[c][:256]
+	for i := range in {
+		out[i] = mt[in[i]]
+	}
+}
+
+// galMulSliceXor XORs the table product for the constant c into out:
+// out[i] ^= mulTable[c][in[i]].
+//
+// With c == 1 the work is delegated to sliceXor. Otherwise the assembly
+// routine matching the flags in o takes the leading whole blocks, and the
+// remaining bytes are folded in one at a time from mulTable[c].
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		sliceXor(in, out, o)
+		return
+	}
+
+	switch {
+	case o.useAVX2:
+		if len(in) >= bigSwitchover {
+			galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			// Drop the whole 64-byte blocks from both slices.
+			skip := len(in) &^ 63
+			in, out = in[skip:], out[skip:]
+		}
+		if len(in) >= 32 {
+			galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			// Drop the whole 32-byte blocks from both slices.
+			skip := len(in) &^ 31
+			in, out = in[skip:], out[skip:]
+		}
+	case o.useSSSE3:
+		galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		// Drop the whole 16-byte blocks from both slices.
+		skip := len(in) &^ 15
+		in, out = in[skip:], out[skip:]
+	}
+
+	// Byte-wise tail: whatever was not covered by a full block above.
+	rest := out[:len(in)]
+	table := mulTable[c][:256]
+	for i, b := range in {
+		rest[i] ^= table[b]
+	}
+}
+
+// sliceXor XORs in into out element by element (out[i] ^= in[i]), the
+// "slice galois add". When o.useSSE2 is set, the SSE2 routines are called
+// first for the leading whole blocks; the bytes left over are handled by the
+// plain loop at the end.
+func sliceXor(in, out []byte, o *options) {
+	if o.useSSE2 {
+		if len(in) >= bigSwitchover {
+			sSE2XorSlice_64(in, out)
+			// Drop the whole 64-byte blocks from both slices.
+			skip := len(in) &^ 63
+			in, out = in[skip:], out[skip:]
+		}
+		if len(in) >= 16 {
+			sSE2XorSlice(in, out)
+			// Drop the whole 16-byte blocks from both slices.
+			skip := len(in) &^ 15
+			in, out = in[skip:], out[skip:]
+		}
+	}
+	rest := out[:len(in)]
+	for i, b := range in {
+		rest[i] ^= b
+	}
+}