summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost
diff options
context:
space:
mode:
authorkali kaneko (leap communications) <kali@leap.se>2021-11-29 01:46:27 +0100
committerkali kaneko (leap communications) <kali@leap.se>2021-11-29 18:14:16 +0100
commit18f52af5be3a9a0c73811706108f790d65ee9c67 (patch)
treee13cbacb47d56919caa9c44a2b45dec1497a7860 /vendor/github.com/klauspost
parentebcef0d57b6ecb5a40c6579f6be07182dd3033ba (diff)
[pkg] update vendor
Diffstat (limited to 'vendor/github.com/klauspost')
-rw-r--r--vendor/github.com/klauspost/cpuid/.gitignore24
-rw-r--r--vendor/github.com/klauspost/cpuid/.travis.yml46
-rw-r--r--vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt35
-rw-r--r--vendor/github.com/klauspost/cpuid/LICENSE22
-rw-r--r--vendor/github.com/klauspost/cpuid/README.md191
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid.go1504
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid_386.s42
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid_amd64.s42
-rw-r--r--vendor/github.com/klauspost/cpuid/cpuid_arm64.s26
-rw-r--r--vendor/github.com/klauspost/cpuid/detect_arm64.go219
-rw-r--r--vendor/github.com/klauspost/cpuid/detect_intel.go33
-rw-r--r--vendor/github.com/klauspost/cpuid/detect_ref.go14
-rw-r--r--vendor/github.com/klauspost/cpuid/go.mod3
-rw-r--r--vendor/github.com/klauspost/reedsolomon/.gitignore26
-rw-r--r--vendor/github.com/klauspost/reedsolomon/.travis.yml77
-rw-r--r--vendor/github.com/klauspost/reedsolomon/LICENSE23
-rw-r--r--vendor/github.com/klauspost/reedsolomon/README.md395
-rw-r--r--vendor/github.com/klauspost/reedsolomon/appveyor.yml20
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois.go929
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go338
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s400
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_amd64.go138
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_amd64.s368
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_arm64.go67
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_arm64.s125
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go408
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s18526
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_none.go11
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go293
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_noasm.go44
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_notamd64.go13
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go75
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s124
-rw-r--r--vendor/github.com/klauspost/reedsolomon/gen.go249
-rw-r--r--vendor/github.com/klauspost/reedsolomon/go.mod7
-rw-r--r--vendor/github.com/klauspost/reedsolomon/go.sum2
-rw-r--r--vendor/github.com/klauspost/reedsolomon/inversion_tree.go160
-rw-r--r--vendor/github.com/klauspost/reedsolomon/matrix.go279
-rw-r--r--vendor/github.com/klauspost/reedsolomon/options.go175
-rw-r--r--vendor/github.com/klauspost/reedsolomon/reedsolomon.go1011
-rw-r--r--vendor/github.com/klauspost/reedsolomon/streaming.go603
41 files changed, 27087 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/klauspost/cpuid/.gitignore
new file mode 100644
index 0000000..daf913b
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/.gitignore
@@ -0,0 +1,24 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml
new file mode 100644
index 0000000..77d975f
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/.travis.yml
@@ -0,0 +1,46 @@
+language: go
+
+os:
+ - linux
+ - osx
+ - windows
+
+arch:
+ - amd64
+ - arm64
+
+go:
+ - 1.12.x
+ - 1.13.x
+ - 1.14.x
+ - master
+
+script:
+ - go vet ./...
+ - go test -race ./...
+ - go test -tags=noasm ./...
+
+stages:
+ - gofmt
+ - test
+
+matrix:
+ allow_failures:
+ - go: 'master'
+ fast_finish: true
+ include:
+ - stage: gofmt
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - diff <(gofmt -d .) <(printf "")
+ - diff <(gofmt -d ./private) <(printf "")
+ - go install github.com/klauspost/asmfmt/cmd/asmfmt
+ - diff <(asmfmt -d .) <(printf "")
+ - stage: i386
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - GOOS=linux GOARCH=386 go test .
diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
new file mode 100644
index 0000000..2ef4714
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
@@ -0,0 +1,35 @@
+Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/LICENSE
new file mode 100644
index 0000000..5cec7ee
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md
new file mode 100644
index 0000000..38d4a8b
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/README.md
@@ -0,0 +1,191 @@
+# cpuid
+Package cpuid provides information about the CPU running the current program.
+
+CPU features are detected on startup, and kept for fast access through the life of the application.
+Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
+
+You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+
+Package home: https://github.com/klauspost/cpuid
+
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
+[2]: https://godoc.org/github.com/klauspost/cpuid
+[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
+[4]: https://travis-ci.org/klauspost/cpuid
+
+# features
+
+## x86 CPU Instructions
+* **CMOV** (i686 CMOV)
+* **NX** (NX (No-Execute) bit)
+* **AMD3DNOW** (AMD 3DNOW)
+* **AMD3DNOWEXT** (AMD 3DNowExt)
+* **MMX** (standard MMX)
+* **MMXEXT** (SSE integer functions or AMD MMX ext)
+* **SSE** (SSE functions)
+* **SSE2** (P4 SSE functions)
+* **SSE3** (Prescott SSE3 functions)
+* **SSSE3** (Conroe SSSE3 functions)
+* **SSE4** (Penryn SSE4.1 functions)
+* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
+* **SSE42** (Nehalem SSE4.2 functions)
+* **AVX** (AVX functions)
+* **AVX2** (AVX2 functions)
+* **FMA3** (Intel FMA 3)
+* **FMA4** (Bulldozer FMA4 functions)
+* **XOP** (Bulldozer XOP functions)
+* **F16C** (Half-precision floating-point conversion)
+* **BMI1** (Bit Manipulation Instruction Set 1)
+* **BMI2** (Bit Manipulation Instruction Set 2)
+* **TBM** (AMD Trailing Bit Manipulation)
+* **LZCNT** (LZCNT instruction)
+* **POPCNT** (POPCNT instruction)
+* **AESNI** (Advanced Encryption Standard New Instructions)
+* **CLMUL** (Carry-less Multiplication)
+* **HTT** (Hyperthreading (enabled))
+* **HLE** (Hardware Lock Elision)
+* **RTM** (Restricted Transactional Memory)
+* **RDRAND** (RDRAND instruction is available)
+* **RDSEED** (RDSEED instruction is available)
+* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
+* **SHA** (Intel SHA Extensions)
+* **AVX512F** (AVX-512 Foundation)
+* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
+* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
+* **AVX512PF** (AVX-512 Prefetch Instructions)
+* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
+* **AVX512CD** (AVX-512 Conflict Detection Instructions)
+* **AVX512BW** (AVX-512 Byte and Word Instructions)
+* **AVX512VL** (AVX-512 Vector Length Extensions)
+* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
+* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
+* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
+* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
+* **GFNI** (Galois Field New Instructions)
+* **VAES** (Vector AES)
+* **AVX512BITALG** (AVX-512 Bit Algorithms)
+* **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
+* **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
+* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
+* **MPX** (Intel MPX (Memory Protection Extensions))
+* **ERMS** (Enhanced REP MOVSB/STOSB)
+* **RDTSCP** (RDTSCP Instruction)
+* **CX16** (CMPXCHG16B Instruction)
+* **SGX** (Software Guard Extensions, with activation details)
+* **VMX** (Virtual Machine Extensions)
+
+## Performance
+* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
+* **SSE2SLOW** (SSE2 is supported, but usually not faster)
+* **SSE3SLOW** (SSE3 is supported, but usually not faster)
+* **ATOM** (Atom processor, some SSSE3 instructions are slower)
+* **Cache line** (Probable size of a cache line).
+* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
+
+## ARM CPU features
+
+# ARM FEATURE DETECTION DISABLED!
+
+See [#52](https://github.com/klauspost/cpuid/issues/52).
+
+Currently only `arm64` platforms are implemented.
+
+* **FP** Single-precision and double-precision floating point
+* **ASIMD** Advanced SIMD
+* **EVTSTRM** Generic timer
+* **AES** AES instructions
+* **PMULL** Polynomial Multiply instructions (PMULL/PMULL2)
+* **SHA1** SHA-1 instructions (SHA1C, etc)
+* **SHA2** SHA-2 instructions (SHA256H, etc)
+* **CRC32** CRC32/CRC32C instructions
+* **ATOMICS** Large System Extensions (LSE)
+* **FPHP** Half-precision floating point
+* **ASIMDHP** Advanced SIMD half-precision floating point
+* **ARMCPUID** Some CPU ID registers readable at user-level
+* **ASIMDRDM** Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+* **JSCVT** Javascript-style double->int convert (FJCVTZS)
+* **FCMA** Floating point complex number addition and multiplication
+* **LRCPC** Weaker release consistency (LDAPR, etc)
+* **DCPOP** Data cache clean to Point of Persistence (DC CVAP)
+* **SHA3** SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
+* **SM3** SM3 instructions
+* **SM4** SM4 instructions
+* **ASIMDDP** SIMD Dot Product
+* **SHA512** SHA512 instructions
+* **SVE** Scalable Vector Extension
+* **GPA** Generic Pointer Authentication
+
+## Cpu Vendor/VM
+* **Intel**
+* **AMD**
+* **VIA**
+* **Transmeta**
+* **NSC**
+* **KVM** (Kernel-based Virtual Machine)
+* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
+* **VMware**
+* **XenHVM**
+* **Bhyve**
+* **Hygon**
+
+# installing
+
+```go get github.com/klauspost/cpuid```
+
+# example
+
+```Go
+package main
+
+import (
+ "fmt"
+ "github.com/klauspost/cpuid"
+)
+
+func main() {
+ // Print basic CPU information:
+ fmt.Println("Name:", cpuid.CPU.BrandName)
+ fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
+ fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
+ fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
+ fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
+ fmt.Println("Features:", cpuid.CPU.Features)
+ fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
+ fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
+ fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
+ fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
+ fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
+
+ // Test if we have a specific feature:
+ if cpuid.CPU.SSE() {
+ fmt.Println("We have Streaming SIMD Extensions")
+ }
+}
+```
+
+Sample output:
+```
+>go run main.go
+Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
+PhysicalCores: 2
+ThreadsPerCore: 2
+LogicalCores: 4
+Family 6 Model: 42
+Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
+Cacheline bytes: 64
+We have Streaming SIMD Extensions
+```
+
+# private package
+
+In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
+
+For this purpose all exports are removed, and functions and constants are lowercased.
+
+This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
+
+# license
+
+This code is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go
new file mode 100644
index 0000000..208b3e7
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid.go
@@ -0,0 +1,1504 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+// Package cpuid provides information about the CPU running the current program.
+//
+// CPU features are detected on startup, and kept for fast access through the life of the application.
+// Currently x86 / x64 (AMD64) as well as arm64 is supported.
+//
+// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
+//
+// Package home: https://github.com/klauspost/cpuid
+package cpuid
+
+import (
+ "math"
+ "strings"
+)
+
+// AMD refererence: https://www.amd.com/system/files/TechDocs/25481.pdf
+// and Processor Programming Reference (PPR)
+
+// Vendor is a representation of a CPU vendor.
+type Vendor int
+
+const (
+ Other Vendor = iota
+ Intel
+ AMD
+ VIA
+ Transmeta
+ NSC
+ KVM // Kernel-based Virtual Machine
+ MSVM // Microsoft Hyper-V or Windows Virtual PC
+ VMware
+ XenHVM
+ Bhyve
+ Hygon
+ SiS
+ RDC
+)
+
+const (
+ CMOV = 1 << iota // i686 CMOV
+ NX // NX (No-Execute) bit
+ AMD3DNOW // AMD 3DNOW
+ AMD3DNOWEXT // AMD 3DNowExt
+ MMX // standard MMX
+ MMXEXT // SSE integer functions or AMD MMX ext
+ SSE // SSE functions
+ SSE2 // P4 SSE functions
+ SSE3 // Prescott SSE3 functions
+ SSSE3 // Conroe SSSE3 functions
+ SSE4 // Penryn SSE4.1 functions
+ SSE4A // AMD Barcelona microarchitecture SSE4a instructions
+ SSE42 // Nehalem SSE4.2 functions
+ AVX // AVX functions
+ AVX2 // AVX2 functions
+ FMA3 // Intel FMA 3
+ FMA4 // Bulldozer FMA4 functions
+ XOP // Bulldozer XOP functions
+ F16C // Half-precision floating-point conversion
+ BMI1 // Bit Manipulation Instruction Set 1
+ BMI2 // Bit Manipulation Instruction Set 2
+ TBM // AMD Trailing Bit Manipulation
+ LZCNT // LZCNT instruction
+ POPCNT // POPCNT instruction
+ AESNI // Advanced Encryption Standard New Instructions
+ CLMUL // Carry-less Multiplication
+ HTT // Hyperthreading (enabled)
+ HLE // Hardware Lock Elision
+ RTM // Restricted Transactional Memory
+ RDRAND // RDRAND instruction is available
+ RDSEED // RDSEED instruction is available
+ ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+ SHA // Intel SHA Extensions
+ AVX512F // AVX-512 Foundation
+ AVX512DQ // AVX-512 Doubleword and Quadword Instructions
+ AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
+ AVX512PF // AVX-512 Prefetch Instructions
+ AVX512ER // AVX-512 Exponential and Reciprocal Instructions
+ AVX512CD // AVX-512 Conflict Detection Instructions
+ AVX512BW // AVX-512 Byte and Word Instructions
+ AVX512VL // AVX-512 Vector Length Extensions
+ AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
+ AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
+ AVX512VNNI // AVX-512 Vector Neural Network Instructions
+ AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
+ GFNI // Galois Field New Instructions
+ VAES // Vector AES
+ AVX512BITALG // AVX-512 Bit Algorithms
+ VPCLMULQDQ // Carry-Less Multiplication Quadword
+ AVX512BF16 // AVX-512 BFLOAT16 Instructions
+ AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
+ MPX // Intel MPX (Memory Protection Extensions)
+ ERMS // Enhanced REP MOVSB/STOSB
+ RDTSCP // RDTSCP Instruction
+ CX16 // CMPXCHG16B Instruction
+ SGX // Software Guard Extensions
+ SGXLC // Software Guard Extensions Launch Control
+ IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+ STIBP // Single Thread Indirect Branch Predictors
+ VMX // Virtual Machine Extensions
+
+ // Performance indicators
+ SSE2SLOW // SSE2 is supported, but usually not faster
+ SSE3SLOW // SSE3 is supported, but usually not faster
+ ATOM // Atom processor, some SSSE3 instructions are slower
+)
+
+var flagNames = map[Flags]string{
+ CMOV: "CMOV", // i686 CMOV
+ NX: "NX", // NX (No-Execute) bit
+ AMD3DNOW: "AMD3DNOW", // AMD 3DNOW
+ AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
+ MMX: "MMX", // Standard MMX
+ MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext
+ SSE: "SSE", // SSE functions
+ SSE2: "SSE2", // P4 SSE2 functions
+ SSE3: "SSE3", // Prescott SSE3 functions
+ SSSE3: "SSSE3", // Conroe SSSE3 functions
+ SSE4: "SSE4.1", // Penryn SSE4.1 functions
+ SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
+ SSE42: "SSE4.2", // Nehalem SSE4.2 functions
+ AVX: "AVX", // AVX functions
+ AVX2: "AVX2", // AVX functions
+ FMA3: "FMA3", // Intel FMA 3
+ FMA4: "FMA4", // Bulldozer FMA4 functions
+ XOP: "XOP", // Bulldozer XOP functions
+ F16C: "F16C", // Half-precision floating-point conversion
+ BMI1: "BMI1", // Bit Manipulation Instruction Set 1
+ BMI2: "BMI2", // Bit Manipulation Instruction Set 2
+ TBM: "TBM", // AMD Trailing Bit Manipulation
+ LZCNT: "LZCNT", // LZCNT instruction
+ POPCNT: "POPCNT", // POPCNT instruction
+ AESNI: "AESNI", // Advanced Encryption Standard New Instructions
+ CLMUL: "CLMUL", // Carry-less Multiplication
+ HTT: "HTT", // Hyperthreading (enabled)
+ HLE: "HLE", // Hardware Lock Elision
+ RTM: "RTM", // Restricted Transactional Memory
+ RDRAND: "RDRAND", // RDRAND instruction is available
+ RDSEED: "RDSEED", // RDSEED instruction is available
+ ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+ SHA: "SHA", // Intel SHA Extensions
+ AVX512F: "AVX512F", // AVX-512 Foundation
+ AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
+ AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
+ AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions
+ AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
+ AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions
+ AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions
+ AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions
+ AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
+ AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2
+ AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions
+ AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword
+ GFNI: "GFNI", // Galois Field New Instructions
+ VAES: "VAES", // Vector AES
+ AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms
+ VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword
+ AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction
+ AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
+ MPX: "MPX", // Intel MPX (Memory Protection Extensions)
+ ERMS: "ERMS", // Enhanced REP MOVSB/STOSB
+ RDTSCP: "RDTSCP", // RDTSCP Instruction
+ CX16: "CX16", // CMPXCHG16B Instruction
+ SGX: "SGX", // Software Guard Extensions
+ SGXLC: "SGXLC", // Software Guard Extensions Launch Control
+ IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
+ STIBP: "STIBP", // Single Thread Indirect Branch Predictors
+ VMX: "VMX", // Virtual Machine Extensions
+
+ // Performance indicators
+ SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
+ SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
+ ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower
+
+}
+
+/* all special features for arm64 should be defined here */
+const (
+ /* extension instructions */
+ FP ArmFlags = 1 << iota
+ ASIMD
+ EVTSTRM
+ AES
+ PMULL
+ SHA1
+ SHA2
+ CRC32
+ ATOMICS
+ FPHP
+ ASIMDHP
+ ARMCPUID
+ ASIMDRDM
+ JSCVT
+ FCMA
+ LRCPC
+ DCPOP
+ SHA3
+ SM3
+ SM4
+ ASIMDDP
+ SHA512
+ SVE
+ GPA
+)
+
+var flagNamesArm = map[ArmFlags]string{
+ FP: "FP", // Single-precision and double-precision floating point
+ ASIMD: "ASIMD", // Advanced SIMD
+ EVTSTRM: "EVTSTRM", // Generic timer
+ AES: "AES", // AES instructions
+ PMULL: "PMULL", // Polynomial Multiply instructions (PMULL/PMULL2)
+ SHA1: "SHA1", // SHA-1 instructions (SHA1C, etc)
+ SHA2: "SHA2", // SHA-2 instructions (SHA256H, etc)
+ CRC32: "CRC32", // CRC32/CRC32C instructions
+ ATOMICS: "ATOMICS", // Large System Extensions (LSE)
+ FPHP: "FPHP", // Half-precision floating point
+ ASIMDHP: "ASIMDHP", // Advanced SIMD half-precision floating point
+ ARMCPUID: "CPUID", // Some CPU ID registers readable at user-level
+ ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+ JSCVT: "JSCVT", // Javascript-style double->int convert (FJCVTZS)
+ FCMA: "FCMA", // Floatin point complex number addition and multiplication
+ LRCPC: "LRCPC", // Weaker release consistency (LDAPR, etc)
+ DCPOP: "DCPOP", // Data cache clean to Point of Persistence (DC CVAP)
+ SHA3: "SHA3", // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
+ SM3: "SM3", // SM3 instructions
+ SM4: "SM4", // SM4 instructions
+ ASIMDDP: "ASIMDDP", // SIMD Dot Product
+ SHA512: "SHA512", // SHA512 instructions
+ SVE: "SVE", // Scalable Vector Extension
+ GPA: "GPA", // Generic Pointer Authentication
+}
+
+// CPUInfo contains information about the detected system CPU.
+type CPUInfo struct {
+ BrandName string // Brand name reported by the CPU
+ VendorID Vendor // Comparable CPU vendor ID
+ VendorString string // Raw vendor string.
+ Features Flags // Features of the CPU (x64)
+ Arm ArmFlags // Features of the CPU (arm)
+ PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
+ ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
+ LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
+ Family int // CPU family number
+ Model int // CPU model number
+ CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
+ Hz int64 // Clock speed, if known
+ Cache struct {
+ L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
+ L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
+ L2 int // L2 Cache (per core or shared). Will be -1 if undetected
+ L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
+ }
+ SGX SGXSupport
+ maxFunc uint32
+ maxExFunc uint32
+}
+
+var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
+var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+var xgetbv func(index uint32) (eax, edx uint32)
+var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+
+// CPU contains information about the CPU as detected on startup,
+// or when Detect last was called.
+//
+// Use this as the primary entry point to you data.
+var CPU CPUInfo
+
+func init() {
+ initCPU()
+ Detect()
+}
+
+// Detect will re-detect current CPU info.
+// This will replace the content of the exported CPU variable.
+//
+// Unless you expect the CPU to change while you are running your program
+// you should not need to call this function.
+// If you call this, you must ensure that no other goroutine is accessing the
+// exported CPU variable.
+func Detect() {
+ // Set defaults
+ CPU.ThreadsPerCore = 1
+ CPU.Cache.L1I = -1
+ CPU.Cache.L1D = -1
+ CPU.Cache.L2 = -1
+ CPU.Cache.L3 = -1
+ addInfo(&CPU)
+}
+
+// Generated here: http://play.golang.org/p/BxFH2Gdc0G
+
+// Cmov indicates support of CMOV instructions
+func (c CPUInfo) Cmov() bool {
+ return c.Features&CMOV != 0
+}
+
+// Amd3dnow indicates support of AMD 3DNOW! instructions
+func (c CPUInfo) Amd3dnow() bool {
+ return c.Features&AMD3DNOW != 0
+}
+
+// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
+func (c CPUInfo) Amd3dnowExt() bool {
+ return c.Features&AMD3DNOWEXT != 0
+}
+
+// VMX indicates support of VMX
+func (c CPUInfo) VMX() bool {
+ return c.Features&VMX != 0
+}
+
+// MMX indicates support of MMX instructions
+func (c CPUInfo) MMX() bool {
+ return c.Features&MMX != 0
+}
+
+// MMXExt indicates support of MMXEXT instructions
+// (SSE integer functions or AMD MMX ext)
+func (c CPUInfo) MMXExt() bool {
+ return c.Features&MMXEXT != 0
+}
+
+// SSE indicates support of SSE instructions
+func (c CPUInfo) SSE() bool {
+ return c.Features&SSE != 0
+}
+
+// SSE2 indicates support of SSE 2 instructions
+func (c CPUInfo) SSE2() bool {
+ return c.Features&SSE2 != 0
+}
+
+// SSE3 indicates support of SSE 3 instructions
+func (c CPUInfo) SSE3() bool {
+ return c.Features&SSE3 != 0
+}
+
+// SSSE3 indicates support of SSSE 3 instructions
+func (c CPUInfo) SSSE3() bool {
+ return c.Features&SSSE3 != 0
+}
+
+// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
+func (c CPUInfo) SSE4() bool {
+ return c.Features&SSE4 != 0
+}
+
+// SSE42 indicates support of SSE4.2 instructions
+func (c CPUInfo) SSE42() bool {
+ return c.Features&SSE42 != 0
+}
+
+// AVX indicates support of AVX instructions
+// and operating system support of AVX instructions
+func (c CPUInfo) AVX() bool {
+ return c.Features&AVX != 0
+}
+
+// AVX2 indicates support of AVX2 instructions
+func (c CPUInfo) AVX2() bool {
+ return c.Features&AVX2 != 0
+}
+
+// FMA3 indicates support of FMA3 instructions
+func (c CPUInfo) FMA3() bool {
+ return c.Features&FMA3 != 0
+}
+
+// FMA4 indicates support of FMA4 instructions
+func (c CPUInfo) FMA4() bool {
+ return c.Features&FMA4 != 0
+}
+
+// XOP indicates support of XOP instructions
+func (c CPUInfo) XOP() bool {
+ return c.Features&XOP != 0
+}
+
+// F16C indicates support of F16C instructions
+func (c CPUInfo) F16C() bool {
+ return c.Features&F16C != 0
+}
+
+// BMI1 indicates support of BMI1 instructions
+func (c CPUInfo) BMI1() bool {
+ return c.Features&BMI1 != 0
+}
+
+// BMI2 indicates support of BMI2 instructions
+func (c CPUInfo) BMI2() bool {
+ return c.Features&BMI2 != 0
+}
+
+// TBM indicates support of TBM instructions
+// (AMD Trailing Bit Manipulation)
+func (c CPUInfo) TBM() bool {
+ return c.Features&TBM != 0
+}
+
+// Lzcnt indicates support of LZCNT instruction
+func (c CPUInfo) Lzcnt() bool {
+ return c.Features&LZCNT != 0
+}
+
+// Popcnt indicates support of POPCNT instruction
+func (c CPUInfo) Popcnt() bool {
+ return c.Features&POPCNT != 0
+}
+
+// HTT indicates the processor has Hyperthreading enabled
+func (c CPUInfo) HTT() bool {
+ return c.Features&HTT != 0
+}
+
+// SSE2Slow indicates that SSE2 may be slow on this processor
+func (c CPUInfo) SSE2Slow() bool {
+ return c.Features&SSE2SLOW != 0
+}
+
+// SSE3Slow indicates that SSE3 may be slow on this processor
+func (c CPUInfo) SSE3Slow() bool {
+ return c.Features&SSE3SLOW != 0
+}
+
+// AesNi indicates support of AES-NI instructions
+// (Advanced Encryption Standard New Instructions)
+func (c CPUInfo) AesNi() bool {
+ return c.Features&AESNI != 0
+}
+
+// Clmul indicates support of CLMUL instructions
+// (Carry-less Multiplication)
+func (c CPUInfo) Clmul() bool {
+ return c.Features&CLMUL != 0
+}
+
+// NX indicates support of NX (No-Execute) bit
+func (c CPUInfo) NX() bool {
+ return c.Features&NX != 0
+}
+
+// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
+func (c CPUInfo) SSE4A() bool {
+ return c.Features&SSE4A != 0
+}
+
+// HLE indicates support of Hardware Lock Elision
+func (c CPUInfo) HLE() bool {
+ return c.Features&HLE != 0
+}
+
+// RTM indicates support of Restricted Transactional Memory
+func (c CPUInfo) RTM() bool {
+ return c.Features&RTM != 0
+}
+
+// Rdrand indicates support of RDRAND instruction is available
+func (c CPUInfo) Rdrand() bool {
+ return c.Features&RDRAND != 0
+}
+
+// Rdseed indicates support of RDSEED instruction is available
+func (c CPUInfo) Rdseed() bool {
+ return c.Features&RDSEED != 0
+}
+
+// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+func (c CPUInfo) ADX() bool {
+ return c.Features&ADX != 0
+}
+
+// SHA indicates support of Intel SHA Extensions
+func (c CPUInfo) SHA() bool {
+ return c.Features&SHA != 0
+}
+
+// AVX512F indicates support of AVX-512 Foundation
+func (c CPUInfo) AVX512F() bool {
+ return c.Features&AVX512F != 0
+}
+
+// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
+func (c CPUInfo) AVX512DQ() bool {
+ return c.Features&AVX512DQ != 0
+}
+
+// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
+func (c CPUInfo) AVX512IFMA() bool {
+ return c.Features&AVX512IFMA != 0
+}
+
+// AVX512PF indicates support of AVX-512 Prefetch Instructions
+func (c CPUInfo) AVX512PF() bool {
+ return c.Features&AVX512PF != 0
+}
+
+// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
+func (c CPUInfo) AVX512ER() bool {
+ return c.Features&AVX512ER != 0
+}
+
+// AVX512CD indicates support of AVX-512 Conflict Detection Instructions
+func (c CPUInfo) AVX512CD() bool {
+ return c.Features&AVX512CD != 0
+}
+
+// AVX512BW indicates support of AVX-512 Byte and Word Instructions
+func (c CPUInfo) AVX512BW() bool {
+ return c.Features&AVX512BW != 0
+}
+
+// AVX512VL indicates support of AVX-512 Vector Length Extensions
+func (c CPUInfo) AVX512VL() bool {
+ return c.Features&AVX512VL != 0
+}
+
+// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
+func (c CPUInfo) AVX512VBMI() bool {
+ return c.Features&AVX512VBMI != 0
+}
+
+// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
+func (c CPUInfo) AVX512VBMI2() bool {
+ return c.Features&AVX512VBMI2 != 0
+}
+
+// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
+func (c CPUInfo) AVX512VNNI() bool {
+ return c.Features&AVX512VNNI != 0
+}
+
+// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
+func (c CPUInfo) AVX512VPOPCNTDQ() bool {
+ return c.Features&AVX512VPOPCNTDQ != 0
+}
+
+// GFNI indicates support of Galois Field New Instructions
+func (c CPUInfo) GFNI() bool {
+ return c.Features&GFNI != 0
+}
+
+// VAES indicates support of Vector AES
+func (c CPUInfo) VAES() bool {
+ return c.Features&VAES != 0
+}
+
+// AVX512BITALG indicates support of AVX-512 Bit Algorithms
+func (c CPUInfo) AVX512BITALG() bool {
+ return c.Features&AVX512BITALG != 0
+}
+
+// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
+func (c CPUInfo) VPCLMULQDQ() bool {
+ return c.Features&VPCLMULQDQ != 0
+}
+
+// AVX512BF16 indicates support of AVX-512 BFloat16 Instructions — TODO confirm exact SDM feature name
+func (c CPUInfo) AVX512BF16() bool {
+	return c.Features&AVX512BF16 != 0
+}
+
+// AVX512VP2INTERSECT indicates support of AVX-512 Intersect Instructions (VP2INTERSECTD/VP2INTERSECTQ) — TODO confirm exact SDM feature name
+func (c CPUInfo) AVX512VP2INTERSECT() bool {
+	return c.Features&AVX512VP2INTERSECT != 0
+}
+
+// MPX indicates support of Intel MPX (Memory Protection Extensions)
+func (c CPUInfo) MPX() bool {
+ return c.Features&MPX != 0
+}
+
+// ERMS indicates support of Enhanced REP MOVSB/STOSB
+func (c CPUInfo) ERMS() bool {
+ return c.Features&ERMS != 0
+}
+
+// RDTSCP Instruction is available.
+func (c CPUInfo) RDTSCP() bool {
+ return c.Features&RDTSCP != 0
+}
+
+// CX16 indicates if CMPXCHG16B instruction is available.
+func (c CPUInfo) CX16() bool {
+ return c.Features&CX16 != 0
+}
+
+// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
+// So TSX simply checks that.
+func (c CPUInfo) TSX() bool {
+ return c.Features&(HLE|RTM) == HLE|RTM
+}
+
+// Atom indicates an Atom processor
+func (c CPUInfo) Atom() bool {
+ return c.Features&ATOM != 0
+}
+
+// Intel returns true if vendor is recognized as Intel
+func (c CPUInfo) Intel() bool {
+ return c.VendorID == Intel
+}
+
+// AMD returns true if vendor is recognized as AMD
+func (c CPUInfo) AMD() bool {
+ return c.VendorID == AMD
+}
+
+// Hygon returns true if vendor is recognized as Hygon
+func (c CPUInfo) Hygon() bool {
+ return c.VendorID == Hygon
+}
+
+// Transmeta returns true if vendor is recognized as Transmeta
+func (c CPUInfo) Transmeta() bool {
+ return c.VendorID == Transmeta
+}
+
+// NSC returns true if vendor is recognized as National Semiconductor
+func (c CPUInfo) NSC() bool {
+ return c.VendorID == NSC
+}
+
+// VIA returns true if vendor is recognized as VIA
+func (c CPUInfo) VIA() bool {
+ return c.VendorID == VIA
+}
+
+// RTCounter returns the 64-bit time-stamp counter
+// Uses the RDTSCP instruction. The value 0 is returned
+// if the CPU does not support the instruction.
+func (c CPUInfo) RTCounter() uint64 {
+ if !c.RDTSCP() {
+ return 0
+ }
+ a, _, _, d := rdtscpAsm()
+ return uint64(a) | (uint64(d) << 32)
+}
+
+// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
+// This variable is OS dependent, but on Linux contains information
+// about the current cpu/core the code is running on.
+// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
+func (c CPUInfo) Ia32TscAux() uint32 {
+ if !c.RDTSCP() {
+ return 0
+ }
+ _, _, ecx, _ := rdtscpAsm()
+ return ecx
+}
+
+// LogicalCPU will return the Logical CPU the code is currently executing on.
+// This is likely to change when the OS re-schedules the running thread
+// to another CPU.
+// If the current core cannot be detected, -1 will be returned.
+func (c CPUInfo) LogicalCPU() int {
+ if c.maxFunc < 1 {
+ return -1
+ }
+ _, ebx, _, _ := cpuid(1)
+ return int(ebx >> 24)
+}
+
+// hertz tries to compute the clock speed of the CPU. If leaf 15 is
+// supported, use it, otherwise parse the brand string. Yes, really.
+func hertz(model string) int64 {
+ mfi := maxFunctionID()
+ if mfi >= 0x15 {
+ eax, ebx, ecx, _ := cpuid(0x15)
+ if eax != 0 && ebx != 0 && ecx != 0 {
+ return int64((int64(ecx) * int64(ebx)) / int64(eax))
+ }
+ }
+ // computeHz determines the official rated speed of a CPU from its brand
+ // string. This insanity is *actually the official documented way to do
+ // this according to Intel*, prior to leaf 0x15 existing. The official
+ // documentation only shows this working for exactly `x.xx` or `xxxx`
+ // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
+ // sizes.
+ hz := strings.LastIndex(model, "Hz")
+ if hz < 3 {
+ return -1
+ }
+ var multiplier int64
+ switch model[hz-1] {
+ case 'M':
+ multiplier = 1000 * 1000
+ case 'G':
+ multiplier = 1000 * 1000 * 1000
+ case 'T':
+ multiplier = 1000 * 1000 * 1000 * 1000
+ }
+ if multiplier == 0 {
+ return -1
+ }
+ freq := int64(0)
+ divisor := int64(0)
+ decimalShift := int64(1)
+ var i int
+ for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
+ if model[i] >= '0' && model[i] <= '9' {
+ freq += int64(model[i]-'0') * decimalShift
+ decimalShift *= 10
+ } else if model[i] == '.' {
+ if divisor != 0 {
+ return -1
+ }
+ divisor = decimalShift
+ } else {
+ return -1
+ }
+ }
+ // we didn't find a space
+ if i < 0 {
+ return -1
+ }
+ if divisor != 0 {
+ return (freq * multiplier) / divisor
+ }
+ return freq * multiplier
+}
+
+// VM Will return true if the cpu id indicates we are in
+// a virtual machine. This is only a hint, and will very likely
+// have many false negatives.
+func (c CPUInfo) VM() bool {
+ switch c.VendorID {
+ case MSVM, KVM, VMware, XenHVM, Bhyve:
+ return true
+ }
+ return false
+}
+
+// Flags contains detected cpu features and characteristics
+type Flags uint64
+
+// ArmFlags contains detected ARM cpu features and characteristics
+type ArmFlags uint64
+
+// String returns a string representation of the detected
+// CPU features.
+func (f Flags) String() string {
+ return strings.Join(f.Strings(), ",")
+}
+
+// Strings returns an array of the detected features.
+func (f Flags) Strings() []string {
+ r := make([]string, 0, 20)
+ for i := uint(0); i < 64; i++ {
+ key := Flags(1 << i)
+ val := flagNames[key]
+ if f&key != 0 {
+ r = append(r, val)
+ }
+ }
+ return r
+}
+
+// String returns a string representation of the detected
+// CPU features.
+func (f ArmFlags) String() string {
+ return strings.Join(f.Strings(), ",")
+}
+
+// Strings returns an array of the detected features.
+func (f ArmFlags) Strings() []string {
+ r := make([]string, 0, 20)
+ for i := uint(0); i < 64; i++ {
+ key := ArmFlags(1 << i)
+ val := flagNamesArm[key]
+ if f&key != 0 {
+ r = append(r, val)
+ }
+ }
+ return r
+}
+func maxExtendedFunction() uint32 {
+ eax, _, _, _ := cpuid(0x80000000)
+ return eax
+}
+
+func maxFunctionID() uint32 {
+ a, _, _, _ := cpuid(0)
+ return a
+}
+
+func brandName() string {
+ if maxExtendedFunction() >= 0x80000004 {
+ v := make([]uint32, 0, 48)
+ for i := uint32(0); i < 3; i++ {
+ a, b, c, d := cpuid(0x80000002 + i)
+ v = append(v, a, b, c, d)
+ }
+ return strings.Trim(string(valAsString(v...)), " ")
+ }
+ return "unknown"
+}
+
+// threadsPerCore returns the number of logical processors per physical
+// core (1 when SMT/Hyper-Threading is absent or cannot be detected).
+// Only Intel and AMD are queried; other vendors report 1.
+func threadsPerCore() int {
+	mfi := maxFunctionID()
+	vend, _ := vendorID()
+
+	if mfi < 0x4 || (vend != Intel && vend != AMD) {
+		return 1
+	}
+
+	if mfi < 0xb {
+		// No extended topology leaf; only the Intel legacy method applies.
+		if vend != Intel {
+			return 1
+		}
+		// Leaf 1: EDX bit 28 = HTT flag, EBX[23:16] = logical processor count.
+		_, b, _, d := cpuid(1)
+		if (d & (1 << 28)) != 0 {
+			// v will contain logical core count
+			v := (b >> 16) & 255
+			if v > 1 {
+				a4, _, _, _ := cpuid(4)
+				// physical cores
+				v2 := (a4 >> 26) + 1
+				if v2 > 0 {
+					return int(v) / int(v2)
+				}
+			}
+		}
+		return 1
+	}
+	// Extended topology leaf 0xB, sub-leaf 0 (SMT level):
+	// EBX[15:0] = logical processors at this level, i.e. threads per core.
+	_, b, _, _ := cpuidex(0xb, 0)
+	if b&0xffff == 0 {
+		return 1
+	}
+	return int(b & 0xffff)
+}
+
+// logicalCores returns the number of logical processors (hardware
+// threads) reported by CPUID, or 0 when the vendor or the required
+// leaves are unsupported.
+func logicalCores() int {
+	mfi := maxFunctionID()
+	v, _ := vendorID()
+	switch v {
+	case Intel:
+		// Use this on old Intel processors
+		if mfi < 0xb {
+			if mfi < 1 {
+				return 0
+			}
+			// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
+			// that can be assigned to logical processors in a physical package.
+			// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
+			_, ebx, _, _ := cpuid(1)
+			logical := (ebx >> 16) & 0xff
+			return int(logical)
+		}
+		// Extended topology leaf 0xB, sub-leaf 1 (core level):
+		// EBX[15:0] = logical processors at this level.
+		_, b, _, _ := cpuidex(0xb, 1)
+		return int(b & 0xffff)
+	case AMD, Hygon:
+		// Same legacy CPUID.1:EBX[23:16] field as the old-Intel path above.
+		_, b, _, _ := cpuid(1)
+		return int((b >> 16) & 0xff)
+	default:
+		return 0
+	}
+}
+
+// familyModel returns the CPU display family and model computed from
+// leaf 1 EAX. Returns (0, 0) when leaf 1 is unavailable.
+func familyModel() (int, int) {
+	if maxFunctionID() < 0x1 {
+		return 0, 0
+	}
+	eax, _, _, _ := cpuid(1)
+	// Family = base family (EAX[11:8]) + extended family (EAX[27:20]).
+	family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
+	// Model = base model (EAX[7:4]) + extended model (EAX[19:16]) << 4.
+	model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
+	return int(family), int(model)
+}
+
+// physicalCores returns the number of physical CPU cores, derived as
+// logical cores divided by threads per core, with an AMD/Hygon fallback
+// to extended leaf 0x80000008. Returns 0 for unknown vendors.
+func physicalCores() int {
+	v, _ := vendorID()
+	switch v {
+	case Intel:
+		return logicalCores() / threadsPerCore()
+	case AMD, Hygon:
+		lc := logicalCores()
+		tpc := threadsPerCore()
+		if lc > 0 && tpc > 0 {
+			return lc / tpc
+		}
+		// The following is inaccurate on AMD EPYC 7742 64-Core Processor
+
+		if maxExtendedFunction() >= 0x80000008 {
+			// Leaf 0x80000008: ECX[7:0] = number of cores minus one.
+			_, _, c, _ := cpuid(0x80000008)
+			return int(c&0xff) + 1
+		}
+	}
+	return 0
+}
+
+// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
+var vendorMapping = map[string]Vendor{
+ "AMDisbetter!": AMD,
+ "AuthenticAMD": AMD,
+ "CentaurHauls": VIA,
+ "GenuineIntel": Intel,
+ "TransmetaCPU": Transmeta,
+ "GenuineTMx86": Transmeta,
+ "Geode by NSC": NSC,
+ "VIA VIA VIA ": VIA,
+ "KVMKVMKVMKVM": KVM,
+ "Microsoft Hv": MSVM,
+ "VMwareVMware": VMware,
+ "XenVMMXenVMM": XenHVM,
+ "bhyve bhyve ": Bhyve,
+ "HygonGenuine": Hygon,
+ "Vortex86 SoC": SiS,
+ "SiS SiS SiS ": SiS,
+ "RiseRiseRise": SiS,
+ "Genuine RDC": RDC,
+}
+
+// vendorID reads the 12-character vendor string from leaf 0 — note the
+// register order is EBX, EDX, ECX — and maps it to a known Vendor.
+// Unrecognized vendors yield Other together with the raw string.
+func vendorID() (Vendor, string) {
+	_, b, c, d := cpuid(0)
+	v := string(valAsString(b, d, c))
+	vend, ok := vendorMapping[v]
+	if !ok {
+		return Other, v
+	}
+	return vend, v
+}
+
+// cacheLine returns the CPU cache line size in bytes, or 0 when it
+// cannot be determined.
+func cacheLine() int {
+	if maxFunctionID() < 0x1 {
+		return 0
+	}
+
+	_, ebx, _, _ := cpuid(1)
+	// CLFLUSH line size is EBX[15:8] in 8-byte units; (>>8)*8 folds to >>5.
+	cache := (ebx & 0xff00) >> 5 // cflush size
+	if cache == 0 && maxExtendedFunction() >= 0x80000006 {
+		// Fallback: extended leaf 0x80000006, ECX[7:0] = L2 line size in bytes.
+		_, _, ecx, _ := cpuid(0x80000006)
+		cache = ecx & 0xff // cacheline size
+	}
+	// TODO: Read from Cache and TLB Information
+	return int(cache)
+}
+
+// cacheSize fills in c.Cache (L1D/L1I/L2/L3 sizes in bytes) from the
+// vendor-specific CPUID cache leaves. Levels that cannot be determined
+// remain -1.
+func (c *CPUInfo) cacheSize() {
+	c.Cache.L1D = -1
+	c.Cache.L1I = -1
+	c.Cache.L2 = -1
+	c.Cache.L3 = -1
+	vendor, _ := vendorID()
+	switch vendor {
+	case Intel:
+		// Deterministic Cache Parameters (leaf 4): iterate sub-leaves
+		// until the cache-type field reads 0 (no more caches).
+		if maxFunctionID() < 4 {
+			return
+		}
+		for i := uint32(0); ; i++ {
+			eax, ebx, ecx, _ := cpuidex(4, i)
+			cacheType := eax & 15
+			if cacheType == 0 {
+				break
+			}
+			cacheLevel := (eax >> 5) & 7
+			// size = ways * partitions * line size * sets; every EBX/ECX
+			// field is encoded minus one.
+			coherency := int(ebx&0xfff) + 1
+			partitions := int((ebx>>12)&0x3ff) + 1
+			associativity := int((ebx>>22)&0x3ff) + 1
+			sets := int(ecx) + 1
+			size := associativity * partitions * coherency * sets
+			switch cacheLevel {
+			case 1:
+				if cacheType == 1 {
+					// 1 = Data Cache
+					c.Cache.L1D = size
+				} else if cacheType == 2 {
+					// 2 = Instruction Cache
+					c.Cache.L1I = size
+				} else {
+					// Unified L1 fallback.
+					// NOTE(review): both branches assign c.Cache.L1I; the
+					// first presumably intends c.Cache.L1D = size — confirm
+					// against upstream klauspost/cpuid.
+					if c.Cache.L1D < 0 {
+						c.Cache.L1I = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	case AMD, Hygon:
+		// Untested.
+		// Legacy L1 info: leaf 0x80000005, ECX/EDX[31:24] = size in KB.
+		if maxExtendedFunction() < 0x80000005 {
+			return
+		}
+		_, _, ecx, edx := cpuid(0x80000005)
+		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
+		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
+
+		// Legacy L2 info: leaf 0x80000006, ECX[31:16] = size in KB.
+		if maxExtendedFunction() < 0x80000006 {
+			return
+		}
+		_, _, ecx, _ = cpuid(0x80000006)
+		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
+
+		// CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
+		if maxExtendedFunction() < 0x8000001D {
+			return
+		}
+		for i := uint32(0); i < math.MaxUint32; i++ {
+			eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
+
+			level := (eax >> 5) & 7
+			cacheNumSets := ecx + 1
+			// NOTE(review): 2047 masks 11 bits; the CacheLineSize field is
+			// presumably EBX[11:0] (mask 4095) — verify against the AMD spec.
+			cacheLineSize := 1 + (ebx & 2047)
+			cachePhysPartitions := 1 + ((ebx >> 12) & 511)
+			cacheNumWays := 1 + ((ebx >> 22) & 511)
+
+			typ := eax & 15
+			size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
+			if typ == 0 {
+				// Type 0 = no more caches; stop enumerating.
+				return
+			}
+
+			switch level {
+			case 1:
+				switch typ {
+				case 1:
+					// Data cache
+					c.Cache.L1D = size
+				case 2:
+					// Inst cache
+					c.Cache.L1I = size
+				default:
+					// NOTE(review): same double-L1I pattern as the Intel
+					// branch in this function; the first assignment
+					// presumably intends c.Cache.L1D = size.
+					if c.Cache.L1D < 0 {
+						c.Cache.L1I = size
+					}
+					if c.Cache.L1I < 0 {
+						c.Cache.L1I = size
+					}
+				}
+			case 2:
+				c.Cache.L2 = size
+			case 3:
+				c.Cache.L3 = size
+			}
+		}
+	}
+
+	return
+}
+
+type SGXEPCSection struct {
+ BaseAddress uint64
+ EPCSize uint64
+}
+
+type SGXSupport struct {
+ Available bool
+ LaunchControl bool
+ SGX1Supported bool
+ SGX2Supported bool
+ MaxEnclaveSizeNot64 int64
+ MaxEnclaveSize64 int64
+ EPCSections []SGXEPCSection
+}
+
+// hasSGX builds SGXSupport from CPUID leaf 0x12. The available and lc
+// arguments come from the leaf-7 SGX and SGX Launch Control feature bits.
+func hasSGX(available, lc bool) (rval SGXSupport) {
+	rval.Available = available
+
+	if !available {
+		return
+	}
+
+	rval.LaunchControl = lc
+
+	// Sub-leaf 0: capability bits (EAX) and max enclave sizes (EDX, log2).
+	a, _, _, d := cpuidex(0x12, 0)
+	rval.SGX1Supported = a&0x01 != 0
+	rval.SGX2Supported = a&0x02 != 0
+	rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF)     // pow 2
+	rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
+	rval.EPCSections = make([]SGXEPCSection, 0)
+
+	// Sub-leaves 2..9 enumerate EPC sections (a bounded scan of 8).
+	for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
+		eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
+		leafType := eax & 0xf
+
+		if leafType == 0 {
+			// Invalid subleaf, stop iterating
+			break
+		} else if leafType == 1 {
+			// EPC Section subleaf
+			// Base/size are split: low bits [31:12] in EAX/ECX, high 20
+			// bits in EBX/EDX.
+			baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
+			size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
+
+			section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
+			rval.EPCSections = append(rval.EPCSections, section)
+		}
+	}
+
+	return
+}
+
+func support() Flags {
+ mfi := maxFunctionID()
+ vend, _ := vendorID()
+ if mfi < 0x1 {
+ return 0
+ }
+ rval := uint64(0)
+ _, _, c, d := cpuid(1)
+ if (d & (1 << 15)) != 0 {
+ rval |= CMOV
+ }
+ if (d & (1 << 23)) != 0 {
+ rval |= MMX
+ }
+ if (d & (1 << 25)) != 0 {
+ rval |= MMXEXT
+ }
+ if (d & (1 << 25)) != 0 {
+ rval |= SSE
+ }
+ if (d & (1 << 26)) != 0 {
+ rval |= SSE2
+ }
+ if (c & 1) != 0 {
+ rval |= SSE3
+ }
+ if (c & (1 << 5)) != 0 {
+ rval |= VMX
+ }
+ if (c & 0x00000200) != 0 {
+ rval |= SSSE3
+ }
+ if (c & 0x00080000) != 0 {
+ rval |= SSE4
+ }
+ if (c & 0x00100000) != 0 {
+ rval |= SSE42
+ }
+ if (c & (1 << 25)) != 0 {
+ rval |= AESNI
+ }
+ if (c & (1 << 1)) != 0 {
+ rval |= CLMUL
+ }
+ if c&(1<<23) != 0 {
+ rval |= POPCNT
+ }
+ if c&(1<<30) != 0 {
+ rval |= RDRAND
+ }
+ if c&(1<<29) != 0 {
+ rval |= F16C
+ }
+ if c&(1<<13) != 0 {
+ rval |= CX16
+ }
+ if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
+ if threadsPerCore() > 1 {
+ rval |= HTT
+ }
+ }
+ if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
+ if threadsPerCore() > 1 {
+ rval |= HTT
+ }
+ }
+ // Check XGETBV, OXSAVE and AVX bits
+ if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
+ // Check for OS support
+ eax, _ := xgetbv(0)
+ if (eax & 0x6) == 0x6 {
+ rval |= AVX
+ if (c & 0x00001000) != 0 {
+ rval |= FMA3
+ }
+ }
+ }
+
+ // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
+ if mfi >= 7 {
+ _, ebx, ecx, edx := cpuidex(7, 0)
+ eax1, _, _, _ := cpuidex(7, 1)
+ if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
+ rval |= AVX2
+ }
+ if (ebx & 0x00000008) != 0 {
+ rval |= BMI1
+ if (ebx & 0x00000100) != 0 {
+ rval |= BMI2
+ }
+ }
+ if ebx&(1<<2) != 0 {
+ rval |= SGX
+ }
+ if ebx&(1<<4) != 0 {
+ rval |= HLE
+ }
+ if ebx&(1<<9) != 0 {
+ rval |= ERMS
+ }
+ if ebx&(1<<11) != 0 {
+ rval |= RTM
+ }
+ if ebx&(1<<14) != 0 {
+ rval |= MPX
+ }
+ if ebx&(1<<18) != 0 {
+ rval |= RDSEED
+ }
+ if ebx&(1<<19) != 0 {
+ rval |= ADX
+ }
+ if ebx&(1<<29) != 0 {
+ rval |= SHA
+ }
+ if edx&(1<<26) != 0 {
+ rval |= IBPB
+ }
+ if ecx&(1<<30) != 0 {
+ rval |= SGXLC
+ }
+ if edx&(1<<27) != 0 {
+ rval |= STIBP
+ }
+
+ // Only detect AVX-512 features if XGETBV is supported
+ if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
+ // Check for OS support
+ eax, _ := xgetbv(0)
+
+ // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+ // ZMM16-ZMM31 state are enabled by OS)
+ /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
+ if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
+ if ebx&(1<<16) != 0 {
+ rval |= AVX512F
+ }
+ if ebx&(1<<17) != 0 {
+ rval |= AVX512DQ
+ }
+ if ebx&(1<<21) != 0 {
+ rval |= AVX512IFMA
+ }
+ if ebx&(1<<26) != 0 {
+ rval |= AVX512PF
+ }
+ if ebx&(1<<27) != 0 {
+ rval |= AVX512ER
+ }
+ if ebx&(1<<28) != 0 {
+ rval |= AVX512CD
+ }
+ if ebx&(1<<30) != 0 {
+ rval |= AVX512BW
+ }
+ if ebx&(1<<31) != 0 {
+ rval |= AVX512VL
+ }
+ // ecx
+ if ecx&(1<<1) != 0 {
+ rval |= AVX512VBMI
+ }
+ if ecx&(1<<6) != 0 {
+ rval |= AVX512VBMI2
+ }
+ if ecx&(1<<8) != 0 {
+ rval |= GFNI
+ }
+ if ecx&(1<<9) != 0 {
+ rval |= VAES
+ }
+ if ecx&(1<<10) != 0 {
+ rval |= VPCLMULQDQ
+ }
+ if ecx&(1<<11) != 0 {
+ rval |= AVX512VNNI
+ }
+ if ecx&(1<<12) != 0 {
+ rval |= AVX512BITALG
+ }
+ if ecx&(1<<14) != 0 {
+ rval |= AVX512VPOPCNTDQ
+ }
+ // edx
+ if edx&(1<<8) != 0 {
+ rval |= AVX512VP2INTERSECT
+ }
+ // cpuid eax 07h,ecx=1
+ if eax1&(1<<5) != 0 {
+ rval |= AVX512BF16
+ }
+ }
+ }
+ }
+
+ if maxExtendedFunction() >= 0x80000001 {
+ _, _, c, d := cpuid(0x80000001)
+ if (c & (1 << 5)) != 0 {
+ rval |= LZCNT
+ rval |= POPCNT
+ }
+ if (d & (1 << 31)) != 0 {
+ rval |= AMD3DNOW
+ }
+ if (d & (1 << 30)) != 0 {
+ rval |= AMD3DNOWEXT
+ }
+ if (d & (1 << 23)) != 0 {
+ rval |= MMX
+ }
+ if (d & (1 << 22)) != 0 {
+ rval |= MMXEXT
+ }
+ if (c & (1 << 6)) != 0 {
+ rval |= SSE4A
+ }
+ if d&(1<<20) != 0 {
+ rval |= NX
+ }
+ if d&(1<<27) != 0 {
+ rval |= RDTSCP
+ }
+
+ /* Allow for selectively disabling SSE2 functions on AMD processors
+ with SSE2 support but not SSE4a. This includes Athlon64, some
+ Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
+ than SSE2 often enough to utilize this special-case flag.
+ AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
+ so that SSE2 is used unless explicitly disabled by checking
+ AV_CPU_FLAG_SSE2SLOW. */
+ if vend != Intel &&
+ rval&SSE2 != 0 && (c&0x00000040) == 0 {
+ rval |= SSE2SLOW
+ }
+
+ /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
+ * used unless the OS has AVX support. */
+ if (rval & AVX) != 0 {
+ if (c & 0x00000800) != 0 {
+ rval |= XOP
+ }
+ if (c & 0x00010000) != 0 {
+ rval |= FMA4
+ }
+ }
+
+ if vend == Intel {
+ family, model := familyModel()
+ if family == 6 && (model == 9 || model == 13 || model == 14) {
+ /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
+ * 6/14 (core1 "yonah") theoretically support sse2, but it's
+ * usually slower than mmx. */
+ if (rval & SSE2) != 0 {
+ rval |= SSE2SLOW
+ }
+ if (rval & SSE3) != 0 {
+ rval |= SSE3SLOW
+ }
+ }
+ /* The Atom processor has SSSE3 support, which is useful in many cases,
+ * but sometimes the SSSE3 version is slower than the SSE2 equivalent
+ * on the Atom, but is generally faster on other processors supporting
+ * SSSE3. This flag allows for selectively disabling certain SSSE3
+ * functions on the Atom. */
+ if family == 6 && model == 28 {
+ rval |= ATOM
+ }
+ }
+ }
+ return Flags(rval)
+}
+
+// valAsString packs 32-bit CPUID register values into a byte slice in
+// little-endian byte order, truncating at the first NUL byte.
+func valAsString(values ...uint32) []byte {
+	r := make([]byte, 4*len(values))
+	for i, v := range values {
+		dst := r[i*4:]
+		dst[0] = byte(v & 0xff)
+		dst[1] = byte((v >> 8) & 0xff)
+		dst[2] = byte((v >> 16) & 0xff)
+		dst[3] = byte((v >> 24) & 0xff)
+		// A zero byte terminates the string: return everything before it.
+		switch {
+		case dst[0] == 0:
+			return r[:i*4]
+		case dst[1] == 0:
+			return r[:i*4+1]
+		case dst[2] == 0:
+			return r[:i*4+2]
+		case dst[3] == 0:
+			return r[:i*4+3]
+		}
+	}
+	return r
+}
+
+// Single-precision and double-precision floating point
+func (c CPUInfo) ArmFP() bool {
+ return c.Arm&FP != 0
+}
+
+// Advanced SIMD
+func (c CPUInfo) ArmASIMD() bool {
+ return c.Arm&ASIMD != 0
+}
+
+// Generic timer
+func (c CPUInfo) ArmEVTSTRM() bool {
+ return c.Arm&EVTSTRM != 0
+}
+
+// AES instructions
+func (c CPUInfo) ArmAES() bool {
+ return c.Arm&AES != 0
+}
+
+// Polynomial Multiply instructions (PMULL/PMULL2)
+func (c CPUInfo) ArmPMULL() bool {
+ return c.Arm&PMULL != 0
+}
+
+// SHA-1 instructions (SHA1C, etc)
+func (c CPUInfo) ArmSHA1() bool {
+ return c.Arm&SHA1 != 0
+}
+
+// SHA-2 instructions (SHA256H, etc)
+func (c CPUInfo) ArmSHA2() bool {
+ return c.Arm&SHA2 != 0
+}
+
+// CRC32/CRC32C instructions
+func (c CPUInfo) ArmCRC32() bool {
+ return c.Arm&CRC32 != 0
+}
+
+// Large System Extensions (LSE)
+func (c CPUInfo) ArmATOMICS() bool {
+ return c.Arm&ATOMICS != 0
+}
+
+// Half-precision floating point
+func (c CPUInfo) ArmFPHP() bool {
+ return c.Arm&FPHP != 0
+}
+
+// Advanced SIMD half-precision floating point
+func (c CPUInfo) ArmASIMDHP() bool {
+ return c.Arm&ASIMDHP != 0
+}
+
+// Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
+func (c CPUInfo) ArmASIMDRDM() bool {
+ return c.Arm&ASIMDRDM != 0
+}
+
+// Javascript-style double->int convert (FJCVTZS)
+func (c CPUInfo) ArmJSCVT() bool {
+ return c.Arm&JSCVT != 0
+}
+
+// Floating point complex number addition and multiplication
+func (c CPUInfo) ArmFCMA() bool {
+	return c.Arm&FCMA != 0
+}
+
+// Weaker release consistency (LDAPR, etc)
+func (c CPUInfo) ArmLRCPC() bool {
+ return c.Arm&LRCPC != 0
+}
+
+// Data cache clean to Point of Persistence (DC CVAP)
+func (c CPUInfo) ArmDCPOP() bool {
+ return c.Arm&DCPOP != 0
+}
+
+// SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
+func (c CPUInfo) ArmSHA3() bool {
+ return c.Arm&SHA3 != 0
+}
+
+// SM3 instructions
+func (c CPUInfo) ArmSM3() bool {
+ return c.Arm&SM3 != 0
+}
+
+// SM4 instructions
+func (c CPUInfo) ArmSM4() bool {
+ return c.Arm&SM4 != 0
+}
+
+// SIMD Dot Product
+func (c CPUInfo) ArmASIMDDP() bool {
+ return c.Arm&ASIMDDP != 0
+}
+
+// SHA512 instructions
+func (c CPUInfo) ArmSHA512() bool {
+ return c.Arm&SHA512 != 0
+}
+
+// Scalable Vector Extension
+func (c CPUInfo) ArmSVE() bool {
+ return c.Arm&SVE != 0
+}
+
+// Generic Pointer Authentication
+func (c CPUInfo) ArmGPA() bool {
+ return c.Arm&GPA != 0
+}
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_386.s b/vendor/github.com/klauspost/cpuid/cpuid_386.s
new file mode 100644
index 0000000..089638f
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid_386.s
@@ -0,0 +1,42 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build 386,!gccgo,!noasm,!appengine
+
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+ XORL CX, CX
+ MOVL op+0(FP), AX
+ CPUID
+ MOVL AX, eax+4(FP)
+ MOVL BX, ebx+8(FP)
+ MOVL CX, ecx+12(FP)
+ MOVL DX, edx+16(FP)
+ RET
+
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+ MOVL op+0(FP), AX
+ MOVL op2+4(FP), CX
+ CPUID
+ MOVL AX, eax+8(FP)
+ MOVL BX, ebx+12(FP)
+ MOVL CX, ecx+16(FP)
+ MOVL DX, edx+20(FP)
+ RET
+
+// func xgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+ MOVL index+0(FP), CX
+ BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+ MOVL AX, eax+4(FP)
+ MOVL DX, edx+8(FP)
+ RET
+
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+ BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+ MOVL AX, eax+0(FP)
+ MOVL BX, ebx+4(FP)
+ MOVL CX, ecx+8(FP)
+ MOVL DX, edx+12(FP)
+ RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
new file mode 100644
index 0000000..3ba0559
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
@@ -0,0 +1,42 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build amd64,!gccgo,!noasm,!appengine
+
+// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuid(SB), 7, $0
+ XORQ CX, CX
+ MOVL op+0(FP), AX
+ CPUID
+ MOVL AX, eax+8(FP)
+ MOVL BX, ebx+12(FP)
+ MOVL CX, ecx+16(FP)
+ MOVL DX, edx+20(FP)
+ RET
+
+// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·asmCpuidex(SB), 7, $0
+ MOVL op+0(FP), AX
+ MOVL op2+4(FP), CX
+ CPUID
+ MOVL AX, eax+8(FP)
+ MOVL BX, ebx+12(FP)
+ MOVL CX, ecx+16(FP)
+ MOVL DX, edx+20(FP)
+ RET
+
+// func asmXgetbv(index uint32) (eax, edx uint32)
+TEXT ·asmXgetbv(SB), 7, $0
+ MOVL index+0(FP), CX
+ BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
+ MOVL AX, eax+8(FP)
+ MOVL DX, edx+12(FP)
+ RET
+
+// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+TEXT ·asmRdtscpAsm(SB), 7, $0
+ BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
+ MOVL AX, eax+0(FP)
+ MOVL BX, ebx+4(FP)
+ MOVL CX, ecx+8(FP)
+ MOVL DX, edx+12(FP)
+ RET
diff --git a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s b/vendor/github.com/klauspost/cpuid/cpuid_arm64.s
new file mode 100644
index 0000000..8975ee8
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/cpuid_arm64.s
@@ -0,0 +1,26 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build arm64,!gccgo
+
+// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
+
+// func getMidr
+TEXT ·getMidr(SB), 7, $0
+ WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
+ MOVD R0, midr+0(FP)
+ RET
+
+// func getProcFeatures
+TEXT ·getProcFeatures(SB), 7, $0
+ WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
+ MOVD R0, procFeatures+0(FP)
+ RET
+
+// func getInstAttributes
+TEXT ·getInstAttributes(SB), 7, $0
+ WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
+ WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
+ MOVD R0, instAttrReg0+0(FP)
+ MOVD R1, instAttrReg1+8(FP)
+ RET
+
diff --git a/vendor/github.com/klauspost/cpuid/detect_arm64.go b/vendor/github.com/klauspost/cpuid/detect_arm64.go
new file mode 100644
index 0000000..923a826
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/detect_arm64.go
@@ -0,0 +1,219 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build arm64,!gccgo,!noasm,!appengine
+
+package cpuid
+
+func getMidr() (midr uint64)
+func getProcFeatures() (procFeatures uint64)
+func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
+
+func initCPU() {
+ cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+ cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+ xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
+ rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
+}
+
+func addInfo(c *CPUInfo) {
+ // ARM64 disabled for now.
+ if true {
+ return
+ }
+ // midr := getMidr()
+
+ // MIDR_EL1 - Main ID Register
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | Implementer | [31-24] | y |
+ // |--------------------------------------------------|
+ // | Variant | [23-20] | y |
+ // |--------------------------------------------------|
+ // | Architecture | [19-16] | y |
+ // |--------------------------------------------------|
+ // | PartNum | [15-4] | y |
+ // |--------------------------------------------------|
+ // | Revision | [3-0] | y |
+ // x--------------------------------------------------x
+
+ // fmt.Printf(" implementer: 0x%02x\n", (midr>>24)&0xff)
+ // fmt.Printf(" variant: 0x%01x\n", (midr>>20)&0xf)
+ // fmt.Printf("architecture: 0x%01x\n", (midr>>16)&0xf)
+ // fmt.Printf(" part num: 0x%03x\n", (midr>>4)&0xfff)
+ // fmt.Printf(" revision: 0x%01x\n", (midr>>0)&0xf)
+
+ procFeatures := getProcFeatures()
+
+ // ID_AA64PFR0_EL1 - Processor Feature Register 0
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | DIT | [51-48] | y |
+ // |--------------------------------------------------|
+ // | SVE | [35-32] | y |
+ // |--------------------------------------------------|
+ // | GIC | [27-24] | n |
+ // |--------------------------------------------------|
+ // | AdvSIMD | [23-20] | y |
+ // |--------------------------------------------------|
+ // | FP | [19-16] | y |
+ // |--------------------------------------------------|
+ // | EL3 | [15-12] | n |
+ // |--------------------------------------------------|
+ // | EL2 | [11-8] | n |
+ // |--------------------------------------------------|
+ // | EL1 | [7-4] | n |
+ // |--------------------------------------------------|
+ // | EL0 | [3-0] | n |
+ // x--------------------------------------------------x
+
+ var f ArmFlags
+ // if procFeatures&(0xf<<48) != 0 {
+ // fmt.Println("DIT")
+ // }
+ if procFeatures&(0xf<<32) != 0 {
+ f |= SVE
+ }
+ if procFeatures&(0xf<<20) != 15<<20 {
+ f |= ASIMD
+ if procFeatures&(0xf<<20) == 1<<20 {
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
+ // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
+ f |= FPHP
+ f |= ASIMDHP
+ }
+ }
+ if procFeatures&(0xf<<16) != 0 {
+ f |= FP
+ }
+
+ instAttrReg0, instAttrReg1 := getInstAttributes()
+
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+ //
+ // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | TS | [55-52] | y |
+ // |--------------------------------------------------|
+ // | FHM | [51-48] | y |
+ // |--------------------------------------------------|
+ // | DP | [47-44] | y |
+ // |--------------------------------------------------|
+ // | SM4 | [43-40] | y |
+ // |--------------------------------------------------|
+ // | SM3 | [39-36] | y |
+ // |--------------------------------------------------|
+ // | SHA3 | [35-32] | y |
+ // |--------------------------------------------------|
+ // | RDM | [31-28] | y |
+ // |--------------------------------------------------|
+ // | ATOMICS | [23-20] | y |
+ // |--------------------------------------------------|
+ // | CRC32 | [19-16] | y |
+ // |--------------------------------------------------|
+ // | SHA2 | [15-12] | y |
+ // |--------------------------------------------------|
+ // | SHA1 | [11-8] | y |
+ // |--------------------------------------------------|
+ // | AES | [7-4] | y |
+ // x--------------------------------------------------x
+
+ // if instAttrReg0&(0xf<<52) != 0 {
+ // fmt.Println("TS")
+ // }
+ // if instAttrReg0&(0xf<<48) != 0 {
+ // fmt.Println("FHM")
+ // }
+ if instAttrReg0&(0xf<<44) != 0 {
+ f |= ASIMDDP
+ }
+ if instAttrReg0&(0xf<<40) != 0 {
+ f |= SM4
+ }
+ if instAttrReg0&(0xf<<36) != 0 {
+ f |= SM3
+ }
+ if instAttrReg0&(0xf<<32) != 0 {
+ f |= SHA3
+ }
+ if instAttrReg0&(0xf<<28) != 0 {
+ f |= ASIMDRDM
+ }
+ if instAttrReg0&(0xf<<20) != 0 {
+ f |= ATOMICS
+ }
+ if instAttrReg0&(0xf<<16) != 0 {
+ f |= CRC32
+ }
+ if instAttrReg0&(0xf<<12) != 0 {
+ f |= SHA2
+ }
+ if instAttrReg0&(0xf<<12) == 2<<12 {
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+ // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
+ f |= SHA512
+ }
+ if instAttrReg0&(0xf<<8) != 0 {
+ f |= SHA1
+ }
+ if instAttrReg0&(0xf<<4) != 0 {
+ f |= AES
+ }
+ if instAttrReg0&(0xf<<4) == 2<<4 {
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
+ // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
+ f |= PMULL
+ }
+
+ // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
+ //
+ // ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+ // x--------------------------------------------------x
+ // | Name | bits | visible |
+ // |--------------------------------------------------|
+ // | GPI | [31-28] | y |
+ // |--------------------------------------------------|
+ // | GPA | [27-24] | y |
+ // |--------------------------------------------------|
+ // | LRCPC | [23-20] | y |
+ // |--------------------------------------------------|
+ // | FCMA | [19-16] | y |
+ // |--------------------------------------------------|
+ // | JSCVT | [15-12] | y |
+ // |--------------------------------------------------|
+ // | API | [11-8] | y |
+ // |--------------------------------------------------|
+ // | APA | [7-4] | y |
+ // |--------------------------------------------------|
+ // | DPB | [3-0] | y |
+ // x--------------------------------------------------x
+
+ // if instAttrReg1&(0xf<<28) != 0 {
+ // fmt.Println("GPI")
+ // }
+ if instAttrReg1&(0xf<<28) != 24 {
+ f |= GPA
+ }
+ if instAttrReg1&(0xf<<20) != 0 {
+ f |= LRCPC
+ }
+ if instAttrReg1&(0xf<<16) != 0 {
+ f |= FCMA
+ }
+ if instAttrReg1&(0xf<<12) != 0 {
+ f |= JSCVT
+ }
+ // if instAttrReg1&(0xf<<8) != 0 {
+ // fmt.Println("API")
+ // }
+ // if instAttrReg1&(0xf<<4) != 0 {
+ // fmt.Println("APA")
+ // }
+ if instAttrReg1&(0xf<<0) != 0 {
+ f |= DCPOP
+ }
+ c.Arm = f
+}
diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go
new file mode 100644
index 0000000..363951b
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/detect_intel.go
@@ -0,0 +1,33 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine
+
+package cpuid
+
+func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
+func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
+func asmXgetbv(index uint32) (eax, edx uint32)
+func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+
+func initCPU() {
+ cpuid = asmCpuid
+ cpuidex = asmCpuidex
+ xgetbv = asmXgetbv
+ rdtscpAsm = asmRdtscpAsm
+}
+
+func addInfo(c *CPUInfo) {
+ c.maxFunc = maxFunctionID()
+ c.maxExFunc = maxExtendedFunction()
+ c.BrandName = brandName()
+ c.CacheLine = cacheLine()
+ c.Family, c.Model = familyModel()
+ c.Features = support()
+ c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0)
+ c.ThreadsPerCore = threadsPerCore()
+ c.LogicalCores = logicalCores()
+ c.PhysicalCores = physicalCores()
+ c.VendorID, c.VendorString = vendorID()
+ c.Hz = hertz(c.BrandName)
+ c.cacheSize()
+}
diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go
new file mode 100644
index 0000000..970ff3d
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/detect_ref.go
@@ -0,0 +1,14 @@
+// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build !amd64,!386,!arm64 gccgo noasm appengine
+
+package cpuid
+
+func initCPU() {
+ cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+ cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
+ xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
+ rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
+}
+
+func addInfo(info *CPUInfo) {}
diff --git a/vendor/github.com/klauspost/cpuid/go.mod b/vendor/github.com/klauspost/cpuid/go.mod
new file mode 100644
index 0000000..55563f2
--- /dev/null
+++ b/vendor/github.com/klauspost/cpuid/go.mod
@@ -0,0 +1,3 @@
+module github.com/klauspost/cpuid
+
+go 1.12
diff --git a/vendor/github.com/klauspost/reedsolomon/.gitignore b/vendor/github.com/klauspost/reedsolomon/.gitignore
new file mode 100644
index 0000000..59610b5
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/.gitignore
@@ -0,0 +1,26 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+
+.idea \ No newline at end of file
diff --git a/vendor/github.com/klauspost/reedsolomon/.travis.yml b/vendor/github.com/klauspost/reedsolomon/.travis.yml
new file mode 100644
index 0000000..f77b85c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/.travis.yml
@@ -0,0 +1,77 @@
+language: go
+
+os:
+ - linux
+ - osx
+ - windows
+
+arch:
+ - amd64
+ - arm64
+ - ppc64le
+ - s390x
+
+go:
+ - 1.12.x
+ - 1.13.x
+ - 1.14.x
+ - master
+
+install:
+ - go get ./...
+
+script:
+ - go vet ./...
+ - go test -cpu=1,2 .
+ - go test -tags=noasm -cpu=1,2 .
+ - go build examples/simple-decoder.go
+ - go build examples/simple-encoder.go
+ - go build examples/stream-decoder.go
+ - go build examples/stream-encoder.go
+
+stages:
+ - gofmt
+ - test
+ - deploy
+
+jobs:
+ allow_failures:
+ - go: 'master'
+ - arch: s390x
+ fast_finish: true
+ include:
+ - stage: gofmt
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - diff <(gofmt -d .) <(printf "")
+ - diff <(gofmt -d ./examples) <(printf "")
+ - go install github.com/klauspost/asmfmt/cmd/asmfmt
+ - diff <(asmfmt -d .) <(printf "")
+ - stage: race
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - go test -cpu=1 -short -race .
+ - go test -cpu=2 -short -race .
+ - go test -tags=noasm -cpu=1 -short -race .
+ - go test -tags=noasm -cpu=4 -short -race .
+ - go test -no-avx512 -short -race .
+ - go test -no-avx512 -no-avx2 -short -race .
+ - go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
+ - stage: amd64-noasm
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - go test -no-avx512
+ - go test -no-avx512 -no-avx2
+ - go test -no-avx512 -no-avx2 -no-ssse3
+ - stage: i386
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - GOOS=linux GOARCH=386 go test -short .
diff --git a/vendor/github.com/klauspost/reedsolomon/LICENSE b/vendor/github.com/klauspost/reedsolomon/LICENSE
new file mode 100644
index 0000000..a947e16
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/LICENSE
@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+Copyright (c) 2015 Backblaze
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md
new file mode 100644
index 0000000..f9824cb
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/README.md
@@ -0,0 +1,395 @@
+# Reed-Solomon
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
+[2]: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
+[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
+[4]: https://travis-ci.org/klauspost/reedsolomon
+
+Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
+
+This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by
+[Backblaze](http://backblaze.com), with some additional optimizations.
+
+For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
+
+Package home: https://github.com/klauspost/reedsolomon
+
+Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
+
+# Installation
+To get the package use the standard:
+```bash
+go get -u github.com/klauspost/reedsolomon
+```
+
+# Changes
+
+## May 2020
+
+* ARM64 optimizations, up to 2.5x faster.
+* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
+* Much better performance when using a limited number of goroutines.
+* AVX512 is now using multiple cores.
+* Stream processing overhaul, big speedups in most cases.
+* AVX512 optimizations
+
+## March 6, 2019
+
+The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
+
+## February 8, 2019
+
+AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
+See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
+
+## December 18, 2018
+
+Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
+
+## November 18, 2017
+
+Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt
+to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
+
+## October 1, 2017
+
+* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option.
+Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
+
+* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines)
+has been increased for better multi-core scaling.
+
+* After several requests the Reconstruct and ReconstructData now accept slices of zero length but sufficient capacity to
+be used instead of allocating new memory.
+
+## August 26, 2017
+
+* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update`
+function contributed by [chenzhongtao](https://github.com/chenzhongtao).
+
+* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly,
+which gives a huge performance boost on this platform.
+
+## July 20, 2017
+
+`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface.
+This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
+
+```Go
+func (e *YourEnc) ReconstructData(shards [][]byte) error {
+ return ReconstructData(shards)
+}
+```
+
+You can of course also do your own implementation.
+The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder)
+handles this without modifying the interface.
+This is a good lesson on why returning interfaces is not a good design.
+
+# Usage
+
+This section assumes you know the basics of Reed-Solomon encoding.
+A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
+
+This package performs the calculation of the parity sets. The usage is therefore relatively simple.
+
+First of all, you need to choose your distribution of data and parity shards.
+A 'good' distribution is very subjective, and will depend a lot on your usage scenario.
+A good starting point is above 5 and below 257 data shards (the maximum supported number),
+and the number of parity shards to be 2 or above, and below the number of data shards.
+
+To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
+```Go
+ enc, err := reedsolomon.New(10, 3)
+```
+This encoder will work for all parity sets with this distribution of data and parity shards.
+The error will only be set if you specify 0 or negative values in any of the parameters,
+or if you specify more than 256 data shards.
+
+If you will primarily be using it with one shard size it is recommended to use
+[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
+as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
+It is not required that all shards are this size.
+
+The data you send and receive is a simple slice of byte slices; `[][]byte`.
+In the example above, the top slice must have a length of 13.
+
+```Go
+ data := make([][]byte, 13)
+```
+You should then fill the 10 first slices with *equally sized* data,
+and create parity shards that will be populated with parity data. In this case we create the data in memory,
+but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
+
+```Go
+ // Create all shards, size them at 50000 each
+ for i := range data {
+ data[i] = make([]byte, 50000)
+ }
+
+
+ // Fill some data into the data shards
+ for i, in := range data[:10] {
+ for j:= range in {
+ in[j] = byte((i+j)&0xff)
+ }
+ }
+```
+
+To populate the parity shards, you simply call `Encode()` with your data.
+```Go
+ err = enc.Encode(data)
+```
+The only cases where you should get an error is, if the data shards aren't of equal size.
+The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
+
+```Go
+ ok, err = enc.Verify(data)
+```
+
+The final (and important) part is to be able to reconstruct missing shards.
+For this to work, you need to know which parts of your data is missing.
+The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario,
+you need to implement a hash check for each shard.
+
+If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
+
+To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
+
+```Go
+ // Delete two data shards
+ data[3] = nil
+ data[7] = nil
+
+ // Reconstruct the missing shards
+ err := enc.Reconstruct(data)
+```
+The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
+
+If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
+
+```Go
+ // Delete two data shards
+ data[3] = nil
+ data[7] = nil
+
+ // Reconstruct just the missing data shards
+ err := enc.ReconstructData(data)
+```
+
+So to sum up reconstruction:
+* The number of data/parity shards must match the numbers used for encoding.
+* The order of shards must be the same as used when encoding.
+* You may only supply data you know is valid.
+* Invalid shards should be set to nil.
+
+For complete examples of an encoder and decoder see the
+[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Splitting/Joining Data
+
+You might have a large slice of data.
+To help you split this, there are some helper functions that can split and join a single byte slice.
+
+```Go
+ bigfile, _ := ioutil.ReadFile("myfile.data")
+
+ // Split the file
+ split, err := enc.Split(bigfile)
+```
+This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
+
+An important thing to note is that you have to *keep track of the exact input size*.
+If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
+
+To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
+```Go
+ // Join a data set and write it to io.Discard.
+ err = enc.Join(io.Discard, data, len(bigfile))
+```
+
+# Streaming/Merging
+
+It might seem like a limitation that all data should be in memory,
+but an important property is that *as long as the number of data/parity shards are the same,
+you can merge/split data sets*, and they will remain valid as a separate set.
+
+```Go
+ // Split the data set of 50000 elements into two of 25000
+ splitA := make([][]byte, 13)
+ splitB := make([][]byte, 13)
+
+ // Merge into a 100000 element set
+ merged := make([][]byte, 13)
+
+ for i := range data {
+ splitA[i] = data[i][:25000]
+ splitB[i] = data[i][25000:]
+
+ // Concatenate it to itself
+ merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
+ merged[i] = append(merged[i], data[i]...)
+ }
+
+ // Each part should still verify as ok.
+ ok, err := enc.Verify(splitA)
+ if ok && err == nil {
+ log.Println("splitA ok")
+ }
+
+ ok, err = enc.Verify(splitB)
+ if ok && err == nil {
+ log.Println("splitB ok")
+ }
+
+ ok, err = enc.Verify(merged)
+ if ok && err == nil {
+ log.Println("merge ok")
+ }
+```
+
+This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks.
+For the best throughput, don't use too small blocks.
+
+This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data.
+This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
+
+# Streaming API
+
+There has been added support for a streaming API, to help perform fully streaming operations,
+which enables you to do the same operations, but on streams.
+To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function
+to create the encoding/decoding interfaces.
+
+You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams)
+to ready an interface that reads/writes concurrently from the streams.
+
+You can specify the size of each operation using
+[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
+This will set the size of each read/write operation.
+
+Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API.
+Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API.
+If an error occurs in relation to a stream,
+a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError)
+or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError)
+will help you determine which stream was the offender.
+
+There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
+
+For complete examples of a streaming encoder and decoder see the
+[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Advanced Options
+
+You can modify internal options which affects how jobs are split between and processed by goroutines.
+
+To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`.
+If no Options are supplied, default options are used.
+
+Example of how to supply options:
+
+ ```Go
+ enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
+ ```
+
+
+# Performance
+Performance depends mainly on the number of parity shards.
+In rough terms, doubling the number of parity shards will double the encoding time.
+
+Here are the throughput numbers with some different selections of data and parity shards.
+For reference each shard is 1MB random data, and 2 CPU cores are used for encoding.
+
+| Data | Parity | Parity | MB/s | SSSE3 MB/s | SSSE3 Speed | Rel. Speed |
+|------|--------|--------|--------|-------------|-------------|------------|
+| 5 | 2 | 40% | 576,11 | 2599,2 | 451% | 100,00% |
+| 10 | 2 | 20% | 587,73 | 3100,28 | 528% | 102,02% |
+| 10 | 4 | 40% | 298,38 | 2470,97 | 828% | 51,79% |
+| 50 | 20 | 40% | 59,81 | 713,28 | 1193% | 10,38% |
+
+If `runtime.GOMAXPROCS()` is set to a value higher than 1,
+the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
+
+Example of performance scaling on AMD Ryzen 3950X - 16 physical cores, 32 logical cores, AVX 2.
+The example uses 10 blocks with 1MB data each and 4 parity blocks.
+
+| Threads | Speed |
+|---------|------------|
+| 1 | 9979 MB/s |
+| 2 | 18870 MB/s |
+| 4 | 33697 MB/s |
+| 8 | 51531 MB/s |
+| 16 | 59204 MB/s |
+
+
+Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
+```
+benchmark all MB/s data MB/s speedup
+BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x
+BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x
+BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x
+BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x
+BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x
+BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
+BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
+```
+
+# Performance on AVX512
+
+The performance on AVX512 has been accelerated for Intel CPUs.
+This gives speedups on a per-core basis typically up to 2x compared to
+AVX2 as can be seen in the following table:
+
+```
+[...]
+```
+
+This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other.
+In doing so it is possible to minimize the memory bandwidth required for loading all data shards.
+At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM
+registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
+
+# Performance on ARM64 NEON
+
+By exploiting NEON instructions the performance for ARM has been accelerated.
+Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
+
+```
+BenchmarkGalois128K-64 119562 10028 ns/op 13070.78 MB/s
+BenchmarkGalois1M-64 14380 83424 ns/op 12569.22 MB/s
+BenchmarkGaloisXor128K-64 96508 12432 ns/op 10543.29 MB/s
+BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s
+```
+
+# Performance on ppc64le
+
+The performance for ppc64le has been accelerated.
+This gives roughly a 10x performance improvement on this architecture as can been seen below:
+
+```
+benchmark old MB/s new MB/s speedup
+BenchmarkGalois128K-160 948.87 8878.85 9.36x
+BenchmarkGalois1M-160 968.85 9041.92 9.33x
+BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
+BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
+```
+
+# asm2plan9s
+
+[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
+
+# Links
+* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
+* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
+* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
+* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
+* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
+* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
+* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
+* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+
+# License
+
+This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/reedsolomon/appveyor.yml b/vendor/github.com/klauspost/reedsolomon/appveyor.yml
new file mode 100644
index 0000000..9bb067f
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/appveyor.yml
@@ -0,0 +1,20 @@
+os: Visual Studio 2015
+
+platform: x64
+
+clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
+
+# environment variables
+environment:
+ GOPATH: c:\gopath
+
+install:
+ - echo %PATH%
+ - echo %GOPATH%
+ - go version
+ - go env
+ - go get -d ./...
+
+build_script:
+ - go test -v -cpu=2 ./...
+ - go test -cpu=1,2,4 -short -race ./...
diff --git a/vendor/github.com/klauspost/reedsolomon/galois.go b/vendor/github.com/klauspost/reedsolomon/galois.go
new file mode 100644
index 0000000..76049f9
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois.go
@@ -0,0 +1,929 @@
+/**
+ * 8-bit Galois Field
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc. All rights reserved.
+ */
+
+package reedsolomon
+
+const (
+ // The number of elements in the field.
+ fieldSize = 256
+
+ // The polynomial used to generate the logarithm table.
+ //
+ // There are a number of polynomials that work to generate
+ // a Galois field of 256 elements. The choice is arbitrary,
+ // and we just use the first one.
+ //
+ // The possibilities are: 29, 43, 45, 77, 95, 99, 101, 105,
+ //* 113, 135, 141, 169, 195, 207, 231, and 245.
+ generatingPolynomial = 29
+)
+
+// logTable maps each element of GF(2^8) to its discrete logarithm
+// (base 2) in the field generated by generatingPolynomial.
+// logTable[0] is a placeholder: the logarithm of zero is undefined.
+var logTable = [fieldSize]byte{
+ 0, 0, 1, 25, 2, 50, 26, 198,
+ 3, 223, 51, 238, 27, 104, 199, 75,
+ 4, 100, 224, 14, 52, 141, 239, 129,
+ 28, 193, 105, 248, 200, 8, 76, 113,
+ 5, 138, 101, 47, 225, 36, 15, 33,
+ 53, 147, 142, 218, 240, 18, 130, 69,
+ 29, 181, 194, 125, 106, 39, 249, 185,
+ 201, 154, 9, 120, 77, 228, 114, 166,
+ 6, 191, 139, 98, 102, 221, 48, 253,
+ 226, 152, 37, 179, 16, 145, 34, 136,
+ 54, 208, 148, 206, 143, 150, 219, 189,
+ 241, 210, 19, 92, 131, 56, 70, 64,
+ 30, 66, 182, 163, 195, 72, 126, 110,
+ 107, 58, 40, 84, 250, 133, 186, 61,
+ 202, 94, 155, 159, 10, 21, 121, 43,
+ 78, 212, 229, 172, 115, 243, 167, 87,
+ 7, 112, 192, 247, 140, 128, 99, 13,
+ 103, 74, 222, 237, 49, 197, 254, 24,
+ 227, 165, 153, 119, 38, 184, 180, 124,
+ 17, 68, 146, 217, 35, 32, 137, 46,
+ 55, 63, 209, 91, 149, 188, 207, 205,
+ 144, 135, 151, 178, 220, 252, 190, 97,
+ 242, 86, 211, 171, 20, 42, 93, 158,
+ 132, 60, 57, 83, 71, 109, 65, 162,
+ 31, 45, 67, 216, 183, 123, 164, 118,
+ 196, 23, 73, 236, 127, 12, 111, 246,
+ 108, 161, 59, 82, 41, 157, 85, 170,
+ 251, 96, 134, 177, 187, 204, 62, 90,
+ 203, 89, 95, 176, 156, 169, 160, 81,
+ 11, 245, 22, 235, 122, 117, 44, 215,
+ 79, 174, 213, 233, 230, 231, 173, 232,
+ 116, 214, 244, 234, 168, 80, 88, 175,
+}
+
+/**
+ * Inverse of the logarithm table. Maps integer logarithms
+ * to members of the field. There is no entry for 255
+ * because the highest log is 254.
+ *
+ * This table was generated by `go run gentables.go`
+ */
+var expTable = []byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 
0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e}
+
+// galAdd returns the sum of a and b in GF(2^8).
+// In a characteristic-2 Galois field, addition is bitwise XOR.
+func galAdd(a, b byte) byte {
+ return a ^ b
+}
+
+// galSub returns the difference of a and b in GF(2^8).
+// In a characteristic-2 Galois field every element is its own additive
+// inverse, so subtraction is the same XOR operation as galAdd.
+func galSub(a, b byte) byte {
+ return a ^ b
+}
+
+// invTable holds the multiplicative inverse of every non-zero element of GF(2^8); invTable[0] is unused. Table from https://github.com/templexxx/reedsolomon
+var invTable = [256]byte{0x0, 0x1, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, 0xab, 0xc, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x4, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, 0xcb, 0x63, 0x97, 0xe, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, 0xdb, 0x77, 0x6, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x2, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0xf, 0x5c, 0xb, 0xdc, 0xbd, 0x94, 0xac, 0x9, 0xc7, 0xa2, 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x5, 0xce, 0x3b, 0xd, 0x3c, 0x9c, 0x8, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x7, 0x7b, 0x95, 0x9a, 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, 0x99, 0xa, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, 0xe3, 0xe7, 0xb5, 0xea, 0x3, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd}
+
+var mulTable = [256][256]uint8{[256]uint8{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+ {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff},
+ {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0xd, 0xf, 0x9, 0xb, 0x5, 0x7, 0x1, 0x3, 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3},
+ {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0xd, 0xe, 0xb, 0x8, 0x1, 0x2, 0x7, 0x4, 0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c},
+ {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64, 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c, 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19, 0x15, 0x11, 0xd, 0x9, 0x5, 0x1, 0x3d, 0x39, 0x35, 0x31, 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61, 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9, 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1, 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26, 0x1a, 0x1e, 0x12, 0x16, 0xa, 0xe, 0x2, 0x6, 0x7a, 0x7e, 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56, 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86, 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde, 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b, 0x37, 0x33, 0x3f, 0x3b, 0x7, 0x3, 0xf, 0xb, 0x17, 0x13, 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b, 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 0xa7, 0xa3, 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b, 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb},
+ {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f, 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb, 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5, 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7, 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e, 0xd, 0x8, 0x7, 0x2, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20, 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2, 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6, 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf, 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d, 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9, 0x1a, 0x1f, 0x10, 0x15, 0xe, 0xb, 0x4, 0x1, 0x32, 0x37, 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45, 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc, 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2, 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90, 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74, 0x17, 0x12, 0x1d, 0x18, 0x3, 0x6, 0x9, 0xc, 0x3f, 0x3a, 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24},
+ {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72, 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6, 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa, 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82, 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab, 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7, 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f, 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b, 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0xd, 0xb, 0x1, 0x7, 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0xf, 0x9, 0x3, 0x5, 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71, 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed, 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95, 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5, 0xba, 0xbc, 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80, 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8, 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c, 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10, 0x2, 0x4, 0xe, 0x8, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, 0x3e, 0x38},
+ {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4, 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8, 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a, 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28, 0x5, 0x2, 0xb, 0xc, 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60, 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98, 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde, 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52, 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30, 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0xf, 0x8, 0x1, 0x6, 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57, 0xa, 0xd, 0x4, 0x3, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35, 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93, 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff, 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7},
+ {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 0x1d, 0x15, 0xd, 0x5, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55, 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85, 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0xa, 0x2, 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2, 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2, 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x7, 0xf, 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f, 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef, 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c, 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, 0x4, 0xc, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc, 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61, 0x79, 0x71, 0x49, 0x41, 0x59, 0x51, 0x29, 0x21, 0x39, 0x31, 0x9, 0x1, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91, 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76, 0xe, 0x6, 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6, 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b, 0x13, 0x1b, 0x3, 0xb, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb, 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b, 0xb3, 0xbb, 0xa3, 0xab},
+ {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26, 0x19, 0x10, 0xb, 0x2, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92, 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x4, 0xd, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0xf, 0x6, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb, 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, 0x8, 0x1, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe, 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18, 0x3, 0xa, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95, 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, 0xf0, 0xf9, 0x1e, 0x17, 0xc, 0x5, 0x3a, 0x33, 0x28, 0x21, 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba, 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 0xfb, 0xf2, 0xe9, 0xe0, 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x7, 0xe, 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54},
+ {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe, 0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b, 0xd, 0x7, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7, 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3, 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc, 0x1a, 0x10, 0xe, 0x4, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40, 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9, 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71, 0x17, 0x1d, 0x3, 0x9, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63, 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27, 0x11, 0x1b, 0x5, 0xf, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf, 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x8, 0x2, 0x64, 0x6e, 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a, 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5, 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79, 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d, 0xb, 0x1, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8, 0x2e, 0x24, 0x3a, 0x30, 0x6, 0xc, 0x12, 0x18, 0x7e, 0x74, 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48},
+ {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76, 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 0x33, 0x38, 0x9, 0x2, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, 0xf7, 0xfc, 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4, 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9, 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57, 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x4, 0xf, 0x3e, 0x35, 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6, 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c, 0x21, 0x2a, 0x1b, 0x10, 0xd, 0x6, 0x6f, 0x64, 0x79, 0x72, 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x1, 0xa, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89, 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x8, 0x3, 0x1e, 0x15, 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18, 0x5, 0xe, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56, 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca, 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d, 0x20, 0x2b, 0x1a, 0x11, 0xc, 0x7, 0xde, 0xd5, 0xc8, 0xc3, 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, 0xbc, 0xb7},
+ {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc, 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4, 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9, 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31, 0x25, 0x29, 0xd, 0x1, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33, 0x17, 0x1b, 0xf, 0x3, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3, 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6, 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce, 0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, 0x52, 0x5e, 0x1a, 0x16, 0x2, 0xe, 0x2a, 0x26, 0x32, 0x3e, 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22, 0x36, 0x3a, 0x1e, 0x12, 0x6, 0xa, 0x8e, 0x82, 0x96, 0x9a, 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7, 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f, 0xb, 0x7, 0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67, 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, 0x41, 0x4d, 0x9, 0x5, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d, 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5, 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0, 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x4, 0x8, 0x1c, 0x10, 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70},
+ {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x5, 0x8, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0xf, 0x2, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0xa, 0x7, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9, 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, 0x88, 0x85, 0x1e, 0x13, 0x4, 0x9, 0x2a, 0x27, 0x30, 0x3d, 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e, 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x1, 0xc, 0x2f, 0x22, 0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8, 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc, 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e, 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0xb, 0x6, 0x25, 0x28, 0x3f, 0x32, 0x14, 0x19, 0xe, 0x3, 0x20, 0x2d, 0x3a, 0x37, 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9, 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb, 0x98, 0x95, 0x82, 0x8f},
+ {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x5, 0xb, 0x19, 0x17, 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0xf, 0x1, 0x13, 0x1d, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0xa, 0x4, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79, 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x7, 0x9, 0xb3, 0xbd, 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1, 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4, 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10, 0x2, 0xc, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6, 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, 0xa0, 0xae, 0x14, 0x1a, 0x8, 0x6, 0x2c, 0x22, 0x30, 0x3e, 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27, 0x35, 0x3b, 0x11, 0x1f, 0xd, 0x3, 0x59, 0x57, 0x45, 0x4b, 0x61, 0x6f, 0x7d, 0x73, 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93},
+ {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1, 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0, 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0xd, 0x2, 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64, 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2, 0x17, 0x18, 0x9, 0x6, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60, 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x4, 0xb, 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7, 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc, 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba, 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, 0x1, 0xe, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76, 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0xc, 0x3, 0x56, 0x59, 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 0xcf, 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x8, 0x7, 0x16, 0x19, 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb, 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad, 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c, 0x39, 0x36, 0x27, 0x28, 0x5, 0xa, 0x1b, 0x14, 0x41, 0x4e, 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c},
+ {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0x1d, 0xd, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d, 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a, 0x1a, 0xa, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a, 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 0x7, 0x17, 0x67, 0x77, 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7, 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x4, 0xf4, 0xe4, 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59, 0x29, 0x39, 0x9, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0xe, 0x1e, 0x2e, 0x3e, 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43, 0x73, 0x63, 0x13, 0x3, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3, 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x8, 0x18, 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65, 0x55, 0x45, 0x35, 0x25, 0x15, 0x5, 0xd2, 0xc2, 0xf2, 0xe2, 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x2, 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf, 0x4f, 0x5f, 0x6f, 0x7f, 0xf, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c, 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0xc, 0x3c, 0x2c, 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, 0xe1, 0xf1, 0x1, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36, 0x6, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b, 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0xb, 0x7b, 0x6b, 0x5b, 0x4b},
+ {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0xd, 0x1c, 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6, 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0xb, 0x38, 0x29, 0x5e, 0x4f, 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5, 0x17, 0x6, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e, 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x7, 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, 0xda, 0xcb, 0x39, 0x28, 0x1b, 0xa, 0x7d, 0x6c, 0x5f, 0x4e, 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f, 0xc, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95, 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x1, 0x10, 0x67, 0x76, 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc, 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0xe, 0x1f, 0xe0, 0xf1, 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56, 0x21, 0x30, 0x3, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x5, 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x8, 0xf7, 0xe6, 0xd5, 0xc4, 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x9, 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3, 0x51, 0x40, 0x73, 0x62, 0x15, 0x4, 0x37, 0x26, 0xd9, 0xc8, 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75, 0x2, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, 0xa8, 0xb9, 0x4b, 0x5a, 0x69, 0x78, 0xf, 0x1e, 0x2d, 0x3c, 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4},
+ {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0xb, 0x75, 0x67, 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3, 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x4, 0xea, 0xf8, 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71, 0xf, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x8, 0x1a, 0xc9, 0xdb, 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f, 0x11, 0x3, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, 0xe2, 0xf0, 0x1e, 0xc, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60, 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31, 0x7, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3, 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, 0x9, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x2, 0x34, 0x26, 0x8f, 0x9d, 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0xd, 0x3b, 0x29, 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, 0xde, 0xcc, 0x22, 0x30, 0x6, 0x14, 0x6a, 0x78, 0x4e, 0x5c, 0x1, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83, 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0xa, 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x5, 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54, 0x62, 0x70, 0xe, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0, 0x9e, 0x8c, 0xba, 0xa8},
+ {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1, 0x2d, 0x3e, 0xb, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6, 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f, 0x16, 0x5, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0xe, 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 0xa7, 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0xa, 0x19, 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, 0xf3, 0xe0, 0x1, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78, 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65, 0x50, 0x43, 0x3a, 0x29, 0x1c, 0xf, 0xc3, 0xd0, 0xe5, 0xf6, 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x4, 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0xc, 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b, 0x7e, 0x6d, 0x14, 0x7, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5, 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x9, 0x1a, 0x63, 0x70, 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce, 0x2, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89, 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4, 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x6, 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95, 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0xd, 0x9b, 0x88, 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x3, 0x10, 0x25, 0x36, 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, 0xdc, 0xcf, 0x2e, 0x3d, 0x8, 0x1b, 0x62, 0x71, 0x44, 0x57},
+ {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61, 0xd, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6, 0x1a, 0xe, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76, 0xe7, 0xf3, 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b, 0x17, 0x3, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, 0x11, 0x5, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5, 0x34, 0x20, 0x1c, 0x8, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80, 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef, 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, 0xb, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2, 0x2e, 0x3a, 0x6, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6, 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e, 0x22, 0x36, 0xa, 0x1e, 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, 0xf7, 0xe3, 0x2f, 0x3b, 0x7, 0x13, 0x7f, 0x6b, 0x57, 0x43, 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x4, 0xc8, 0xdc, 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x9, 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7, 0x1b, 0xf, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2, 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a, 0x16, 0x2, 0x3e, 0x2a, 0x1, 0x15, 0x29, 0x3d, 0x51, 0x45, 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd, 0x5c, 0x48, 0x74, 0x60, 0xc, 0x18, 0x24, 0x30, 0xfc, 0xe8, 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90},
+ {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0xc, 0x33, 0x26, 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f, 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0xd, 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, 0xa9, 0xbc, 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x1, 0x14, 0x29, 0x3c, 0x3, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94, 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b, 0x30, 0x25, 0x1a, 0xf, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8, 0x1b, 0xe, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb, 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69, 0x2, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x6, 0x13, 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91, 0x1f, 0xa, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2, 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7, 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, 0x1e, 0xb, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee, 0x2d, 0x38, 0x7, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e, 0x51, 0x44, 0x2f, 0x3a, 0x5, 0x10, 0xd3, 0xc6, 0xf9, 0xec, 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x9, 0x62, 0x77, 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5, 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c, 0x63, 0x76, 0x1d, 0x8, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93, 0xf8, 0xed, 0xd2, 0xc7, 0x4, 0x11, 0x2e, 0x3b, 0x50, 0x45, 0x7a, 0x6f},
+ {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b, 0x51, 0x47, 0x25, 0x33, 0x9, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7, 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x4, 0x3e, 0x28, 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21, 0x1b, 0xd, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3, 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x1, 0x17, 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6, 0x24, 0x32, 0x8, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x5, 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 0x8f, 0x99, 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, 0x1a, 0xc, 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc, 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69, 0x53, 0x45, 0x27, 0x31, 0xb, 0x1d, 0xb2, 0xa4, 0x9e, 0x88, 0xea, 0xfc, 0xc6, 0xd0, 0x2, 0x14, 0x2e, 0x38, 0x5a, 0x4c, 0x76, 0x60, 0x35, 0x23, 0x19, 0xf, 0x6d, 0x7b, 0x41, 0x57, 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e, 0x64, 0x72, 0x10, 0x6, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2, 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0xa, 0x1c, 0x7e, 0x68, 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4, 0x5b, 0x4d, 0x77, 0x61, 0x3, 0x15, 0x2f, 0x39, 0xeb, 0xfd, 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6, 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, 0x18, 0xe, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3, 0x11, 0x7, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73},
+ {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, 0x1f, 0x8, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0, 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75, 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x7, 0xb7, 0xa0, 0x99, 0x8e, 0xeb, 0xfc, 0xc5, 0xd2, 0xf, 0x18, 0x21, 0x36, 0x53, 0x44, 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc, 0x11, 0x6, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3, 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45, 0x20, 0x37, 0xe, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, 0x1, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae, 0x1e, 0x9, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1, 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76, 0x13, 0x4, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, 0x85, 0x92, 0x22, 0x35, 0xc, 0x1b, 0x7e, 0x69, 0x50, 0x47, 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82, 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x3, 0x14, 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0xb, 0x32, 0x25, 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49, 0x70, 0x67, 0x2, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2, 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0xa, 0x6f, 0x78, 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x5, 0x60, 0x77, 0x4e, 0x59, 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46, 0x7f, 0x68, 0xd, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0, 0xb5, 0xa2, 0x9b, 0x8c},
+ {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88, 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45, 0x6d, 0x75, 0x3d, 0x25, 0xd, 0x15, 0x27, 0x3f, 0x17, 0xf, 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2, 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x2, 0x2a, 0x32, 0x4e, 0x56, 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x6, 0x8e, 0x96, 0xbe, 0xa6, 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, 0x83, 0x9b, 0x13, 0xb, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b, 0x69, 0x71, 0x59, 0x41, 0x9, 0x11, 0x39, 0x21, 0xa9, 0xb1, 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc, 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x4, 0x1c, 0x54, 0x4c, 0x64, 0x7c, 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4, 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0xc, 0x14, 0x1, 0x19, 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9, 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, 0xeb, 0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x3, 0x2b, 0x33, 0x26, 0x3e, 0x16, 0xe, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe, 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa, 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0xa, 0x22, 0x3a, 0x72, 0x6a, 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x7, 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed, 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x5, 0x1d, 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x8, 0x10, 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0},
+ {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6, 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, 0x13, 0xa, 0x7, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48, 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93, 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69, 0x26, 0x3f, 0x14, 0xd, 0xe, 0x17, 0x3c, 0x25, 0x6a, 0x73, 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89, 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52, 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x4, 0x9, 0x10, 0x3b, 0x22, 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb, 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x3, 0x1c, 0x5, 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff, 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0xf, 0x16, 0x1b, 0x2, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca, 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd, 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, 0x8, 0x11, 0x12, 0xb, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d, 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 0x9f, 0x86, 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c, 0x33, 0x2a, 0x1, 0x18, 0x15, 0xc, 0x27, 0x3e, 0x71, 0x68, 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92, 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49, 0x62, 0x7b, 0x34, 0x2d, 0x6, 0x1f},
+ {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb, 0x6d, 0x77, 0x59, 0x43, 0x5, 0x1f, 0x31, 0x2b, 0x67, 0x7d, 0x53, 0x49, 0xf, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99, 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, 0x86, 0x9c, 0xa, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c, 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x4, 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d, 0x1b, 0x1, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef, 0x79, 0x63, 0x4d, 0x57, 0x11, 0xb, 0x25, 0x3f, 0x14, 0xe, 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea, 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0xd, 0x17, 0x3c, 0x26, 0x8, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6, 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8, 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x2, 0x18, 0x5e, 0x44, 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x7, 0x1d, 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55, 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x9, 0x9f, 0x85, 0xab, 0xb1, 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, 0xae, 0xb4, 0x22, 0x38, 0x16, 0xc, 0x4a, 0x50, 0x7e, 0x64, 0x28, 0x32, 0x1c, 0x6, 0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2, 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb, 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, 0x19, 0x3},
+ {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6, 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58, 0x19, 0x2, 0x2f, 0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, 0x1d, 0x6, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde, 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29, 0x4, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3, 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, 0xc, 0x17, 0x23, 0x38, 0x15, 0xe, 0x4f, 0x54, 0x79, 0x62, 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2, 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0xa, 0x27, 0x3c, 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x8, 0x13, 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd, 0x1, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2, 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81, 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x3, 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x7, 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0, 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x5, 0x1e, 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0xd, 0x16, 0x22, 0x39, 0x14, 0xf, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1, 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5, 0xa4, 0xbf, 0x92, 0x89, 0x10, 0xb, 0x26, 0x3d, 0x7c, 0x67, 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x9, 0x12, 0x3f, 0x24, 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc},
+ {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, 0x95, 0x89, 0x3d, 0x21, 0x5, 0x19, 0x4d, 0x51, 0x75, 0x69, 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b, 0x7f, 0x63, 0x37, 0x2b, 0xf, 0x13, 0x7a, 0x66, 0x42, 0x5e, 0xa, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x7, 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92, 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a, 0x1e, 0x2, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, 0xbc, 0xa0, 0x14, 0x8, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40, 0x29, 0x35, 0x11, 0xd, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5, 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82, 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, 0xe, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0xb, 0x17, 0x33, 0x2f, 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x1, 0x1d, 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5, 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, 0x94, 0x88, 0x3c, 0x20, 0x4, 0x18, 0x4c, 0x50, 0x74, 0x68, 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1, 0x15, 0x9, 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0xc, 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x6, 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93, 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b, 0x1f, 0x3, 0x27, 0x3b},
+ {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb, 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38, 0x1f, 0x2, 0x51, 0x4c, 0x6b, 0x76, 0x87, 0x9a, 0xbd, 0xa0, 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x6, 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x4, 0x19, 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0xe, 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc, 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, 0x90, 0x8d, 0x36, 0x2b, 0xc, 0x11, 0x42, 0x5f, 0x78, 0x65, 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61, 0x46, 0x5b, 0x8, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e, 0x2d, 0x30, 0x17, 0xa, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x1, 0x52, 0x4f, 0x68, 0x75, 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6, 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x3, 0x1e, 0x39, 0x24, 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x7, 0x1a, 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x5, 0x22, 0x3f, 0x84, 0x99, 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7, 0x35, 0x28, 0xf, 0x12, 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab, 0x10, 0xd, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf, 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d, 0x2e, 0x33, 0x14, 0x9, 0x7f, 0x62, 0x45, 0x58, 0xb, 0x16, 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4},
+ {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf, 0x85, 0x9b, 0xb9, 0xa7, 0xd, 0x13, 0x31, 0x2f, 0x75, 0x6b, 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd, 0x17, 0x9, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x4, 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8, 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x1, 0x5b, 0x45, 0x67, 0x79, 0x2e, 0x30, 0x12, 0xc, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0, 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x8, 0x16, 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93, 0x39, 0x27, 0x5, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5, 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69, 0x33, 0x2d, 0xf, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, 0x2, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec, 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x6, 0xac, 0xb2, 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83, 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, 0x15, 0xb, 0x68, 0x76, 0x54, 0x4a, 0x10, 0xe, 0x2c, 0x32, 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b, 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47, 0x1d, 0x3, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x7, 0x19, 0x3b, 0x25, 0x72, 0x6c, 0x4e, 0x50, 0xa, 0x14, 0x36, 0x28, 0x82, 0x9c, 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8},
+ {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0, 0x15, 0xa, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8, 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x1, 0x1e, 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0xb, 0x56, 0x49, 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f, 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74, 0x55, 0x4a, 0x17, 0x8, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f, 0x2, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, 0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x9, 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6, 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60, 0x3d, 0x22, 0x3, 0x1c, 0x3b, 0x24, 0x5, 0x1a, 0x47, 0x58, 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e, 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31, 0x10, 0xf, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd, 0x80, 0x9f, 0xbe, 0xa1, 0x4, 0x1b, 0x3a, 0x25, 0x78, 0x67, 0x46, 0x59, 0x11, 0xe, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c, 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7, 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71, 0x2c, 0x33, 0x12, 0xd, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, 0x7, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0, 0x6f, 0x70, 0x51, 0x4e, 0x13, 0xc, 0x2d, 0x32, 0x97, 0x88, 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3, 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x6, 0x19, 0x38, 0x27},
+ {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a, 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x7, 0x67, 0x47, 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, 0xb4, 0x94, 0x69, 0x49, 0x29, 0x9, 0xe9, 0xc9, 0xa9, 0x89, 0x4e, 0x6e, 0xe, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73, 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88, 0x68, 0x48, 0x28, 0x8, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32, 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0xf, 0x2f, 0x9c, 0xbc, 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1, 0x1, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x6, 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b, 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0xd, 0x2d, 0xd0, 0xf0, 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97, 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, 0x2a, 0xa, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59, 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x4, 0x64, 0x44, 0x83, 0xa3, 0xc3, 0xe3, 0x3, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe, 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x5, 0x65, 0x45, 0xa5, 0x85, 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8, 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x2, 0x22, 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31, 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0xc, 0x2c, 0xcc, 0xec, 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0xb, 0xeb, 0xcb, 0xab, 0x8b, 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96},
+ {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0xb, 0x68, 0x49, 0xae, 0x8f, 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8, 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60, 0x3, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d, 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x8, 0xef, 0xce, 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0xd, 0x6e, 0x4f, 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3, 0xc0, 0xe1, 0x6, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4, 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0xe, 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2, 0x81, 0xa0, 0x47, 0x66, 0x5, 0x24, 0x4d, 0x6c, 0xf, 0x2e, 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x4, 0xe3, 0xc2, 0xa1, 0x80, 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38, 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0xc, 0x2d, 0x4e, 0x6f, 0x88, 0xa9, 0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, 0xf5, 0xd4, 0x26, 0x7, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1, 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x2, 0xf0, 0xd1, 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac, 0x4b, 0x6a, 0x9, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56, 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x1, 0x62, 0x43, 0x9b, 0xba, 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed, 0xa, 0x2b, 0x48, 0x69},
+ {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3, 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35, 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52, 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, 0xf5, 0xd7, 0x2e, 0xc, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0, 0x23, 0x1, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a, 0x2c, 0xe, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x3, 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91, 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73, 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x2, 0x20, 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0xf, 0x2d, 0xc3, 0xe1, 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e, 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8, 0x8e, 0xac, 0x42, 0x60, 0x6, 0x24, 0xc7, 0xe5, 0x83, 0xa1, 0x4f, 0x6d, 0xb, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, 0x28, 0xa, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x7, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1, 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde, 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x8, 0x6e, 0x4c, 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x5, 0x63, 0x41, 0x8c, 0xae, 0xc8, 0xea, 0x4, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7, 0x9, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75},
+ {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 0xa, 0x29, 0x4c, 0x6f, 0x86, 0xa5, 0xc0, 0xe3, 0xf, 0x2c, 0x49, 0x6a, 0x83, 0xa0, 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd, 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d, 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e, 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0xb, 0x6e, 0x4d, 0xa4, 0x87, 0xe2, 0xc1, 0x2d, 0xe, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4, 0x22, 0x1, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x4, 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59, 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf, 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73, 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30, 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6, 0x44, 0x67, 0x2, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62, 0x7, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x8, 0x2b, 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0xd, 0x2e, 0xc7, 0xe4, 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91, 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51, 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 0x77, 0x54, 0x31, 0x12, 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x9, 0xe0, 0xc3, 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0xc, 0xe5, 0xc6, 0xa3, 0x80, 0x66, 0x45, 0x20, 0x3, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40, 0x25, 0x6, 0xef, 0xcc, 0xa9, 0x8a},
+ {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86, 0x47, 0x63, 0xf, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0, 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x8, 0xc9, 0xed, 0x81, 0xa5, 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x7, 0x6b, 0x4f, 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x9, 0xc8, 0xec, 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34, 0x8f, 0xab, 0xc7, 0xe3, 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x6, 0x6a, 0x4e, 0x1, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd, 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f, 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0xe, 0x2a, 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, 0x2f, 0xb, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36, 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94, 0xf8, 0xdc, 0x20, 0x4, 0x68, 0x4c, 0x3, 0x27, 0x4b, 0x6f, 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85, 0x44, 0x60, 0xc, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x2, 0x26, 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53, 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, 0xa0, 0x84, 0x45, 0x61, 0xd, 0x29, 0xd5, 0xf1, 0x9d, 0xb9, 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0xa, 0xcb, 0xef, 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0, 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x5, 0x69, 0x4d},
+ {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f, 0x20, 0x5, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30, 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, 0xa, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a, 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0xf, 0x60, 0x45, 0x8b, 0xae, 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda, 0x21, 0x4, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x1, 0x24, 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44, 0x2b, 0xe, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b, 0xc0, 0xe5, 0x8a, 0xaf, 0xb, 0x2e, 0x41, 0x64, 0x9f, 0xba, 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5, 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67, 0x8, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72, 0x89, 0xac, 0xc3, 0xe6, 0x28, 0xd, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58, 0x96, 0xb3, 0xdc, 0xf9, 0x2, 0x27, 0x48, 0x6d, 0xc9, 0xec, 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93, 0x68, 0x4d, 0x22, 0x7, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0xc, 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8, 0xd7, 0xf2, 0x9, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79, 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x6, 0x69, 0x4c, 0xb7, 0x92, 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87, 0x49, 0x6c, 0x3, 0x26, 0xdd, 0xf8, 0x97, 0xb2},
+ {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85, 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0xa, 0x60, 0x46, 0x99, 0xbf, 0xd5, 0xf3, 0x1, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84, 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87, 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x9, 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x2, 0x24, 0x4e, 0x68, 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e, 0x9b, 0xbd, 0xd7, 0xf1, 0x3, 0x25, 0x4f, 0x69, 0xb6, 0x90, 0xfa, 0xdc, 0x2e, 0x8, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80, 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0xe, 0x64, 0x42, 0x9d, 0xbb, 0xd1, 0xf7, 0x5, 0x23, 0x49, 0x6f, 0x5e, 0x78, 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19, 0xeb, 0xcd, 0xa7, 0x81, 0x4, 0x22, 0x48, 0x6e, 0x9c, 0xba, 0xd0, 0xf6, 0x29, 0xf, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb, 0x9f, 0xb9, 0xd3, 0xf5, 0x7, 0x21, 0x4b, 0x6d, 0xb2, 0x94, 0xfe, 0xd8, 0x2a, 0xc, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf, 0x5d, 0x7b, 0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, 0x3c, 0x1a, 0x2b, 0xd, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9, 0x6, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4, 0x71, 0x57, 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36, 0xc4, 0xe2, 0x88, 0xae},
+ {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0, 0x4a, 0x6d, 0x4, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48, 0x21, 0x6, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd, 0x8, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0xa, 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0xc, 0x2b, 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0xe, 0x35, 0x12, 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79, 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf, 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3, 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82, 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x3, 0xf6, 0xd1, 0xb8, 0x9f, 0x4f, 0x68, 0x1, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x7, 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x5, 0x22, 0x4b, 0x6c, 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, 0x2c, 0xb, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x9, 0x2e, 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0xf, 0x66, 0x41, 0x91, 0xb6, 0xdf, 0xf8, 0xd, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36, 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0, 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec, 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87, 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51},
+ {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2, 0x1a, 0x32, 0x4a, 0x62, 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1, 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb, 0x83, 0xab, 0x73, 0x5b, 0x23, 0xb, 0x8e, 0xa6, 0xde, 0xf6, 0x2e, 0x6, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, 0x22, 0xa, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x7, 0x7f, 0x57, 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d, 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3, 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, 0x16, 0x3e, 0x1, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9, 0x5c, 0x74, 0xc, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91, 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c, 0x44, 0x6c, 0x14, 0x3c, 0x3, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, 0xf3, 0xdb, 0x5e, 0x76, 0xe, 0x26, 0xfe, 0xd6, 0xae, 0x86, 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x8, 0x8d, 0xa5, 0xdd, 0xf5, 0x2d, 0x5, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12, 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3, 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9, 0x81, 0xa9, 0x71, 0x59, 0x21, 0x9, 0x8c, 0xa4, 0xdc, 0xf4, 0x2c, 0x4, 0x7c, 0x54, 0x2, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, 0xf2, 0xda, 0x5f, 0x77, 0xf, 0x27, 0xff, 0xd7, 0xaf, 0x87, 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd, 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d},
+ {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0xe, 0x27, 0x5c, 0x75, 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x9, 0x20, 0x49, 0x60, 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67, 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69, 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee, 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43, 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x4, 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x3, 0x78, 0x51, 0x71, 0x58, 0x23, 0xa, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0xd, 0x76, 0x5f, 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3, 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef, 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0xb, 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0xc, 0x77, 0x5e, 0x81, 0xa8, 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x5, 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x2, 0x79, 0x50, 0xab, 0x82, 0xf9, 0xd0, 0xf, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85, 0x5a, 0x73, 0x8, 0x21, 0x1, 0x28, 0x53, 0x7a, 0xa5, 0x8c, 0xf7, 0xde, 0x54, 0x7d, 0x6, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b, 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e, 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, 0xeb, 0xc2},
+ {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0, 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9, 0x7f, 0x55, 0x2b, 0x1, 0x29, 0x3, 0x7d, 0x57, 0x81, 0xab, 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2, 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4, 0xaa, 0x80, 0x56, 0x7c, 0x2, 0x28, 0x52, 0x78, 0x6, 0x2c, 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e, 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x7, 0x79, 0x53, 0x7b, 0x51, 0x2f, 0x5, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48, 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x4, 0x2e, 0x50, 0x7a, 0xa4, 0x8e, 0xf0, 0xda, 0xc, 0x26, 0x58, 0x72, 0xe9, 0xc3, 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f, 0x3e, 0x14, 0x6a, 0x40, 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0xd, 0xdb, 0xf1, 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0xf, 0x71, 0x5b, 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d, 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0xe, 0x24, 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, 0xa, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d, 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0xb, 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1, 0x77, 0x5d, 0x23, 0x9, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93, 0x8, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde},
+ {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0xd, 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e, 0x9, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67, 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe, 0x2f, 0x4, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3, 0x57, 0x7c, 0x1, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3, 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0, 0x71, 0x5a, 0x27, 0xc, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, 0xe1, 0xca, 0x5e, 0x75, 0x8, 0x23, 0xf2, 0xd9, 0xa4, 0x8f, 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff, 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x5, 0x24, 0xf, 0x72, 0x59, 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x2, 0x29, 0x54, 0x7f, 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x6, 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15, 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0xb, 0x20, 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33, 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58, 0x25, 0xe, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1, 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, 0x3, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee, 0x7a, 0x51, 0x2c, 0x7, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e, 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d, 0x5c, 0x77, 0xa, 0x21},
+ {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9, 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab, 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d, 0x59, 0x75, 0x1, 0x2d, 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x8, 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7, 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3, 0x97, 0xbb, 0x7f, 0x53, 0x27, 0xb, 0xb2, 0x9e, 0xea, 0xc6, 0x2, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c, 0x26, 0xa, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77, 0x3, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8, 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47, 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55, 0x21, 0xd, 0xc9, 0xe5, 0x91, 0xbd, 0x4, 0x28, 0x5c, 0x70, 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3, 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0xc, 0x78, 0x54, 0xed, 0xc1, 0xb5, 0x99, 0x5d, 0x71, 0x5, 0x29, 0x4c, 0x60, 0x14, 0x38, 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x6, 0x2a, 0x5e, 0x72, 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0xf, 0xa5, 0x89, 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac, 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x7, 0x2b, 0xef, 0xc3, 0xb7, 0x9b, 0x22, 0xe, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6},
+ {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d, 0x5e, 0x73, 0x4, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x6, 0x71, 0x5c, 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0xa, 0xbc, 0x91, 0xe6, 0xcb, 0x8, 0x25, 0x52, 0x7f, 0x23, 0xe, 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0xc, 0x21, 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39, 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d, 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31, 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f, 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a, 0x3, 0x2e, 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x1, 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, 0x7, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x5, 0x72, 0x5f, 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x9, 0xbf, 0x92, 0xe5, 0xc8, 0xb, 0x26, 0x51, 0x7c, 0x20, 0xd, 0x7a, 0x57, 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0xf, 0x22, 0xe1, 0xcc, 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f, 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b, 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64, 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3, 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7, 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19},
+ {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10, 0xb7, 0x99, 0xeb, 0xc5, 0xf, 0x21, 0x53, 0x7d, 0xa9, 0x87, 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6, 0x7c, 0x52, 0x20, 0xe, 0x73, 0x5d, 0x2f, 0x1, 0xcb, 0xe5, 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4, 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0xc, 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7, 0x2d, 0x3, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, 0x1c, 0x32, 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x2, 0x2c, 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12, 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0xd, 0x23, 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x8, 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39, 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x7, 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45, 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x6, 0x28, 0xe2, 0xcc, 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x9, 0x27, 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff, 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 0xce, 0x4, 0x2a, 0x58, 0x76, 0xb, 0x25, 0x57, 0x79, 0xb3, 0x9d, 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac, 0x78, 0x56, 0x24, 0xa, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b, 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0, 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, 0x2b, 0x5},
+ {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5, 0x94, 0xbb, 0x76, 0x59, 0x28, 0x7, 0xaf, 0x80, 0xf1, 0xde, 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0xe, 0x21, 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x9, 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0xf, 0x20, 0x51, 0x7e, 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x8, 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9, 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92, 0x5f, 0x70, 0x1, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, 0xae, 0x81, 0x29, 0x6, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4, 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54, 0x25, 0xa, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5, 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0xd, 0x22, 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x4, 0x75, 0x5a, 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72, 0x3, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49, 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x5, 0xc8, 0xe7, 0x96, 0xb9, 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91, 0xe0, 0xcf, 0x2, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9, 0x24, 0xb, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, 0x1f, 0x30, 0x52, 0x7d, 0xc, 0x23, 0xee, 0xc1, 0xb0, 0x9f, 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa},
+ {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, 0x87, 0xb7, 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a, 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3, 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x9, 0x39, 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x4, 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0xc, 0x1, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b, 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76, 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf, 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x5, 0x55, 0x65, 0x68, 0x58, 0x8, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75, 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, 0x18, 0x28, 0x2, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92, 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0xf, 0x6b, 0x5b, 0xb, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6, 0x36, 0x6, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41, 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14, 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce, 0x5e, 0x6e, 0x3e, 0xe, 0x3, 0x33, 0x63, 0x53, 0xc3, 0xf3, 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x7, 0x57, 0x67, 0x6a, 0x5a, 0xa, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0, 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d, 0x8d, 0xbd, 0xed, 0xdd},
+ {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2, 0x37, 0x6, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93, 0xc0, 0xf1, 0x66, 0x57, 0x4, 0x35, 0x6e, 0x5f, 0xc, 0x3d, 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0xe, 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0xa, 0x9d, 0xac, 0xff, 0xce, 0xcc, 0xfd, 0xae, 0x9f, 0x8, 0x39, 0x6a, 0x5b, 0xdc, 0xed, 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a, 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9, 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16, 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6, 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x3, 0x32, 0x30, 0x1, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3, 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x5, 0x7, 0x36, 0x65, 0x54, 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0xf, 0x3e, 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0xd, 0x9a, 0xab, 0xf8, 0xc9, 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x9, 0x5a, 0x6b, 0x69, 0x58, 0xb, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a, 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9, 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26, 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1, 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22},
+ {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x7, 0x35, 0x63, 0x51, 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, 0x26, 0x14, 0xe, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90, 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x9, 0x3b, 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2, 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0xf, 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4, 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x8, 0x12, 0x20, 0x76, 0x44, 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, 0x33, 0x1, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b, 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x6, 0x38, 0xa, 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3, 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0xd, 0x5b, 0x69, 0xf7, 0xc5, 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c, 0x36, 0x4, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89, 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x3, 0x55, 0x67, 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba, 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x5, 0x37, 0x23, 0x11, 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8, 0x66, 0x54, 0x2, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0xb, 0x39, 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92, 0xc4, 0xf6, 0x68, 0x5a, 0xc, 0x3e},
+ {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e, 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0xb, 0x2e, 0x1d, 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe, 0x67, 0x54, 0x1, 0x32, 0x39, 0xa, 0x5f, 0x6c, 0xf5, 0xc6, 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25, 0x5c, 0x6f, 0x3a, 0x9, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea, 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e, 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x2, 0x31, 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb, 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x8, 0x5d, 0x6e, 0x65, 0x56, 0x3, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5, 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, 0x12, 0x21, 0x3d, 0xe, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4, 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x5, 0x36, 0x2a, 0x19, 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3, 0x5a, 0x69, 0x3c, 0xf, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18, 0x4, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7, 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x7, 0x34, 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0xc, 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef, 0xca, 0xf9, 0xac, 0x9f, 0x6, 0x35, 0x60, 0x53, 0x4f, 0x7c, 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88, 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0xd, 0x94, 0xa7, 0xf2, 0xc1},
+ {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31, 0x67, 0x53, 0xf, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86, 0xa, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff, 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20, 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd, 0x51, 0x65, 0x39, 0xd, 0x3c, 0x8, 0x54, 0x60, 0xec, 0xd8, 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x2, 0x5e, 0x6a, 0x5b, 0x6f, 0x33, 0x7, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b, 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae, 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19, 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96, 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24, 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x1, 0x35, 0x69, 0x5d, 0x6c, 0x58, 0x4, 0x30, 0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82, 0xde, 0xea, 0x66, 0x52, 0xe, 0x3a, 0xb, 0x3f, 0x63, 0x57, 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf, 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70, 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0xc, 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x9, 0x55, 0x61, 0x37, 0x3, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb, 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x6},
+ {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49, 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0xf, 0x50, 0x65, 0x5b, 0x6e, 0x31, 0x4, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6, 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a, 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83, 0xdc, 0xe9, 0x62, 0x57, 0x8, 0x3d, 0x3, 0x36, 0x69, 0x5c, 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11, 0x58, 0x6d, 0x32, 0x7, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8, 0x87, 0xb2, 0x39, 0xc, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0, 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x9, 0x56, 0x63, 0x5d, 0x68, 0x37, 0x2, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44, 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b, 0x10, 0x25, 0x7a, 0x4f, 0x6, 0x33, 0x6c, 0x59, 0xd2, 0xe7, 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0xd, 0x38, 0x5e, 0x6b, 0x34, 0x1, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde, 0x81, 0xb4, 0x3f, 0xa, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76, 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0xe, 0x3b, 0x5, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e, 0xc7, 0xf2, 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d, 0xa6, 0x93, 0xcc, 0xf9},
+ {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f, 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc, 0x86, 0xb0, 0x32, 0x4, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4, 0x56, 0x60, 0x3a, 0xc, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b, 0x64, 0x52, 0x8, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x1, 0x37, 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6, 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x5, 0x5f, 0x69, 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0xd, 0x22, 0x14, 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92, 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x9, 0x3f, 0xbd, 0x8b, 0xd1, 0xe7, 0x2, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80, 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73, 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2, 0x30, 0x6, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, 0x38, 0xe, 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3, 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50, 0xa, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x3, 0x35, 0x6f, 0x59, 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6, 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x7, 0x5d, 0x6b, 0x8d, 0xbb, 0xe1, 0xd7, 0x55, 0x63, 0x39, 0xf, 0x20, 0x16, 0x4c, 0x7a, 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, 0x7e, 0x48, 0x67, 0x51, 0xb, 0x3d, 0xbf, 0x89, 0xd3, 0xe5},
+ {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0xe, 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b, 0xb, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 0xf9, 0xce, 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x5, 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0xf, 0x56, 0x61, 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84, 0xdd, 0xea, 0x6f, 0x58, 0x1, 0x36, 0xef, 0xd8, 0x81, 0xb6, 0x33, 0x4, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0xa, 0x3d, 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5, 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x7, 0x27, 0x10, 0x49, 0x7e, 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x9, 0x3e, 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5, 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe, 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0xc, 0x7b, 0x4c, 0x15, 0x22, 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x2, 0x35, 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46, 0x66, 0x51, 0x8, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3, 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x6, 0x5f, 0x68, 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x3, 0x34, 0xb1, 0x86, 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d, 0x3a, 0xd, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 0xa8, 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a},
+ {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0xf, 0x7a, 0x42, 0xa, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b, 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6, 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81, 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0xe, 0x7b, 0x43, 0xb, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x1, 0x39, 0x71, 0x49, 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x4, 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d, 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a, 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7, 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24, 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13, 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x2, 0x3a, 0x72, 0x4a, 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x7, 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0xd, 0x78, 0x40, 0x8, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf, 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62, 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25, 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0xc, 0x79, 0x41, 0x9, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x3, 0x3b, 0x73, 0x4b, 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x6, 0x4e, 0x76},
+ {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a, 0xb7, 0x8e, 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29, 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x1, 0x38, 0x97, 0xae, 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x9, 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28, 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad, 0x2, 0x3b, 0x70, 0x49, 0x33, 0xa, 0x41, 0x78, 0xd7, 0xee, 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe, 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac, 0xe7, 0xde, 0x71, 0x48, 0x3, 0x3a, 0x40, 0x79, 0x32, 0xb, 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58, 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0xc, 0x47, 0x7e, 0x4, 0x3d, 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d, 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0xd, 0x77, 0x4e, 0x5, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c, 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b, 0x24, 0x1d, 0x56, 0x6f, 0x37, 0xe, 0x45, 0x7c, 0xd3, 0xea, 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x6, 0x3f, 0x74, 0x4d, 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c, 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0xf, 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, 0x7, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c, 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89},
+ {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x4, 0xa2, 0x98, 0xd6, 0xec, 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4, 0xaa, 0x90, 0x36, 0xc, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda, 0x7c, 0x46, 0x8, 0x32, 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80, 0xeb, 0xd1, 0x9f, 0xa5, 0x3, 0x39, 0x77, 0x4d, 0xa1, 0x9b, 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x7, 0x6c, 0x56, 0x18, 0x22, 0x84, 0xbe, 0xf0, 0xca, 0x35, 0xf, 0x41, 0x7b, 0xdd, 0xe7, 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e, 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45, 0xb, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x2, 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d, 0x6, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65, 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc, 0x7a, 0x40, 0xe, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0xa, 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3, 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d, 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x1, 0xed, 0xd7, 0x99, 0xa3, 0x5, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, 0xbc, 0x86, 0x79, 0x43, 0xd, 0x37, 0x91, 0xab, 0xe5, 0xdf, 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4, 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58, 0x33, 0x9, 0x47, 0x7d, 0xdb, 0xe1, 0xaf, 0x95},
+ {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64, 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0xd, 0x36, 0x52, 0x69, 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x8, 0x45, 0x7e, 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x5, 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d, 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee, 0x4f, 0x74, 0x39, 0x2, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, 0x6b, 0x50, 0x34, 0xf, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95, 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab, 0xe6, 0xdd, 0x7c, 0x47, 0xa, 0x31, 0xc2, 0xf9, 0xb4, 0x8f, 0x2e, 0x15, 0x58, 0x63, 0x7, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d, 0x9, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60, 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3, 0x72, 0x49, 0x4, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, 0x65, 0x5e, 0x3a, 0x1, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b, 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96, 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0xc, 0xaa, 0x91, 0xdc, 0xe7, 0x46, 0x7d, 0x30, 0xb, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, 0xf5, 0xce, 0x3d, 0x6, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c, 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2, 0xef, 0xd4, 0x75, 0x4e, 0x3, 0x38, 0x5c, 0x67, 0x2a, 0x11, 0xb0, 0x8b, 0xc6, 0xfd, 0xe, 0x35, 0x78, 0x43, 0xe2, 0xd9, 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a},
+ {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3, 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67, 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x8, 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 0xc9, 0xf5, 0xb1, 0x8d, 0x39, 0x5, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, 0x33, 0xf, 0x46, 0x7a, 0x3e, 0x2, 0xb6, 0x8a, 0xce, 0xf2, 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d, 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c, 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x7, 0x3b, 0x72, 0x4e, 0xa, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57, 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2, 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, 0x4, 0x38, 0x71, 0x4d, 0x9, 0x35, 0x81, 0xbd, 0xf9, 0xc5, 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0xc, 0x45, 0x79, 0x3d, 0x1, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b, 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64, 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0xb, 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e, 0x3a, 0x6, 0x42, 0x7e, 0x3, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0xe, 0x32, 0x76, 0x4a, 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25, 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad},
+ {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x3, 0x3e, 0x79, 0x44, 0x2, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce, 0x89, 0xb4, 0x7, 0x3a, 0x7d, 0x40, 0x6, 0x3b, 0x7c, 0x41, 0xf2, 0xcf, 0x88, 0xb5, 0x4, 0x39, 0x7e, 0x43, 0xf0, 0xcd, 0x8a, 0xb7, 0xf1, 0xcc, 0x8b, 0xb6, 0x5, 0x38, 0x7f, 0x42, 0xfb, 0xc6, 0x81, 0xbc, 0xf, 0x32, 0x75, 0x48, 0xe, 0x33, 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0xc, 0x31, 0x76, 0x4b, 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0xd, 0x30, 0x77, 0x4a, 0x8, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb, 0xfd, 0xc0, 0x87, 0xba, 0x9, 0x34, 0x73, 0x4e, 0xff, 0xc2, 0x85, 0xb8, 0xb, 0x36, 0x71, 0x4c, 0xa, 0x37, 0x70, 0x4d, 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad, 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4, 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f, 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c, 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d, 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2, 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1, 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b, 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53, 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, 0x6f, 0x52},
+ {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x1, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68, 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, 0x17, 0x29, 0x7e, 0x40, 0x2, 0x3c, 0x86, 0xb8, 0xfa, 0xc4, 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x3, 0x3b, 0x5, 0x47, 0x79, 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x4, 0x3a, 0x78, 0x46, 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96, 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x7, 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x6, 0x38, 0x76, 0x48, 0xa, 0x34, 0x8e, 0xb0, 0xf2, 0xcc, 0x9b, 0xa5, 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3, 0x49, 0x77, 0x35, 0xb, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f, 0x8, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c, 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d, 0x37, 0x9, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0xf, 0xb5, 0x8b, 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a, 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0xe, 0x30, 0x67, 0x59, 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c, 0x26, 0x18, 0x5a, 0x64, 0x33, 0xd, 0x4f, 0x71, 0xcb, 0xf5, 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3, 0xf4, 0xca, 0x88, 0xb6, 0xc, 0x32, 0x70, 0x4e},
+ {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, 0x55, 0x6a, 0x32, 0xd, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f, 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0xe, 0x56, 0x69, 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25, 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, 0x3, 0x3c, 0x7b, 0x44, 0x5, 0x3a, 0x87, 0xb8, 0xf9, 0xc6, 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93, 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x8, 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0xb, 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90, 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5, 0x84, 0xbb, 0x6, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7, 0xa, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c, 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x7, 0x46, 0x79, 0x45, 0x7a, 0x3b, 0x4, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1, 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, 0x10, 0x2f, 0x77, 0x48, 0x9, 0x36, 0x8b, 0xb4, 0xf5, 0xca, 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0xf, 0x30, 0x68, 0x57, 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b, 0xa6, 0x99, 0xd8, 0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, 0x3d, 0x2, 0x3e, 0x1, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83, 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6, 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0xc, 0x33, 0x72, 0x4d, 0xf0, 0xcf, 0x8e, 0xb1},
+ {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7, 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0xe, 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28, 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, 0x4f, 0xf, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x1, 0x41, 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d, 0x4d, 0xd, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37, 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, 0x24, 0x64, 0x83, 0xc3, 0x3, 0x43, 0x9e, 0xde, 0x1e, 0x5e, 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f, 0x9f, 0xdf, 0x2, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91, 0x4c, 0xc, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, 0xf6, 0xb6, 0x87, 0xc7, 0x7, 0x47, 0x9a, 0xda, 0x1a, 0x5a, 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3, 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x9, 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x8, 0xc8, 0x88, 0x1b, 0x5b, 0x9b, 0xdb, 0x6, 0x46, 0x86, 0xc6, 0x21, 0x61, 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0xa, 0xca, 0x8a, 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3, 0x4, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2, 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58, 0x85, 0xc5, 0x5, 0x45, 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, 0x4b, 0xb, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31},
+ {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7, 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0xe, 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0xb, 0xd1, 0x90, 0x53, 0x12, 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed, 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d, 0x87, 0xc6, 0x5, 0x44, 0x8d, 0xcc, 0xf, 0x4e, 0x94, 0xd5, 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65, 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a, 0x59, 0x18, 0xc2, 0x83, 0x40, 0x1, 0x45, 0x4, 0xc7, 0x86, 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb, 0x13, 0x52, 0x91, 0xd0, 0xa, 0x4b, 0x88, 0xc9, 0x7, 0x46, 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6, 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x9, 0xca, 0x8b, 0xcf, 0x8e, 0x4d, 0xc, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc, 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68, 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, 0x2, 0x43, 0x8a, 0xcb, 0x8, 0x49, 0x93, 0xd2, 0x11, 0x50, 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf, 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f, 0xc5, 0x84, 0x47, 0x6, 0x42, 0x3, 0xc0, 0x81, 0x5b, 0x1a, 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa, 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55, 0x96, 0xd7, 0xd, 0x4c, 0x8f, 0xce},
+ {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x3, 0xc5, 0x87, 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea, 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x6, 0x44, 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x5, 0x4d, 0xf, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25, 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf, 0xc, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36, 0xcf, 0x8d, 0x4b, 0x9, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3, 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d, 0x8e, 0xcc, 0xa, 0x48, 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, 0xb, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63, 0xce, 0x8c, 0x4a, 0x8, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6, 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4, 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0xd, 0x4f, 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5, 0x4c, 0xe, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95, 0x53, 0x11, 0xc2, 0x80, 0x46, 0x4, 0xfd, 0xbf, 0x79, 0x3b, 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x7, 0x45, 0x96, 0xd4, 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a, 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17, 0xd1, 0x93, 0x40, 0x2, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed, 0x3e, 0x7c, 0xba, 0xf8, 0x1, 0x43, 0x85, 0xc7, 0x14, 0x56, 0x90, 0xd2},
+ {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x7, 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3, 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0xe, 0x4d, 0x99, 0xda, 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e, 0xcc, 0x8f, 0x4a, 0x9, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad, 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0xd, 0x4e, 0x8b, 0xc8, 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, 0xb8, 0xfb, 0x49, 0xa, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d, 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6, 0x3, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62, 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x4, 0xd0, 0x93, 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37, 0x1a, 0x59, 0x9c, 0xdf, 0xb, 0x48, 0x8d, 0xce, 0x38, 0x7b, 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b, 0x4f, 0xc, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x5, 0x46, 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95, 0x50, 0x13, 0xc7, 0x84, 0x41, 0x2, 0xf4, 0xb7, 0x72, 0x31, 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 0x6, 0x45, 0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1, 0x53, 0x10, 0xd5, 0x96, 0x42, 0x1, 0xc4, 0x87, 0x71, 0x32, 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a, 0x8e, 0xcd, 0x8, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0xf, 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d},
+ {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef, 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36, 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90, 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x2, 0xce, 0x8a, 0x4b, 0xf, 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11, 0xca, 0x8e, 0x42, 0x6, 0xc7, 0x83, 0x4f, 0xb, 0xe4, 0xa0, 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32, 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63, 0x8c, 0xc8, 0x4, 0x40, 0x81, 0xc5, 0x9, 0x4d, 0x96, 0xd2, 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71, 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, 0x22, 0x66, 0x89, 0xcd, 0x1, 0x45, 0x84, 0xc0, 0xc, 0x48, 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91, 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x3, 0xc2, 0x86, 0x4a, 0xe, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a, 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33, 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95, 0x54, 0x10, 0xdc, 0x98, 0x43, 0x7, 0xcb, 0x8f, 0x4e, 0xa, 0xc6, 0x82, 0x5, 0x41, 0x8d, 0xc9, 0x8, 0x4c, 0x80, 0xc4, 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75, 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7, 0x26, 0x62, 0xae, 0xea},
+ {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4, 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2, 0x36, 0x73, 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0xd, 0xc2, 0x87, 0x41, 0x4, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa, 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5, 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x8, 0x4d, 0x8b, 0xce, 0x1, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60, 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f, 0x40, 0x5, 0xc3, 0x86, 0x49, 0xc, 0xfc, 0xb9, 0x76, 0x33, 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb, 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c, 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0xb, 0x4e, 0x81, 0xc4, 0x2, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1, 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x6, 0xc9, 0x8c, 0x4a, 0xf, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62, 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, 0x3c, 0x79, 0x89, 0xcc, 0x3, 0x46, 0x80, 0xc5, 0xa, 0x4f, 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0, 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38, 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0xe, 0xc8, 0x8d, 0x42, 0x7, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15},
+ {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde, 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7, 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a, 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc, 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, 0xd9, 0x9f, 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95, 0x44, 0x2, 0xc8, 0x8e, 0x41, 0x7, 0xcd, 0x8b, 0x4e, 0x8, 0xc2, 0x84, 0x4b, 0xd, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2, 0x7d, 0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3, 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6, 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60, 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71, 0x88, 0xce, 0x4, 0x42, 0x8d, 0xcb, 0x1, 0x47, 0x82, 0xc4, 0xe, 0x48, 0x87, 0xc1, 0xb, 0x4d, 0x9c, 0xda, 0x10, 0x56, 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f, 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2, 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24, 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d, 0xcc, 0x8a, 0x40, 0x6, 0xc9, 0x8f, 0x45, 0x3, 0xc6, 0x80, 0x4a, 0xc, 0xc3, 0x85, 0x4f, 0x9},
+ {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca, 0x4, 0x43, 0x8a, 0xcd, 0x5, 0x42, 0x8b, 0xcc, 0x6, 0x41, 0x88, 0xcf, 0x7, 0x40, 0x89, 0xce, 0x8, 0x4f, 0x86, 0xc1, 0x9, 0x4e, 0x87, 0xc0, 0xa, 0x4d, 0x84, 0xc3, 0xb, 0x4c, 0x85, 0xc2, 0xc, 0x4b, 0x82, 0xc5, 0xd, 0x4a, 0x83, 0xc4, 0xe, 0x49, 0x80, 0xc7, 0xf, 0x48, 0x81, 0xc6, 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55, 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd, 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0, 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b, 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7, 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, 0xaf, 0xe8, 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea, 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61, 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1, 0x29, 0x6e, 0xa7, 0xe0, 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4, 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77, 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb, 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe, 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d, 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5, 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, 0xb1, 0xf6},
+ {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f, 0xf4, 0xbc, 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56, 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a, 0x1, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33, 0xeb, 0xa3, 0x46, 0xe, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f, 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, 0x20, 0x68, 0x3, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6, 0x79, 0x31, 0xe9, 0xa1, 0x44, 0xc, 0xd4, 0x9c, 0x2, 0x4a, 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0, 0x45, 0xd, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69, 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1, 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x7, 0x4f, 0x97, 0xdf, 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x8, 0xd0, 0x98, 0x6, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3, 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x9, 0xd1, 0x99, 0xf2, 0xba, 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50, 0xb5, 0xfd, 0x25, 0x6d, 0x4, 0x4c, 0x94, 0xdc, 0x39, 0x71, 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 0x43, 0xb, 0xd3, 0x9b, 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2, 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29, 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, 0x26, 0x6e, 0x5, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0, 0x7f, 0x37, 0xef, 0xa7, 0x42, 0xa, 0xd2, 0x9a},
+ {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, 0x4f, 0x6, 0x96, 0xdf, 0x4, 0x4d, 0xaf, 0xe6, 0x3d, 0x74, 0xd5, 0x9c, 0x47, 0xe, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee, 0x35, 0x7c, 0x9e, 0xd7, 0xc, 0x45, 0x31, 0x78, 0xa3, 0xea, 0x8, 0x41, 0x9a, 0xd3, 0x43, 0xa, 0xd1, 0x98, 0x7a, 0x33, 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55, 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a, 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa, 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2, 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd, 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8, 0x4a, 0x3, 0xd8, 0x91, 0x1, 0x48, 0x93, 0xda, 0x38, 0x71, 0xaa, 0xe3, 0x97, 0xde, 0x5, 0x4c, 0xae, 0xe7, 0x3c, 0x75, 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x7, 0xa6, 0xef, 0x34, 0x7d, 0x9f, 0xd6, 0xd, 0x44, 0xd4, 0x9d, 0x46, 0xf, 0xed, 0xa4, 0x7f, 0x36, 0x42, 0xb, 0xd0, 0x99, 0x7b, 0x32, 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x9, 0x40, 0x9b, 0xd2, 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff, 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb, 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3, 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc, 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c, 0xbe, 0xf7, 0x2c, 0x65},
+ {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81, 0xd4, 0x9e, 0x40, 0xa, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4, 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b, 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x1, 0xea, 0xa0, 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a, 0xb, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d, 0xe3, 0xa9, 0x42, 0x8, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3, 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, 0x2, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22, 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2, 0x3c, 0x76, 0x9d, 0xd7, 0x9, 0x43, 0x16, 0x5c, 0x82, 0xc8, 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x3, 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x5, 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70, 0xae, 0xe4, 0xf, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e, 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x4, 0x4e, 0x90, 0xda, 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf, 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0xe, 0x99, 0xd3, 0xd, 0x47, 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, 0x52, 0x18, 0x4d, 0x7, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6, 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66, 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0xc, 0xd2, 0x98, 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, 0x59, 0x13, 0x92, 0xd8, 0x6, 0x4c, 0xa7, 0xed, 0x33, 0x79},
+ {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19, 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, 0x1, 0x4a, 0x95, 0xde, 0x3, 0x48, 0xa4, 0xef, 0x32, 0x79, 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a, 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee, 0x2, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x6, 0x4d, 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9, 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda, 0x7, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f, 0x93, 0xd8, 0x5, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a, 0x4, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25, 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0xc, 0x47, 0x9a, 0xd1, 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, 0xd, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24, 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2, 0xf, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2, 0xe, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5, 0x3b, 0x70, 0xad, 0xe6, 0xa, 0x41, 0x9c, 0xd7, 0x9d, 0xd6, 0xb, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22, 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x9, 0x42, 0x8, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21, 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86},
+ {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x1, 0x4d, 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39, 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb, 0x2, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, 0x74, 0x38, 0x9b, 0xd7, 0x3, 0x4f, 0xb6, 0xfa, 0x2e, 0x62, 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc, 0x28, 0x64, 0x9d, 0xd1, 0x5, 0x49, 0x5e, 0x12, 0xc6, 0x8a, 0x73, 0x3f, 0xeb, 0xa7, 0x4, 0x48, 0x9c, 0xd0, 0x29, 0x65, 0xb1, 0xfd, 0x9f, 0xd3, 0x7, 0x4b, 0xb2, 0xfe, 0x2a, 0x66, 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67, 0xb3, 0xff, 0x6, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5, 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, 0x7c, 0x30, 0x93, 0xdf, 0xb, 0x47, 0xbe, 0xf2, 0x26, 0x6a, 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b, 0xbf, 0xf3, 0xa, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68, 0x91, 0xdd, 0x9, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, 0x53, 0x1f, 0x8, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1, 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f, 0xbb, 0xf7, 0xe, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad, 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0xf, 0x43, 0xba, 0xf6, 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34, 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0xc, 0x40, 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 0x36, 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, 0xd, 0x41},
+ {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9, 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21, 0xdf, 0x92, 0x45, 0x8, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, 0xe6, 0xab, 0x7, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9, 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0xf, 0xa3, 0xee, 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d, 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, 0x4b, 0x6, 0xe, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0, 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2, 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x1, 0xad, 0xe0, 0x37, 0x7a, 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, 0xe8, 0xa5, 0x9, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7, 0x49, 0x4, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56, 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a, 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, 0xc, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2, 0x4e, 0x3, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5, 0x22, 0x6f, 0x91, 0xdc, 0xb, 0x46, 0xea, 0xa7, 0x70, 0x3d, 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x2, 0x4f, 0x47, 0xa, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58, 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61, 0x9f, 0xd2, 0x5, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec, 0x40, 0xd, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe},
+ {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x8, 0x46, 0xb1, 0xff, 0x2d, 0x63, 0xde, 0x90, 0x42, 0xc, 0xfb, 0xb5, 0x67, 0x29, 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31, 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73, 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, 0x52, 0x1c, 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x1, 0xd3, 0x9d, 0x20, 0x6e, 0xbc, 0xf2, 0x5, 0x4b, 0x99, 0xd7, 0xfe, 0xb0, 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x9, 0xb4, 0xfa, 0x28, 0x66, 0x91, 0xdf, 0xd, 0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2, 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf, 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x6, 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x2, 0x4c, 0xbb, 0xf5, 0x27, 0x69, 0x40, 0xe, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7, 0xa, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf, 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79, 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8, 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x7, 0x49, 0xf4, 0xba, 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x3, 0x2a, 0x64, 0xb6, 0xf8, 0xf, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0xb, 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c, 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51, 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87, 0x70, 0x3e, 0xec, 0xa2},
+ {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2, 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74, 0xc6, 0x89, 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4, 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, 0xe8, 0xa7, 0x91, 0xde, 0xf, 0x40, 0xb0, 0xff, 0x2e, 0x61, 0xd3, 0x9c, 0x4d, 0x2, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65, 0xb4, 0xfb, 0xb, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9, 0x49, 0x6, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c, 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32, 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a, 0x9a, 0xd5, 0x4, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, 0x46, 0x9, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4, 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f, 0x4e, 0x1, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0xc, 0x43, 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0xe, 0xdf, 0x90, 0x60, 0x2f, 0xfe, 0xb1, 0x3, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3, 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8, 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf, 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0xa, 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x7, 0x48, 0x6b, 0x24, 0xf5, 0xba, 0x4a, 0x5, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8, 0x8, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d},
+ {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99, 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f, 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb, 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x1, 0x51, 0xa1, 0xf1, 0x5c, 0xc, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, 0x44, 0x14, 0x3, 0x53, 0xa3, 0xf3, 0x5e, 0xe, 0xfe, 0xae, 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 0x6a, 0x3a, 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b, 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, 0x2c, 0x7c, 0x2, 0x52, 0xa2, 0xf2, 0x5f, 0xf, 0xff, 0xaf, 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f, 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25, 0x88, 0xd8, 0x28, 0x78, 0x6, 0x56, 0xa6, 0xf6, 0x5b, 0xb, 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11, 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x7, 0x57, 0xa7, 0xf7, 0x5a, 0xa, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24, 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b, 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef, 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x5, 0x55, 0xa5, 0xf5, 0x58, 0x8, 0xf8, 0xa8, 0x4, 0x54, 0xa4, 0xf4, 0x59, 0x9, 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13, 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87, 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a},
+ {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3, 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3, 0x50, 0x1, 0xab, 0xfa, 0x9, 0x58, 0x40, 0x11, 0xe2, 0xb3, 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93, 0xf9, 0xa8, 0x5b, 0xa, 0xa0, 0xf1, 0x2, 0x53, 0x4b, 0x1a, 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73, 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, 0xc9, 0x98, 0xb, 0x5a, 0xa9, 0xf8, 0x52, 0x3, 0xf0, 0xa1, 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23, 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33, 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, 0x14, 0x45, 0x5d, 0xc, 0xff, 0xae, 0x4, 0x55, 0xa6, 0xf7, 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75, 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee, 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0xd, 0x5c, 0xf6, 0xa7, 0x54, 0x5, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce, 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47, 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x6, 0x57, 0xfd, 0xac, 0x5f, 0xe, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77, 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x7, 0xf4, 0xa5, 0xf, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e, 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, 0xd4, 0x85},
+ {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9, 0x49, 0x1b, 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15, 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b, 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23, 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf, 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3, 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9, 0xf, 0x5d, 0xfe, 0xac, 0x5a, 0x8, 0x1, 0x53, 0xa5, 0xf7, 0x54, 0x6, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb, 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a, 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd, 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43, 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb, 0xd, 0x5f, 0xfc, 0xae, 0x58, 0xa, 0x3, 0x51, 0xa7, 0xf5, 0x56, 0x4, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42, 0x2, 0x50, 0xa6, 0xf4, 0x57, 0x5, 0xf3, 0xa1, 0xa8, 0xfa, 0xc, 0x5e, 0xfd, 0xaf, 0x59, 0xb, 0xd9, 0x8b, 0x7d, 0x2f, 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33, 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99},
+ {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6, 0x59, 0xa, 0xff, 0xac, 0x8, 0x5b, 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0xe, 0xaa, 0xf9, 0xc, 0x5f, 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43, 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e, 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd, 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73, 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77, 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd, 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63, 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 0xa1, 0x54, 0x7, 0xa3, 0xf0, 0x5, 0x56, 0x50, 0x3, 0xf6, 0xa5, 0x1, 0x52, 0xa7, 0xf4, 0xab, 0xf8, 0xd, 0x5e, 0xfa, 0xa9, 0x5c, 0xf, 0x9, 0x5a, 0xaf, 0xfc, 0x58, 0xb, 0xfe, 0xad, 0x40, 0x13, 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17, 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f, 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a, 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27, 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d, 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33, 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37, 0x93, 0xc0, 0x35, 0x66},
+ {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b, 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7, 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x2, 0x52, 0x6, 0xfa, 0xae, 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca, 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x4, 0x50, 0xa4, 0xf0, 0xc, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2, 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0xe, 0xf2, 0xa6, 0xf6, 0xa2, 0x5e, 0xa, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38, 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23, 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x8, 0x5c, 0xa0, 0xf4, 0x55, 0x1, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4, 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28, 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a, 0xab, 0xff, 0x3, 0x57, 0x7, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c, 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0, 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x5, 0xf1, 0xa5, 0x59, 0xd, 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, 0x8e, 0xda, 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69, 0x42, 0x16, 0xea, 0xbe, 0xf, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7, 0xb, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5, 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x9, 0xf5, 0xa1},
+ {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6, 0x70, 0x25, 0xda, 0x8f, 0xab, 0xfe, 0x1, 0x54, 0xe2, 0xb7, 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4, 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x3, 0x56, 0x4b, 0x1e, 0xe1, 0xb4, 0x2, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26, 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, 0x7, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0, 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a, 0xe5, 0xb0, 0x6, 0x53, 0xac, 0xf9, 0x96, 0xc3, 0x3c, 0x69, 0xdf, 0x8a, 0x75, 0x20, 0x4, 0x51, 0xae, 0xfb, 0x4d, 0x18, 0xe7, 0xb2, 0xaf, 0xfa, 0x5, 0x50, 0xe6, 0xb3, 0x4c, 0x19, 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80, 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8, 0xe, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, 0xf, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8, 0xa7, 0xf2, 0xd, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60, 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61, 0xd7, 0x82, 0x7d, 0x28, 0xc, 0x59, 0xa6, 0xf3, 0x45, 0x10, 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87, 0xa3, 0xf6, 0x9, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x8, 0x5d, 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65, 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0xa, 0x5f, 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67, 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd, 0x42, 0x17, 0xa1, 0xf4, 0xb, 0x5e},
+ {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35, 0x9, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6, 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 0x12, 0x44, 0xbe, 0xe8, 0x57, 0x1, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62, 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x8, 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e, 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8, 0x2, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7, 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0xb, 0x5d, 0xe2, 0xb4, 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89, 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x3, 0x3f, 0x69, 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f, 0xf0, 0xa6, 0x5c, 0xa, 0x48, 0x1e, 0xe4, 0xb2, 0xd, 0x5b, 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d, 0x41, 0x17, 0xed, 0xbb, 0x4, 0x52, 0xa8, 0xfe, 0xcb, 0x9d, 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0xc, 0xf6, 0xa0, 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, 0x39, 0x6f, 0x53, 0x5, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec, 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a, 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c, 0xa3, 0xf5, 0xf, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x6, 0x50, 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2, 0x58, 0xe, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d, 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x7, 0xb8, 0xee, 0x14, 0x42},
+ {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e, 0xb7, 0xe0, 0x58, 0xf, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62, 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x8, 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe, 0x7, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d, 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, 0x9, 0x5e, 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5, 0xff, 0xa8, 0x51, 0x6, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x1, 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d, 0x95, 0xc2, 0x3b, 0x6c, 0x4f, 0x18, 0xe1, 0xb6, 0xe, 0x59, 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75, 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d, 0xe4, 0xb3, 0xb, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28, 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x4, 0xfd, 0xaa, 0x12, 0x45, 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x3, 0xbb, 0xec, 0x15, 0x42, 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4, 0x4d, 0x1a, 0xa2, 0xf5, 0xc, 0x5b, 0x61, 0x36, 0xcf, 0x98, 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x2, 0x55, 0xed, 0xba, 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96, 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0xd, 0x37, 0x60, 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67, 0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0xa, 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f, 0x5, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd},
+ {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x1, 0x94, 0xcc, 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86, 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x2, 0x5a, 0x35, 0x6d, 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce, 0x5b, 0x3, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16, 0x79, 0x21, 0xc9, 0x91, 0x4, 0x5c, 0xb4, 0xec, 0x6a, 0x32, 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78, 0xed, 0xb5, 0x5d, 0x5, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, 0x81, 0xd9, 0xb6, 0xee, 0x6, 0x5e, 0xcb, 0x93, 0x7b, 0x23, 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x7, 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3, 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x9, 0x9c, 0xc4, 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67, 0x8, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c, 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6, 0x53, 0xb, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0xa, 0x52, 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0xd, 0x62, 0x3a, 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99, 0xc, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, 0x46, 0x1e, 0x57, 0xf, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2, 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6, 0xe, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac, 0x39, 0x61, 0x89, 0xd1},
+ {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60, 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0xb, 0x52, 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x4, 0x96, 0xcf, 0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84, 0xe4, 0xbd, 0x56, 0xf, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a, 0x71, 0x28, 0xba, 0xe3, 0x8, 0x51, 0x31, 0x68, 0x83, 0xda, 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x3, 0x5a, 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0xc, 0xe7, 0xbe, 0xde, 0x87, 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e, 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x7, 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x9, 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 0x62, 0x3b, 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b, 0xe9, 0xb0, 0x5b, 0x2, 0x74, 0x2d, 0xc6, 0x9f, 0xd, 0x54, 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14, 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26, 0xcd, 0x94, 0x6, 0x5f, 0xb4, 0xed, 0x58, 0x1, 0xea, 0xb3, 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33, 0x53, 0xa, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee, 0x5, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae, 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, 0x85, 0xdc, 0xbc, 0xe5, 0xe, 0x57, 0xc5, 0x9c, 0x77, 0x2e},
+ {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71, 0xc9, 0x93, 0x7d, 0x27, 0xbc, 0xe6, 0x8, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0xc, 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14, 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c, 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42, 0xd9, 0x83, 0x6d, 0x37, 0x3, 0x59, 0xb7, 0xed, 0x76, 0x2c, 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x7, 0x9c, 0xc6, 0x28, 0x72, 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0xb, 0x51, 0x20, 0x7a, 0x94, 0xce, 0x55, 0xf, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62, 0xf9, 0xa3, 0x4d, 0x17, 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde, 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x6, 0x5c, 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x2, 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, 0xe, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0xa, 0xe4, 0xbe, 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39, 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae, 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, 0x6b, 0x31, 0x5, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e, 0xef, 0xb5, 0x5b, 0x1, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96, 0x78, 0x22, 0xb9, 0xe3, 0xd, 0x57, 0x26, 0x7c, 0x92, 0xc8, 0x53, 0x9, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb, 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3, 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32},
+ {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45, 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4, 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0, 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x7, 0x5c, 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x8, 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa, 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77, 0xeb, 0xb0, 0x5d, 0x6, 0x78, 0x23, 0xce, 0x95, 0x9, 0x52, 0xbf, 0xe4, 0xec, 0xb7, 0x5a, 0x1, 0x9d, 0xc6, 0x2b, 0x70, 0xe, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e, 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a, 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8, 0x5f, 0x4, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6, 0xb, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4, 0x58, 0x3, 0xee, 0xb5, 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, 0xc, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c, 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e, 0x73, 0x28, 0xb4, 0xef, 0x2, 0x59, 0x27, 0x7c, 0x91, 0xca, 0x56, 0xd, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62, 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3, 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x5, 0x5e, 0xc2, 0x99, 0x74, 0x2f, 0x51, 0xa, 0xe7, 0xbc, 0x20, 0x7b, 0x96, 0xcd},
+ {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53, 0xa9, 0xf5, 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97, 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c, 0xe6, 0xba, 0x5e, 0x2, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60, 0x84, 0xd8, 0x51, 0xd, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a, 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x6, 0xe2, 0xbe, 0xed, 0xb1, 0x55, 0x9, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d, 0x69, 0x35, 0xbc, 0xe0, 0x4, 0x58, 0xb, 0x57, 0xb3, 0xef, 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b, 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7, 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c, 0xe5, 0xb9, 0x5d, 0x1, 0x52, 0xe, 0xea, 0xb6, 0x3f, 0x63, 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x3, 0x5f, 0xbb, 0xe7, 0xb4, 0xe8, 0xc, 0x50, 0xd9, 0x85, 0x61, 0x3d, 0xc7, 0x9b, 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9, 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x7, 0x5b, 0xd2, 0x8e, 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x8, 0x54, 0xb0, 0xec, 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90, 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14, 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, 0xff, 0xa3, 0x59, 0x5, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0, 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0xa},
+ {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c, 0xb9, 0xe4, 0x3, 0x5e, 0xd0, 0x8d, 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x2, 0x5f, 0xb8, 0xe5, 0x6f, 0x32, 0xd5, 0x88, 0x6, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0, 0x7, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31, 0xbf, 0xe2, 0x5, 0x58, 0x4, 0x59, 0xbe, 0xe3, 0x6d, 0x30, 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0xd, 0x50, 0xc, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a, 0xdd, 0x80, 0xe, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0xf, 0x52, 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0xb, 0x56, 0xd8, 0x85, 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0xa, 0x57, 0xb0, 0xed, 0x8, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87, 0x60, 0x3d, 0xb3, 0xee, 0x9, 0x54, 0xa1, 0xfc, 0x1b, 0x46, 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96, 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93, 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb, 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b, 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0, 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21, 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e, 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4, 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c, 0x12, 0x4f, 0xa8, 0xf5},
+ {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d, 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0xe, 0x43, 0x1d, 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0xf, 0x51, 0xb3, 0xed, 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, 0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x1, 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40, 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36, 0xb1, 0xef, 0xd, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, 0x4e, 0x10, 0x5d, 0x3, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda, 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85, 0x67, 0x39, 0xbe, 0xe0, 0x2, 0x5c, 0x98, 0xc6, 0x24, 0x7a, 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0xc, 0xee, 0xb0, 0x37, 0x69, 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x7, 0xe5, 0xbb, 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb, 0x9, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d, 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x8, 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e, 0xba, 0xe4, 0x6, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e, 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0, 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0xa, 0x8d, 0xd3, 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c, 0x61, 0x3f, 0xdd, 0x83, 0x4, 0x5a, 0xb8, 0xe6, 0x2d, 0x73, 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x5, 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0xb, 0x55, 0xb7, 0xe9},
+ {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78, 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x4, 0xe5, 0xba, 0x3a, 0x65, 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf, 0xed, 0xb2, 0x53, 0xc, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9, 0x8, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95, 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x1, 0xe0, 0xbf, 0x3f, 0x60, 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c, 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x5, 0x5a, 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90, 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0xd, 0x52, 0xd2, 0x8d, 0x6c, 0x33, 0xe8, 0xb7, 0x56, 0x9, 0x89, 0xd6, 0x37, 0x68, 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3, 0x2, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f, 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x6, 0x86, 0xd9, 0x38, 0x67, 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0xe, 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0xa, 0x55, 0xb4, 0xeb, 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x3, 0x83, 0xdc, 0x3d, 0x62, 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24, 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x7, 0x58, 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, 0x12, 0x4d, 0xf, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f, 0x54, 0xb, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9, 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16},
+ {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73, 0x69, 0x9, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc, 0x5c, 0x3c, 0x1, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b, 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x8, 0xa8, 0xc8, 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18, 0x2, 0x62, 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0xb, 0xab, 0xcb, 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84, 0x9e, 0xfe, 0x5e, 0x3e, 0x3, 0x63, 0xc3, 0xa3, 0xf7, 0x97, 0x37, 0x57, 0x6a, 0xa, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70, 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, 0x17, 0x77, 0x6d, 0xd, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50, 0x4, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43, 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76, 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0xc, 0xac, 0xcc, 0x98, 0xf8, 0x58, 0x38, 0x5, 0x65, 0xc5, 0xa5, 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0xf, 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8, 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, 0x7c, 0x1c, 0x6, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b, 0xf3, 0x93, 0x33, 0x53, 0x6e, 0xe, 0xae, 0xce, 0xd4, 0xb4, 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d, 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x7, 0x67, 0xc7, 0xa7},
+ {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f, 0x9c, 0xfd, 0xc7, 0xa6, 0x5, 0x64, 0x71, 0x10, 0xb3, 0xd2, 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0xa, 0x6b, 0xc8, 0xa9, 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac, 0xf, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x4, 0xa7, 0xc6, 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x1, 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8, 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55, 0x6f, 0xe, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0xb, 0x31, 0x50, 0xf3, 0x92, 0xca, 0xab, 0x8, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84, 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37, 0xd, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c, 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x2, 0x63, 0x28, 0x49, 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x7, 0x66, 0xc5, 0xa4, 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0xc, 0x36, 0x57, 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba, 0xf1, 0x90, 0x33, 0x52, 0x68, 0x9, 0xaa, 0xcb, 0xde, 0xbf, 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0, 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, 0x67, 0x6, 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77, 0x62, 0x3, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58},
+ {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4, 0x6e, 0xc, 0xaa, 0xc8, 0xfb, 0x99, 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x8, 0x6a, 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89, 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14, 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x3, 0x30, 0x52, 0xf4, 0x96, 0x92, 0xf0, 0x56, 0x34, 0x7, 0x65, 0xc3, 0xa1, 0xcb, 0xa9, 0xf, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a, 0x69, 0xb, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d, 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42, 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1, 0xc2, 0xa0, 0x6, 0x64, 0x60, 0x2, 0xa4, 0xc6, 0xf5, 0x97, 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0xa, 0xe, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9, 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a, 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1, 0xf2, 0x90, 0x36, 0x54, 0x67, 0x5, 0xa3, 0xc1, 0xc5, 0xa7, 0x1, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a, 0x9, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0xd, 0x3e, 0x5c, 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d, 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22, 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1, 0xe2, 0x80, 0x26, 0x44},
+ {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb, 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22, 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59, 0x6d, 0xe, 0xab, 0xc8, 0xc3, 0xa0, 0x5, 0x66, 0x52, 0x31, 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6, 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89, 0xe5, 0x86, 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f, 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0xa, 0x69, 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x1, 0x35, 0x56, 0xf3, 0x90, 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45, 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x4, 0xa1, 0xc2, 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, 0xf, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3, 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca, 0x6f, 0xc, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33, 0x7, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20, 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x2, 0x61, 0x6a, 0x9, 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97, 0xa3, 0xc0, 0x65, 0x6, 0xd, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78, 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad, 0x8, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54, 0x60, 0x3, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb},
+ {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0xe, 0x6a, 0xc6, 0xa2, 0x83, 0xe7, 0x4b, 0x2f, 0x9, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d, 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76, 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9, 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e, 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55, 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88, 0xa9, 0xcd, 0x61, 0x5, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, 0x66, 0x2, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0xb, 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0xc, 0x70, 0x14, 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb, 0xfa, 0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58, 0x6c, 0x8, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0xf, 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x6, 0xaa, 0xce, 0xef, 0x8b, 0x27, 0x43, 0x65, 0x1, 0xad, 0xc9, 0xe8, 0x8c, 0x20, 0x44, 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0xd, 0x69, 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0xa, 0x6e, 0x46, 0x22, 0x8e, 0xea, 0xcb, 0xaf, 0x3, 0x67, 0x41, 0x25, 0x89, 0xed, 0xcc, 0xa8, 0x4, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72, 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39, 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c},
+ {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38, 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59, 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c, 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, 0x61, 0x4, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0xb, 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12, 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x3, 0xac, 0xc9, 0xef, 0x8a, 0x25, 0x40, 0x69, 0xc, 0xa3, 0xc6, 0xe0, 0x85, 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x7, 0x62, 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x8, 0x6d, 0x5a, 0x3f, 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa, 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9, 0xee, 0x8b, 0x24, 0x41, 0x67, 0x2, 0xad, 0xc8, 0xe1, 0x84, 0x2b, 0x4e, 0x68, 0xd, 0xa2, 0xc7, 0xcc, 0xa9, 0x6, 0x63, 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x9, 0x6c, 0x4a, 0x2f, 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4, 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed, 0x42, 0x27, 0x1, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28, 0xe, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf, 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde, 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x5, 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0xa, 0x2c, 0x49, 0xe6, 0x83},
+ {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48, 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x1, 0x39, 0x5f, 0xf5, 0x93, 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x2, 0x64, 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x3, 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12, 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x5, 0x2a, 0x4c, 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9, 0x81, 0xe7, 0x4d, 0x2b, 0x4, 0x62, 0xc8, 0xae, 0xe4, 0x82, 0x28, 0x4e, 0x61, 0x7, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59, 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x6, 0x60, 0x4f, 0x29, 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2, 0x6d, 0xb, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c, 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9, 0xc6, 0xa0, 0xa, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e, 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x9, 0x1f, 0x79, 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x8, 0x6e, 0xc4, 0xa2, 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, 0x9c, 0xfa, 0xc2, 0xa4, 0xe, 0x68, 0x47, 0x21, 0x8b, 0xed, 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a, 0x20, 0x46, 0x69, 0xf, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23, 0xc, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0xd, 0x22, 0x44, 0xee, 0x88, 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f},
+ {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x9, 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x4, 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb, 0xc3, 0xa4, 0xd, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0, 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x1, 0xa8, 0xcf, 0xc6, 0xa1, 0x8, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70, 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x5, 0x62, 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3, 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2, 0x6b, 0xc, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44, 0x6c, 0xb, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb, 0xcc, 0xab, 0x2, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6, 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27, 0xf, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x6, 0x2e, 0x49, 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98, 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0xa, 0x6d, 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82, 0xaa, 0xcd, 0x64, 0x3, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, 0x7b, 0x1c, 0x69, 0xe, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41, 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30, 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1, 0xc9, 0xae, 0x7, 0x60},
+ {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62, 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1, 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39, 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a, 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d, 0x1f, 0x77, 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0, 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x1, 0x69, 0x6c, 0x4, 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0xe, 0xb, 0x63, 0xdb, 0xb3, 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91, 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8, 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b, 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98, 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f, 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x7, 0x2, 0x6a, 0xd2, 0xba, 0xd8, 0xb0, 0x8, 0x60, 0x65, 0xd, 0xb5, 0xdd, 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e, 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99, 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea, 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8, 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f, 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x6, 0xbe, 0xd6, 0xd3, 0xbb, 0x3, 0x6b, 0x9, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0xc},
+ {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d, 0xde, 0xb7, 0xc, 0x65, 0x67, 0xe, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0xa, 0x8, 0x61, 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3, 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16, 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 0xab, 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32, 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87, 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45, 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22, 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 0xbe, 0xd7, 0x6c, 0x5, 0x7, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x3, 0x6a, 0x68, 0x1, 0xba, 0xd3, 0x60, 0x9, 0xb2, 0xdb, 0xd9, 0xb0, 0xb, 0x62, 0xf, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0xd, 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a, 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3, 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56, 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb, 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d, 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98, 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3},
+ {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5, 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab, 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x8, 0x3, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53, 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82, 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf, 0xc4, 0xae, 0x10, 0x7a, 0x6, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, 0x67, 0xd, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55, 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda, 0x64, 0xe, 0x5, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79, 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f, 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0, 0x6e, 0x4, 0xf, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c, 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, 0xf4, 0x9e, 0xc, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x7, 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6, 0x68, 0x2, 0x9, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75, 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e, 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0xa, 0x60, 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x1, 0x93, 0xf9, 0x47, 0x2d, 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, 0x85, 0xef},
+ {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95, 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 0x3c, 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92, 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0xb, 0xb6, 0xdd, 0xd1, 0xba, 0x7, 0x6c, 0xdf, 0xb4, 0x9, 0x62, 0x6e, 0x5, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d, 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55, 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc, 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, 0xa7, 0xcc, 0xbf, 0xd4, 0x69, 0x2, 0xe, 0x65, 0xd8, 0xb3, 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7, 0xa, 0x61, 0x6d, 0x6, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0, 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e, 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7, 0x6a, 0x1, 0xd, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e, 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, 0x1b, 0x70, 0x3, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0xf, 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96, 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20, 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, 0x85, 0xee, 0x63, 0x8, 0xb5, 0xde, 0xd2, 0xb9, 0x4, 0x6f, 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10},
+ {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x8, 0xbc, 0xd0, 0x1, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a, 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b, 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x9, 0xbd, 0xd1, 0x2, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b, 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0, 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f, 0x66, 0xa, 0xbe, 0xd2, 0x3, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, 0x76, 0x1a, 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d, 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6, 0x12, 0x7e, 0x67, 0xb, 0xbf, 0xd3, 0x4, 0x68, 0xdc, 0xb0, 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93, 0xcd, 0xa1, 0x15, 0x79, 0x60, 0xc, 0xb8, 0xd4, 0x5, 0x69, 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6, 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0xd, 0xb9, 0xd5, 0x6, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d, 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c, 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0xe, 0xba, 0xd6, 0x7, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e, 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5, 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a, 0x63, 0xf, 0xbb, 0xd7},
+ {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51, 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc, 0xb, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96, 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x3, 0xb4, 0xd9, 0xc7, 0xaa, 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x8, 0x16, 0x7b, 0xcc, 0xa1, 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f, 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c, 0xd, 0x60, 0xd7, 0xba, 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x6, 0x6b, 0x75, 0x18, 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d, 0x63, 0xe, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41, 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a, 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x5, 0x1b, 0x76, 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a, 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0xf, 0xb8, 0xd5, 0x1a, 0x77, 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x4, 0x55, 0x38, 0x8f, 0xe2, 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0xc, 0x61, 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4, 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25, 0x74, 0x19, 0xae, 0xc3, 0xdd, 0xb0, 0x7, 0x6a, 0xc6, 0xab, 0x1c, 0x71, 0x6f, 0x2, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46, 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x9, 0xe7, 0x8a, 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f, 0x1, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, 0xa, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28},
+ {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c, 0xb, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56, 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x1, 0xef, 0x81, 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0xa, 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, 0xfb, 0x95, 0xd5, 0xbb, 0x9, 0x67, 0x70, 0x1e, 0xac, 0xc2, 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15, 0xa7, 0xc9, 0xde, 0xb0, 0x2, 0x6c, 0xc3, 0xad, 0x1f, 0x71, 0x66, 0x8, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, 0xed, 0x83, 0x6d, 0x3, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a, 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77, 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0xe, 0x4e, 0x20, 0x92, 0xfc, 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x5, 0x12, 0x7c, 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7, 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0xf, 0x61, 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44, 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x4, 0x6a, 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c, 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x7, 0xb5, 0xdb, 0x35, 0x5b, 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0xc, 0xbe, 0xd0, 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x6, 0x68, 0x7f, 0x11, 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a, 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0xd, 0x63, 0x23, 0x4d, 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34},
+ {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0xf, 0x1f, 0x70, 0xc1, 0xae, 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0xe, 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f, 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x1, 0x6e, 0x7e, 0x11, 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90, 0xc2, 0xad, 0x1c, 0x73, 0x63, 0xc, 0xbd, 0xd2, 0x9d, 0xf2, 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd, 0xdd, 0xb2, 0x3, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x2, 0x6d, 0xdc, 0xb3, 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72, 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0xd, 0x42, 0x2d, 0x9c, 0xf3, 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x8, 0xb9, 0xd6, 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17, 0xa6, 0xc9, 0xd9, 0xb6, 0x7, 0x68, 0xf8, 0x97, 0x26, 0x49, 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x6, 0x69, 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56, 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x9, 0x5b, 0x34, 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x4, 0x6b, 0xda, 0xb5, 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0xb, 0x1b, 0x74, 0xc5, 0xaa, 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0xa, 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 0x5a, 0x35, 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x5, 0x6a, 0x7a, 0x15, 0xa4, 0xcb},
+ {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea, 0x53, 0x23, 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64, 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0xb, 0x9b, 0xeb, 0x1, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c, 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22, 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1, 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, 0xcb, 0xbb, 0x2, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f, 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x8, 0x98, 0xe8, 0xf7, 0x87, 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0, 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x9, 0x99, 0xe9, 0x3, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e, 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0xf, 0x9f, 0xef, 0x5, 0x75, 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61, 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, 0x6b, 0x1b, 0x4, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49, 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0xe, 0x9e, 0xee, 0x57, 0x27, 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60, 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19, 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0xd, 0x9d, 0xed, 0x7, 0x77, 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5, 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, 0xcf, 0xbf, 0x6, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b, 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0xc, 0x9c, 0xec},
+ {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, 0x78, 0x9, 0xec, 0x9d, 0xe, 0x7f, 0x35, 0x44, 0xd7, 0xa6, 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58, 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56, 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b, 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23, 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e, 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x4, 0x4e, 0x3f, 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x3, 0x72, 0xd4, 0xa5, 0x36, 0x47, 0xd, 0x7c, 0xef, 0x9e, 0x7b, 0xa, 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1, 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b, 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5, 0x46, 0x37, 0x7d, 0xc, 0x9f, 0xee, 0xb, 0x7a, 0xe9, 0x98, 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x5, 0x74, 0x3e, 0x4f, 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x2, 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x8, 0x79, 0x9c, 0xed, 0x7e, 0xf, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x1, 0x92, 0xe3, 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x6, 0x77, 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff, 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87, 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca, 0x80, 0xf1, 0x62, 0x13},
+ {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4, 0x73, 0x1, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6, 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x2, 0x70, 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, 0x60, 0x12, 0x95, 0xe7, 0x71, 0x3, 0x40, 0x32, 0xa4, 0xd6, 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3, 0x35, 0x47, 0x4, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0, 0xb3, 0xc1, 0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x5, 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56, 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x6, 0x74, 0x80, 0xf2, 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2, 0x91, 0xe3, 0x75, 0x7, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc, 0x8, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe, 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x9, 0x9f, 0xed, 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, 0x68, 0x1a, 0xee, 0x9c, 0xa, 0x78, 0x3b, 0x49, 0xdf, 0xad, 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef, 0x79, 0xb, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8, 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0xc, 0x7e, 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e, 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0xd, 0x9b, 0xe9, 0x88, 0xfa, 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9, 0xea, 0x98, 0xe, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, 0xca, 0xb8, 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0xf},
+ {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6, 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0xd, 0x7e, 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82, 0x79, 0xa, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6, 0x43, 0x30, 0x74, 0x7, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f, 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x4, 0x40, 0x33, 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a, 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e, 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x9, 0x57, 0x24, 0xb1, 0xc2, 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0xe, 0x7d, 0x39, 0x4a, 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x3, 0x70, 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c, 0xd9, 0xaa, 0xee, 0x9d, 0x8, 0x7b, 0x80, 0xf3, 0x66, 0x15, 0x51, 0x22, 0xb7, 0xc4, 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, 0x6b, 0x18, 0xe3, 0x90, 0x5, 0x76, 0x32, 0x41, 0xd4, 0xa7, 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35, 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x2, 0x9a, 0xe9, 0x7c, 0xf, 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0xc, 0x99, 0xea, 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe, 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x1, 0x94, 0xe7, 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x6, 0x75, 0xe0, 0x93, 0xb, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7, 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0},
+ {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42, 0x94, 0xe0, 0x7c, 0x8, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52, 0xce, 0xba, 0xeb, 0x9f, 0x3, 0x77, 0xa1, 0xd5, 0x49, 0x3d, 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0xb, 0x97, 0xe3, 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 0xbf, 0x23, 0x57, 0x6, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3, 0x92, 0xe6, 0x7a, 0xe, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b, 0xed, 0x99, 0x5, 0x71, 0x20, 0x54, 0xc8, 0xbc, 0x79, 0xd, 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62, 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x4, 0x55, 0x21, 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e, 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0xc, 0x78, 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22, 0x73, 0x7, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc, 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0xf, 0x7b, 0xd4, 0xa0, 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf, 0x9e, 0xea, 0x76, 0x2, 0xc7, 0xb3, 0x2f, 0x5b, 0xa, 0x7e, 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11, 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x1, 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x9, 0x7d, 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, 0x43, 0x37},
+ {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x3, 0x76, 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13, 0x45, 0x30, 0xaf, 0xda, 0x6, 0x73, 0xec, 0x99, 0xcf, 0xba, 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf, 0x5, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff, 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0xc, 0x79, 0xe6, 0x93, 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, 0xa0, 0xd5, 0xf, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59, 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0xa, 0x7f, 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a, 0x4c, 0x39, 0xa6, 0xd3, 0x9, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0, 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2, 0x7d, 0x8, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84, 0xd2, 0xa7, 0x38, 0x4d, 0x94, 0xe1, 0x7e, 0xb, 0x5d, 0x28, 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48, 0x91, 0xe4, 0x7b, 0xe, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68, 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0xd, 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, 0x37, 0x42, 0x9b, 0xee, 0x71, 0x4, 0x52, 0x27, 0xb8, 0xcd, 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed, 0x72, 0x7, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d, 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x2, 0x54, 0x21, 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47, 0x9e, 0xeb, 0x74, 0x1, 0x57, 0x22, 0xbd, 0xc8},
+ {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb, 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87, 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf, 0x90, 0xe6, 0x7c, 0xa, 0xc2, 0xb4, 0x2e, 0x58, 0x7, 0x71, 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x9, 0x7f, 0xe5, 0x93, 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x4, 0xff, 0x89, 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2, 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62, 0x99, 0xef, 0x75, 0x3, 0x5c, 0x2a, 0xb0, 0xc6, 0xe, 0x78, 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f, 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x5, 0x9f, 0xe9, 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x8, 0x7e, 0xe3, 0x95, 0xf, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 0x2, 0x98, 0xee, 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18, 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8, 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0xc, 0x96, 0xe0, 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x1, 0x77, 0x28, 0x5e, 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x6, 0x70, 0xb8, 0xce, 0x54, 0x22, 0x7d, 0xb, 0x91, 0xe7, 0x1c, 0x6a, 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11, 0x4e, 0x38, 0xa2, 0xd4},
+ {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7, 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0xc, 0x7b, 0xbc, 0xcb, 0x52, 0x25, 0x7d, 0xa, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf, 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d, 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb, 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a, 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, 0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68, 0xca, 0xbd, 0x24, 0x53, 0xb, 0x7c, 0xe5, 0x92, 0x55, 0x22, 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0xd, 0xe9, 0x9e, 0x7, 0x70, 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x1, 0x98, 0xef, 0xb7, 0xc0, 0x59, 0x2e, 0x5, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d, 0x9a, 0xed, 0x74, 0x3, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51, 0xc8, 0xbf, 0xe7, 0x90, 0x9, 0x7e, 0xb9, 0xce, 0x57, 0x20, 0x78, 0xf, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84, 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88, 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10, 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2, 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8, 0x21, 0x56, 0xe, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9, 0x91, 0xe6, 0x7f, 0x8, 0xec, 0x9b, 0x2, 0x75, 0x2d, 0x5a, 0xc3, 0xb4, 0x73, 0x4, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b},
+ {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b, 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce, 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10, 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 0x8f, 0xf7, 0x7f, 0x7, 0x72, 0xa, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x4, 0x71, 0x9, 0x81, 0xf9, 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27, 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58, 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, 0x3a, 0x42, 0x3, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0xe, 0x76, 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae, 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9, 0xcc, 0xb4, 0x3c, 0x44, 0x5, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, 0x8, 0x70, 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97, 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2, 0x7a, 0x2, 0x77, 0xf, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36, 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8, 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16, 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x1, 0x74, 0xc, 0x84, 0xfc, 0x6, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, 0xb, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94, 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a, 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47},
+ {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1, 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2, 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0xd, 0x86, 0xff, 0x8d, 0xf4, 0x7f, 0x6, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5, 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd, 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63, 0x11, 0x68, 0xe3, 0x9a, 0x7, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, 0xc, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2, 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0xa, 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x1, 0x9c, 0xe5, 0x6e, 0x17, 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, 0x5d, 0x24, 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb, 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x3, 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x8, 0xcd, 0xb4, 0x3f, 0x46, 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, 0x29, 0x50, 0xe, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x5, 0x7c, 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x4, 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0xf, 0x92, 0xeb, 0x60, 0x19, 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, 0xb5, 0xcc, 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23, 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x9, 0x70, 0xfb, 0x82, 0xf0, 0x89, 0x2, 0x7b, 0x25, 0x5c, 0xd7, 0xae, 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, 0xc1, 0xb8},
+ {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c, 0xf3, 0x89, 0x7, 0x7d, 0x6, 0x7c, 0xf2, 0x88, 0x4, 0x7e, 0xf0, 0x8a, 0xf1, 0x8b, 0x5, 0x7f, 0xfb, 0x81, 0xf, 0x75, 0xe, 0x74, 0xfa, 0x80, 0xc, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0xd, 0x77, 0x8, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x9, 0x73, 0xff, 0x85, 0xb, 0x71, 0xa, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65, 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63, 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a, 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69, 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f, 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46, 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6, 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b, 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9, 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba, 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac, 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 0x24, 0x5e, 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55, 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53, 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4},
+ {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60, 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f, 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5, 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7, 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33, 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a, 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0xb, 0x86, 0xfd, 0x81, 0xfa, 0x77, 0xc, 0x8f, 0xf4, 0x79, 0x2, 0x7e, 0x5, 0x88, 0xf3, 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17, 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6, 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4, 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb, 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2, 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x8, 0x85, 0xfe, 0x82, 0xf9, 0x74, 0xf, 0x8c, 0xf7, 0x7a, 0x1, 0x7d, 0x6, 0x8b, 0xf0, 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64, 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x3, 0x78, 0xf5, 0x8e, 0xf2, 0x89, 0x4, 0x7f, 0xfc, 0x87, 0xa, 0x71, 0xd, 0x76, 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47, 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3, 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa, 0xd6, 0xad, 0x20, 0x5b},
+ {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae, 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x2, 0x86, 0xfa, 0x54, 0x28, 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf, 0xd6, 0xaa, 0x2e, 0x52, 0xfc, 0x80, 0x4, 0x78, 0x11, 0x6d, 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1, 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x6, 0x76, 0xa, 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35, 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x8, 0x74, 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b, 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6, 0x72, 0xe, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a, 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, 0xc, 0x70, 0xec, 0x90, 0x14, 0x68, 0x1, 0x7d, 0xf9, 0x85, 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x3, 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c, 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x5, 0x79, 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff, 0x7b, 0x7, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e, 0x77, 0xb, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, 0x48, 0x34, 0x9, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60, 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd, 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2, 0x8b, 0xf7, 0x73, 0xf, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, 0x27, 0x5b, 0xf5, 0x89, 0xd, 0x71, 0x18, 0x64, 0xe0, 0x9c},
+ {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x4, 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x8, 0x75, 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5, 0x62, 0x1f, 0x71, 0xc, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0, 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, 0x25, 0x58, 0xf9, 0x84, 0x3, 0x7e, 0x10, 0x6d, 0xea, 0x97, 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x7, 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 0x2d, 0x50, 0xd7, 0xaa, 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0xb, 0x76, 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0, 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0xf, 0x6c, 0x11, 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x2, 0xa3, 0xde, 0x59, 0x24, 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x6, 0x7b, 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e, 0x77, 0xa, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5, 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0xe, 0x73, 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34, 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x1, 0x86, 0xfb, 0xd9, 0xa4, 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91, 0xff, 0x82, 0x5, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x9, 0x67, 0x1a, 0x9d, 0xe0, 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0xd, 0x70, 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63},
+ {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4, 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x3, 0x7b, 0x5, 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e, 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x6, 0x78, 0xf6, 0x88, 0xa, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f, 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7, 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x9, 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0xf, 0x68, 0x16, 0x94, 0xea, 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40, 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b, 0xc, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0xd, 0x73, 0x14, 0x6a, 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41, 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb, 0x69, 0x17, 0x70, 0xe, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x8, 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e, 0xee, 0x90, 0x12, 0x6c, 0xb, 0x75, 0xf7, 0x89, 0x7, 0x79, 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60, 0xd0, 0xae, 0x2c, 0x52, 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x4, 0x7c, 0x2, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5, 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d, 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, 0x1, 0x7f},
+ {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc, 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x3, 0x82, 0xfd, 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x5, 0x65, 0x1a, 0x9b, 0xe4, 0xf8, 0x87, 0x6, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58, 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37, 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, 0x76, 0x9, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0xa, 0x75, 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92, 0x13, 0x6c, 0xc, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3, 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0xf, 0x8e, 0xf1, 0x71, 0xe, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1, 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53, 0x33, 0x4c, 0xcd, 0xb2, 0xd, 0x72, 0xf3, 0x8c, 0xec, 0x93, 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a, 0xf5, 0x8a, 0xb, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6, 0x77, 0x8, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7, 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x7, 0x78, 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x4, 0xbb, 0xc4, 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d, 0x7d, 0x2, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f, 0xe0, 0x9f, 0x1e, 0x61, 0x1, 0x7e, 0xff, 0x80},
+ {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, 0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x1, 0xa6, 0x26, 0xbb, 0x3b, 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39, 0xa4, 0x24, 0x83, 0x3, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8, 0x1f, 0x9f, 0x2, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, 0x76, 0xf6, 0x87, 0x7, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20, 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef, 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x6, 0x86, 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x4, 0x84, 0x19, 0x99, 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x5, 0xd6, 0x56, 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0xe, 0x8e, 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c, 0x8f, 0xf, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e, 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37, 0x90, 0x10, 0x8d, 0xd, 0x36, 0xb6, 0x2b, 0xab, 0xc, 0x8c, 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5, 0x94, 0x14, 0x89, 0x9, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60, 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1, 0x46, 0xc6, 0x5b, 0xdb, 0x8, 0x88, 0x15, 0x95, 0x32, 0xb2, 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe, 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0xa, 0x8a, 0xb1, 0x31, 0xac, 0x2c, 0x8b, 0xb, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58, 0xff, 0x7f, 0xe2, 0x62},
+ {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc, 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x5, 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73, 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0xf, 0xaf, 0x2e, 0xb0, 0x31, 0x15, 0x94, 0xa, 0x8b, 0x2b, 0xaa, 0x34, 0xb5, 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46, 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25, 0x85, 0x4, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x1, 0x80, 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3, 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0xb, 0x8a, 0x56, 0xd7, 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c, 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, 0x8f, 0xe, 0x93, 0x12, 0x8c, 0xd, 0xad, 0x2c, 0xb2, 0x33, 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea, 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x8, 0x89, 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, 0x5f, 0xde, 0x2, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2, 0x86, 0x7, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b, 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca, 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, 0x9, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0xc, 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38, 0xa6, 0x27, 0x87, 0x6, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b, 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x3, 0x82, 0x1c, 0x9d},
+ {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53, 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, 0x87, 0x5, 0x8d, 0xf, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24, 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7, 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba, 0x13, 0x91, 0xa, 0x88, 0x7, 0x85, 0x1e, 0x9c, 0x35, 0xb7, 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca, 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29, 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x2, 0x8a, 0x8, 0x93, 0x11, 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb, 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0xd, 0x8f, 0xe, 0x8c, 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1, 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0xb, 0x83, 0x1, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 0xe7, 0x65, 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0, 0x79, 0xfb, 0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, 0x4, 0x86, 0x9, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0, 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43, 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e, 0x97, 0x15, 0x8e, 0xc, 0x84, 0x6, 0x9d, 0x1f, 0xb6, 0x34, 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49, 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa, 0x31, 0xb3, 0x1a, 0x98, 0x3, 0x81},
+ {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76, 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x1, 0x99, 0x1a, 0xad, 0x2e, 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x3, 0xc1, 0x42, 0xda, 0x59, 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, 0x58, 0xdb, 0x19, 0x9a, 0x2, 0x81, 0x2f, 0xac, 0x34, 0xb7, 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8, 0x30, 0xb3, 0x1d, 0x9e, 0x6, 0x85, 0x9f, 0x1c, 0x84, 0x7, 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44, 0x86, 0x5, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1, 0x29, 0xaa, 0x4, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6, 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0xd, 0x95, 0x16, 0xb8, 0x3b, 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c, 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9, 0x21, 0xa2, 0xc, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb, 0x15, 0x96, 0xe, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55, 0x97, 0x14, 0x8c, 0xf, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a, 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d, 0x93, 0x10, 0x88, 0xb, 0x11, 0x92, 0xa, 0x89, 0x27, 0xa4, 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3, 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x8, 0x8b, 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24, 0x8a, 0x9, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, 0xfd, 0x7e},
+ {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c, 0xbd, 0x39, 0x82, 0x6, 0x97, 0x13, 0xfc, 0x78, 0xe9, 0x6d, 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, 0x72, 0xf6, 0x19, 0x9d, 0xc, 0x88, 0x33, 0xb7, 0x26, 0xa2, 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35, 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0xa, 0x9a, 0x1e, 0x8f, 0xb, 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0xd, 0x89, 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53, 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x7, 0x96, 0x12, 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x1, 0x85, 0x14, 0x90, 0x29, 0xad, 0x3c, 0xb8, 0x3, 0x87, 0x16, 0x92, 0x7d, 0xf9, 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x5, 0x94, 0x10, 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf, 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0xf, 0x8b, 0xcc, 0x48, 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x9, 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, 0x8c, 0x8, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c, 0x1b, 0x9f, 0xe, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb, 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f, 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x4, 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed, 0x2, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9},
+ {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, 0x81, 0x4, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58, 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4, 0x26, 0xa3, 0x1f, 0x9a, 0x8, 0x8d, 0xd5, 0x50, 0xc2, 0x47, 0xfb, 0x7e, 0xec, 0x69, 0x89, 0xc, 0x9e, 0x1b, 0xa7, 0x22, 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66, 0x86, 0x3, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7, 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac, 0x10, 0x95, 0x7, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, 0x8e, 0xb, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57, 0xf, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6, 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b, 0x87, 0x2, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, 0xcc, 0x49, 0x11, 0x94, 0x6, 0x83, 0x3f, 0xba, 0x28, 0xad, 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41, 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0xa, 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0xe, 0x8b, 0x19, 0x9c, 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa, 0x38, 0xbd, 0x1, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59, 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x5, 0xb9, 0x3c, 0xae, 0x2b, 0x1e, 0x9b, 0x9, 0x8c, 0x30, 0xb5, 0x27, 0xa2, 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23, 0xb1, 0x34, 0x88, 0xd, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68, 0xd4, 0x51, 0xc3, 0x46},
+ {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1, 0x88, 0xe, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a, 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0xd, 0x8b, 0x1c, 0x9a, 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, 0x7a, 0xfc, 0x85, 0x3, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30, 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c, 0xb, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9, 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x5, 0xb0, 0x36, 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63, 0x17, 0x91, 0x6, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5, 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x8, 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x7, 0x81, 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a, 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x9, 0xf8, 0x7e, 0xe9, 0x6f, 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, 0xa, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8, 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x4, 0xf5, 0x73, 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9, 0xc, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x2, 0x95, 0x13, 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5, 0x32, 0xb4, 0x1, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0, 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0xf, 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a},
+ {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0xc, 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0xb, 0x8c, 0x18, 0x9f, 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32, 0xa6, 0x21, 0x93, 0x14, 0x80, 0x7, 0xf9, 0x7e, 0xea, 0x6d, 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, 0x6f, 0xe8, 0x16, 0x91, 0x5, 0x82, 0x30, 0xb7, 0x23, 0xa4, 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x9, 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3, 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, 0xe, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d, 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x2, 0x96, 0x11, 0xb4, 0x33, 0xa7, 0x20, 0x92, 0x15, 0x81, 0x6, 0xf8, 0x7f, 0xeb, 0x6c, 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0xa, 0x8d, 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2, 0x99, 0x1e, 0x8a, 0xd, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52, 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 0x1, 0x86, 0x12, 0x95, 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c, 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x3, 0x97, 0x10, 0x76, 0xf1, 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae, 0x1c, 0x9b, 0xf, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, 0xf6, 0x71, 0x8f, 0x8, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d, 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9, 0x17, 0x90, 0x4, 0x83, 0x31, 0xb6, 0x22, 0xa5},
+ {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7, 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58, 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61, 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, 0xaf, 0x27, 0x8c, 0x4, 0x81, 0x9, 0x96, 0x1e, 0x9b, 0x13, 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x1, 0x84, 0xc, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50, 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2, 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x5, 0x8d, 0x8, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4, 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc, 0xf, 0x87, 0x2, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3, 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32, 0xad, 0x25, 0xa0, 0x28, 0x83, 0xb, 0x8e, 0x6, 0x99, 0x11, 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40, 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52, 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b, 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, 0xa5, 0x2d, 0x86, 0xe, 0x8b, 0x3, 0x9c, 0x14, 0x91, 0x19, 0xa, 0x82, 0x7, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6, 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 0x6f, 0xe7, 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, 0x41, 0xc9},
+ {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1, 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2, 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54, 0x88, 0x1, 0x87, 0xe, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d, 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b, 0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, 0xd0, 0x59, 0x85, 0xc, 0x8a, 0x3, 0x9b, 0x12, 0x94, 0x1d, 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0xd, 0x84, 0x2, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7, 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1, 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52, 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19, 0x81, 0x8, 0x8e, 0x7, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x9, 0x80, 0x6, 0x8f, 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6, 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5, 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x4, 0x8d, 0xb, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe, 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7, 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c, 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x5, 0x83, 0xa, 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36},
+ {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd, 0x90, 0x1a, 0x99, 0x13, 0x82, 0x8, 0x8b, 0x1, 0xb4, 0x3e, 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b, 0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac, 0x19, 0x93, 0x10, 0x9a, 0xb, 0x81, 0x2, 0x88, 0x75, 0xff, 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2, 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, 0xb6, 0x3c, 0x89, 0x3, 0x80, 0xa, 0x9b, 0x11, 0x92, 0x18, 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b, 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9, 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3, 0x16, 0x9c, 0x1f, 0x95, 0x4, 0x8e, 0xd, 0x87, 0xea, 0x60, 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d, 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, 0xb9, 0x33, 0x86, 0xc, 0x8f, 0x5, 0x94, 0x1e, 0x9d, 0x17, 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9, 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2, 0xf, 0x85, 0x6, 0x8c, 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46, 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15, 0x96, 0x1c, 0x8d, 0x7, 0x84, 0xe, 0xbb, 0x31, 0xb2, 0x38, 0xa9, 0x23, 0xa0, 0x2a},
+ {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba, 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff, 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30, 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x1, 0x81, 0xa, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e, 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6, 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1, 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, 0x38, 0xb3, 0x9, 0x82, 0x2, 0x89, 0x1f, 0x94, 0x14, 0x9f, 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a, 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15, 0x83, 0x8, 0x88, 0x3, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c, 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29, 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x5, 0x85, 0xe, 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0, 0x12, 0x99, 0x19, 0x92, 0x4, 0x8f, 0xf, 0x84, 0x3e, 0xb5, 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0xc, 0x8c, 0x7, 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49, 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc, 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b, 0xd, 0x86, 0x6, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5},
+ {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1, 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3, 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4, 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5, 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37, 0x88, 0x4, 0x8d, 0x1, 0x82, 0xe, 0x87, 0xb, 0x9c, 0x10, 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79, 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b, 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1, 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0, 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2, 0xd, 0x81, 0x8, 0x84, 0x7, 0x8b, 0x2, 0x8e, 0x19, 0x95, 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac, 0x2f, 0xa3, 0x2a, 0xa6, 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e, 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59, 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48, 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a, 0x85, 0x9, 0x80, 0xc, 0x8f, 0x3, 0x8a, 0x6, 0x91, 0x1d, 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12},
+ {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc, 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd, 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6, 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0, 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71, 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52, 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14, 0x8c, 0x1, 0x8b, 0x6, 0x82, 0xf, 0x85, 0x8, 0xa8, 0x25, 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e, 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45, 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74, 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27, 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11, 0x89, 0x4, 0x8e, 0x3, 0x87, 0xa, 0x80, 0xd, 0x3d, 0xb0, 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab, 0x2f, 0xa2, 0x28, 0xa5, 0x5, 0x88, 0x2, 0x8f, 0xb, 0x86, 0xc, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d, 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc, 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff, 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, 0x60, 0xed},
+ {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89, 0x8, 0x86, 0x9, 0x87, 0xa, 0x84, 0xb, 0x85, 0xc, 0x82, 0xd, 0x83, 0xe, 0x80, 0xf, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99, 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92, 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf, 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5, 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe, 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb, 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1, 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca, 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7, 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd, 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6, 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3, 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9, 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2, 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff, 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5, 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1},
+ {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e, 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3, 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4, 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea, 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7, 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8, 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6, 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb, 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c, 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52, 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f, 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70, 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e, 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23, 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 0xbb, 0x34, 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a, 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x7, 0x8b, 0x4, 0x8e, 0x1, 0x8d, 0x2, 0x84, 0xb, 0x87, 0x8, 0x82, 0xd, 0x81, 0xe},
+ {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23, 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x1, 0x91, 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a, 0x8d, 0x1d, 0xb0, 0x20, 0x3, 0x93, 0x3e, 0xae, 0x79, 0xe9, 0x44, 0xd4, 0x2, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5, 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63, 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x7, 0x97, 0x3a, 0xaa, 0x7d, 0xed, 0x40, 0xd0, 0x6, 0x96, 0x3b, 0xab, 0x7c, 0xec, 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25, 0x4, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60, 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c, 0x8b, 0x1b, 0xb6, 0x26, 0x5, 0x95, 0x38, 0xa8, 0x7f, 0xef, 0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c, 0xf, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0xe, 0x9e, 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57, 0x80, 0x10, 0xbd, 0x2d, 0xc, 0x9c, 0x31, 0xa1, 0x76, 0xe6, 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f, 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0xd, 0x9d, 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x8, 0x98, 0x35, 0xa5, 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a, 0x9, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f, 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0xb, 0x9b, 0x36, 0xa6, 0x71, 0xe1, 0x4c, 0xdc, 0xa, 0x9a, 0x37, 0xa7, 0x70, 0xe0, 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29},
+ {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b, 0x9b, 0xa, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, 0x58, 0xc9, 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x7, 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3, 0xd, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60, 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0xe, 0x9f, 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b, 0x95, 0x4, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca, 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x9, 0xa7, 0x36, 0xe6, 0x77, 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51, 0x7d, 0xec, 0x42, 0xd3, 0x3, 0x92, 0x3c, 0xad, 0x7b, 0xea, 0x44, 0xd5, 0x5, 0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29, 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0xf, 0xa1, 0x30, 0xe0, 0x71, 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2, 0xac, 0x3d, 0x93, 0x2, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1, 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7, 0x37, 0xa6, 0x8, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, 0xf4, 0x65, 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18, 0x34, 0xa5, 0xb, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc, 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f, 0xaf, 0x3e, 0x90, 0x1, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0xc, 0xa2, 0x33, 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x6, 0x97, 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6},
+ {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0xc, 0x31, 0xa3, 0x8, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25, 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8, 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f, 0x73, 0xe1, 0x4a, 0xd8, 0x1, 0x93, 0x38, 0xaa, 0x97, 0x5, 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0xd, 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, 0x9, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d, 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83, 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e, 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x6, 0xad, 0x3f, 0x2, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb, 0x33, 0xa1, 0xa, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45, 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0xe, 0x51, 0xc3, 0x68, 0xfa, 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d, 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x7, 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda, 0x3, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, 0xb, 0x99, 0xa4, 0x36, 0x9d, 0xf, 0xd6, 0x44, 0xef, 0x7d, 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54, 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c, 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, 0x58, 0xca},
+ {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32, 0xc5, 0x56, 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81, 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x4, 0xac, 0x3f, 0xe1, 0x72, 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0xd, 0x9e, 0x36, 0xa5, 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d, 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x8, 0x9b, 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, 0x92, 0x1, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28, 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37, 0x9f, 0xc, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0, 0x3e, 0xad, 0x5, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53, 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19, 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0xb, 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, 0x2, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f, 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7, 0xf, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70, 0xae, 0x3d, 0x95, 0x6, 0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0, 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67, 0x90, 0x3, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef, 0x47, 0xd4, 0xa, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a, 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, 0x63, 0xf0, 0x7, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9, 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0xe, 0xa6, 0x35},
+ {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0xb, 0x9f, 0x3e, 0xaa, 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37, 0x96, 0x2, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63, 0xa8, 0x3c, 0x9d, 0x9, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25, 0x3a, 0xae, 0xf, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf, 0x6e, 0xfa, 0x31, 0xa5, 0x4, 0x90, 0x8f, 0x1b, 0xba, 0x2e, 0xe5, 0x71, 0xd0, 0x44, 0x99, 0xd, 0xac, 0x38, 0xf3, 0x67, 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86, 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c, 0xcd, 0x59, 0x92, 0x6, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60, 0xab, 0x3f, 0x9e, 0xa, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf, 0xa0, 0x34, 0x95, 0x1, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22, 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3, 0x8, 0x9c, 0x3d, 0xa9, 0x3, 0x97, 0x36, 0xa2, 0x69, 0xfd, 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c, 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f, 0xce, 0x5a, 0x91, 0x5, 0xa4, 0x30, 0x9a, 0xe, 0xaf, 0x3b, 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x7, 0x93, 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79, 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0xc, 0x98, 0x53, 0xc7, 0x66, 0xf2},
+ {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10, 0xa5, 0x30, 0x92, 0x7, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec, 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5, 0x39, 0xac, 0xe, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x9, 0xab, 0x3e, 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b, 0x99, 0xc, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0, 0x1c, 0x89, 0x2b, 0xbe, 0xb, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b, 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x2, 0xa0, 0x35, 0x25, 0xb0, 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe, 0x32, 0xa7, 0x5, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d, 0x9d, 0x8, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71, 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0xf, 0x9a, 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x6, 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa, 0x58, 0xcd, 0x1, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d, 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x4, 0x91, 0x5d, 0xc8, 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86, 0x96, 0x3, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d, 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6, 0xa, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0xd},
+ {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1, 0x95, 0x3, 0xa4, 0x32, 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, 0x2, 0x94, 0x37, 0xa1, 0x6, 0x90, 0x55, 0xc3, 0x64, 0xf2, 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x7, 0xa0, 0x36, 0xa2, 0x34, 0x93, 0x5, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1, 0x4, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0xc, 0x9a, 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0xd, 0xc8, 0x5e, 0xf9, 0x6f, 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0xf, 0xa8, 0x3e, 0x3f, 0xa9, 0xe, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 0x68, 0xfe, 0x3b, 0xad, 0xa, 0x9c, 0x9d, 0xb, 0xac, 0x3a, 0xff, 0x69, 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x9, 0x8, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a, 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf, 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48, 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9, 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9, 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77, 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1, 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44, 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84, 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42, 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11},
+ {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47, 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80, 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f, 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x5, 0xa1, 0x36, 0xf4, 0x63, 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0xb, 0x9c, 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75, 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0xf, 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x1, 0x96, 0x32, 0xa5, 0x39, 0xae, 0xa, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb, 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x4, 0xa0, 0x37, 0xbc, 0x2b, 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4, 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, 0x9, 0x9e, 0x90, 0x7, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52, 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82, 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef, 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, 0xd2, 0x45, 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0xc, 0x2, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5, 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a, 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x6, 0xa2, 0x35, 0x3b, 0xac, 0x8, 0x9f, 0x5d, 0xca, 0x6e, 0xf9, 0x65, 0xf2, 0x56, 0xc1, 0x3, 0x94, 0x30, 0xa7, 0xa9, 0x3e, 0x9a, 0xd, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44, 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, 0x79, 0xee},
+ {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed, 0x58, 0xc0, 0x2f, 0xb7, 0x2, 0x9a, 0xc1, 0x59, 0xec, 0x74, 0x9b, 0x3, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, 0x9d, 0x5, 0x5e, 0xc6, 0x73, 0xeb, 0x4, 0x9c, 0x29, 0xb1, 0x9f, 0x7, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3, 0x6, 0x9e, 0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c, 0x93, 0xb, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, 0xa, 0x92, 0xbc, 0x24, 0x91, 0x9, 0xe6, 0x7e, 0xcb, 0x53, 0x8, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb, 0xe, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0xf, 0xba, 0x22, 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0xc, 0x94, 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0xd, 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3, 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f, 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a, 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88, 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11, 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d, 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f, 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19, 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36, 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82},
+ {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31, 0xca, 0x53, 0xe5, 0x7c, 0x94, 0xd, 0xbb, 0x22, 0x76, 0xef, 0x59, 0xc0, 0x28, 0xb1, 0x7, 0x9e, 0xaf, 0x36, 0x80, 0x19, 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61, 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75, 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x4, 0x50, 0xc9, 0x7f, 0xe6, 0xe, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17, 0x26, 0xbf, 0x9, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x3, 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0xf, 0x96, 0x20, 0xb9, 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x5, 0xed, 0x74, 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82, 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c, 0xea, 0x73, 0x9b, 0x2, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf, 0x27, 0xbe, 0x8, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4, 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3, 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55, 0xbd, 0x24, 0x92, 0xb, 0x5f, 0xc6, 0x70, 0xe9, 0x1, 0x98, 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4, 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0, 0x6, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0xc, 0xba, 0x23, 0xcb, 0x52, 0xe4, 0x7d},
+ {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45, 0x55, 0xcf, 0x7c, 0xe6, 0x7, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b, 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19, 0xf8, 0x62, 0xd1, 0x4b, 0xe, 0x94, 0x27, 0xbd, 0x5c, 0xc6, 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e, 0x5b, 0xc1, 0x72, 0xe8, 0x9, 0x93, 0x20, 0xba, 0x49, 0xd3, 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e, 0xbf, 0x25, 0x96, 0xc, 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, 0x67, 0xfd, 0xb8, 0x22, 0x91, 0xb, 0xea, 0x70, 0xc3, 0x59, 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x2, 0x47, 0xdd, 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x5, 0xe4, 0x7e, 0xcd, 0x57, 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, 0x69, 0xf3, 0x92, 0x8, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73, 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7, 0xc7, 0x5d, 0xee, 0x74, 0x95, 0xf, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0, 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, 0x43, 0xd9, 0x9c, 0x6, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d, 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53, 0xe0, 0x7a, 0x9b, 0x1, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68, 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, 0x4, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f, 0x2a, 0xb0, 0x3, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb, 0x58, 0xc2, 0x23, 0xb9, 0xa, 0x90, 0xd5, 0x4f, 0xfc, 0x66, 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0xd, 0x97, 0x76, 0xec, 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61},
+ {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5, 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, 0x94, 0xf, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c, 0x26, 0xbd, 0xd, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54, 0xe4, 0x7f, 0x99, 0x2, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3, 0x35, 0xae, 0x1e, 0x85, 0x9, 0x92, 0x22, 0xb9, 0x5f, 0xc4, 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43, 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b, 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x6, 0x83, 0x18, 0xa8, 0x33, 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x4, 0x9f, 0x79, 0xe2, 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0xb, 0xbb, 0x20, 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89, 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0xe, 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x1, 0x9a, 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d, 0x98, 0x3, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf, 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d, 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, 0xc, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd, 0xb7, 0x2c, 0x9c, 0x7, 0xe1, 0x7a, 0xca, 0x51, 0x5e, 0xc5, 0x75, 0xee, 0x8, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42, 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0xa, 0xba, 0x21, 0xc7, 0x5c, 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb, 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3, 0x53, 0xc8, 0x2e, 0xb5, 0x5, 0x9e},
+ {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6, 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6, 0x4f, 0xd3, 0x20, 0xbc, 0x5, 0x99, 0xfe, 0x62, 0xdb, 0x47, 0xb4, 0x28, 0x91, 0xd, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38, 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x2, 0xbb, 0x27, 0x40, 0xdc, 0x65, 0xf9, 0xa, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58, 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x7, 0xf4, 0x68, 0xd1, 0x4d, 0x2a, 0xb6, 0xf, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17, 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6, 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0xc, 0xff, 0x63, 0xda, 0x46, 0x21, 0xbd, 0x4, 0x98, 0x6b, 0xf7, 0x4e, 0xd2, 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88, 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66, 0x95, 0x9, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x1, 0x9d, 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19, 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd, 0x44, 0xd8, 0x2b, 0xb7, 0xe, 0x92, 0xf5, 0x69, 0xd0, 0x4c, 0xbf, 0x23, 0x9a, 0x6, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33, 0xb, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x3, 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 0x3e, 0xa2, 0x1b, 0x87, 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, 0xc5, 0x59},
+ {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8, 0x2, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x3, 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x4, 0x99, 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x5, 0xbf, 0x22, 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x6, 0x9b, 0xf3, 0x6e, 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x7, 0x94, 0x9, 0xb3, 0x2e, 0xda, 0x47, 0xfd, 0x60, 0x8, 0x95, 0x2f, 0xb2, 0x46, 0xdb, 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0xb, 0xff, 0x62, 0xd8, 0x45, 0x2d, 0xb0, 0xa, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43, 0xf9, 0x64, 0x90, 0xd, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8, 0xc, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, 0x92, 0xf, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0xe, 0x93, 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34, 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa, 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b, 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7, 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c, 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9, 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85, 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51, 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a, 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6},
+ {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, 0x76, 0xe8, 0x91, 0xf, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c, 0x2a, 0xb4, 0xb, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30, 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80, 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x4, 0xf9, 0x67, 0xd8, 0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9, 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0xc, 0xb3, 0x2d, 0x41, 0xdf, 0x60, 0xfe, 0x3, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a, 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x7, 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x8, 0x96, 0xef, 0x71, 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17, 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0xd, 0x93, 0x6e, 0xf0, 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x2, 0xff, 0x61, 0xde, 0x40, 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c, 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x6, 0x98, 0x27, 0xb9, 0x44, 0xda, 0x65, 0xfb, 0x97, 0x9, 0xb6, 0x28, 0xd5, 0x4b, 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee, 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x1, 0x78, 0xe6, 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56, 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, 0xe, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0xa, 0xb5, 0x2b, 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d, 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8, 0x5, 0x9b, 0x24, 0xba},
+ {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76, 0x5, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16, 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0xa, 0x95, 0x29, 0xb6, 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, 0xe3, 0x7c, 0xf, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5, 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b, 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x7, 0xbb, 0x24, 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, 0x74, 0xeb, 0x9d, 0x2, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67, 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0xd, 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7, 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x8, 0xb4, 0x2b, 0xd1, 0x4e, 0xf2, 0x6d, 0x28, 0xb7, 0xb, 0x94, 0x6e, 0xf1, 0x4d, 0xd2, 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e, 0x2d, 0xb2, 0xe, 0x91, 0x6b, 0xf4, 0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d, 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x1, 0x9e, 0x64, 0xfb, 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54, 0x27, 0xb8, 0x4, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34, 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80, 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0xc, 0xf6, 0x69, 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3, 0xb5, 0x2a, 0x96, 0x9, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9, 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x6, 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x3, 0xf9, 0x66, 0xda, 0x45},
+ {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f, 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x1, 0xa1, 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x3, 0xa3, 0x5e, 0xfe, 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb, 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x2, 0xa2, 0x5f, 0xff, 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, 0x88, 0x28, 0x6, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41, 0xbd, 0x1d, 0xe0, 0x40, 0x7, 0xa7, 0x5a, 0xfa, 0xd4, 0x74, 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b, 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x5, 0xa5, 0x58, 0xf8, 0x4, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43, 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e, 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a, 0xd, 0xad, 0x50, 0xf0, 0xc, 0xac, 0x51, 0xf1, 0xb6, 0x16, 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22, 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0xe, 0xae, 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48, 0xf, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0xb, 0xab, 0x56, 0xf6, 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3, 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 0xa, 0xaa, 0x57, 0xf7, 0xb0, 0x10, 0xed, 0x4d, 0x8, 0xa8, 0x55, 0xf5, 0xb2, 0x12, 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26, 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13, 0xee, 0x4e, 0x9, 0xa9, 0x54, 0xf4},
+ {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82, 0xa3, 0x2, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38, 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x6, 0x46, 0xe7, 0x19, 0xb8, 0x5b, 0xfa, 0x4, 0xa5, 0xe5, 0x44, 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a, 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef, 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0xe, 0xed, 0x4c, 0xb2, 0x13, 0x53, 0xf2, 0xc, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x8, 0xa9, 0x57, 0xf6, 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5, 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb, 0xab, 0xa, 0xf4, 0x55, 0x5e, 0xff, 0x1, 0xa0, 0xe0, 0x41, 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f, 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c, 0xa2, 0x3, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39, 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x7, 0xf9, 0x58, 0x18, 0xb9, 0x47, 0xe6, 0x5, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45, 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0, 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee, 0xae, 0xf, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0xd, 0xac, 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92, 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x9, 0xa8, 0x89, 0x28, 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4, 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, 0xaa, 0xb},
+ {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50, 0xab, 0x9, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 0xd2, 0x70, 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x2, 0x4b, 0xe9, 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9, 0xb, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0, 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14, 0x5d, 0xff, 0x4, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0xd, 0xf6, 0x54, 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4, 0x4f, 0xed, 0xa4, 0x6, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94, 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, 0xf, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4, 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18, 0xe3, 0x41, 0x8, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca, 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, 0xa3, 0x1, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73, 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0xa, 0xc8, 0x6a, 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a, 0x3, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, 0xc7, 0x65, 0x55, 0xf7, 0xc, 0xae, 0xe7, 0x45, 0xbe, 0x1c, 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x5, 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e, 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0xe, 0xf5, 0x57, 0x1e, 0xbc, 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e, 0x5e, 0xfc, 0x7, 0xa5, 0xec, 0x4e, 0xb5, 0x17},
+ {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, 0xf, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd, 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0xb, 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3, 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, 0xa7, 0x4, 0xaf, 0xc, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1, 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee, 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x3, 0x3c, 0x9f, 0x67, 0xc4, 0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, 0x9b, 0x38, 0x7, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49, 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46, 0xbe, 0x1d, 0x53, 0xf0, 0x8, 0xab, 0x43, 0xe0, 0x18, 0xbb, 0xf5, 0x56, 0xae, 0xd, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, 0xdf, 0x7c, 0xa1, 0x2, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef, 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39, 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13, 0x5d, 0xfe, 0x6, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, 0x95, 0x36, 0x9, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47, 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x1, 0xa2, 0x9d, 0x3e, 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0xe, 0xad, 0x55, 0xf6, 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b, 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0xa, 0xd7, 0x74, 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x5, 0xfd, 0x5e, 0x10, 0xb3, 0x4b, 0xe8},
+ {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12, 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f, 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8, 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, 0x8f, 0x2b, 0xab, 0xf, 0xfe, 0x5a, 0x1, 0xa5, 0x54, 0xf0, 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6, 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca, 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, 0x1f, 0xbb, 0xa9, 0xd, 0xfc, 0x58, 0x3, 0xa7, 0x56, 0xf2, 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x2, 0xa6, 0x57, 0xf3, 0xa8, 0xc, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28, 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf, 0xad, 0x9, 0xf8, 0x5c, 0x7, 0xa3, 0x52, 0xf6, 0x76, 0xd2, 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce, 0x95, 0x31, 0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf, 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x6, 0xa2, 0x53, 0xf7, 0xac, 0x8, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67, 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, 0x20, 0x84, 0x4, 0xa0, 0x51, 0xf5, 0xae, 0xa, 0xfb, 0x5f, 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0xb, 0xfa, 0x5e, 0x5, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17, 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f},
+ {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70, 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45, 0x58, 0xfd, 0xf, 0xaa, 0xf6, 0x53, 0xa1, 0x4, 0x9b, 0x3e, 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28, 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f, 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54, 0xa6, 0x3, 0x5f, 0xfa, 0x8, 0xad, 0x2b, 0x8e, 0x7c, 0xd9, 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, 0x93, 0x36, 0xa9, 0xc, 0xfe, 0x5b, 0x7, 0xa2, 0x50, 0xf5, 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1, 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7, 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, 0x1f, 0xba, 0xa7, 0x2, 0xf0, 0x55, 0x9, 0xac, 0x5e, 0xfb, 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99, 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0xd, 0x51, 0xf4, 0x6, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, 0x47, 0xe2, 0x56, 0xf3, 0x1, 0xa4, 0xf8, 0x5d, 0xaf, 0xa, 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71, 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67, 0x3b, 0x9e, 0x6c, 0xc9, 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, 0xb6, 0x13, 0xe, 0xab, 0x59, 0xfc, 0xa0, 0x5, 0xf7, 0x52, 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29, 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0},
+ {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7, 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe, 0x79, 0xdf, 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7, 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7, 0xf2, 0x54, 0xa3, 0x5, 0x50, 0xf6, 0x1, 0xa7, 0xab, 0xd, 0xfa, 0x5c, 0x9, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7, 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde, 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f, 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97, 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0xe, 0x5b, 0xfd, 0xa, 0xac, 0xa0, 0x6, 0xf1, 0x57, 0x2, 0xa4, 0x53, 0xf5, 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4, 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77, 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67, 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0xb, 0xad, 0x5a, 0xfc, 0xa9, 0xf, 0xf8, 0x5e, 0x52, 0xf4, 0x3, 0xa5, 0xf0, 0x56, 0xa1, 0x7, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5, 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d, 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37, 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, 0x6a, 0xcc},
+ {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3, 0xa2, 0x5, 0xf1, 0x56, 0x4, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x7, 0x55, 0xf2, 0x6, 0xa1, 0x59, 0xfe, 0xa, 0xad, 0xff, 0x58, 0xac, 0xb, 0x8, 0xaf, 0x5b, 0xfc, 0xae, 0x9, 0xfd, 0x5a, 0xfb, 0x5c, 0xa8, 0xf, 0x5d, 0xfa, 0xe, 0xa9, 0xaa, 0xd, 0xf9, 0x5e, 0xc, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46, 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42, 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c, 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e, 0x1c, 0xbb, 0x4f, 0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a, 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f, 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f, 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72, 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25, 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27, 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8, 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f, 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66, 0x34, 0x93, 0x67, 0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62, 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33},
+ {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x4, 0xa4, 0xc, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25, 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13, 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, 0x8, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a, 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x3, 0x7, 0xaf, 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb, 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0xf, 0xa7, 0xa3, 0xb, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22, 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x2, 0xe7, 0x4f, 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87, 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x6, 0xae, 0xe, 0xa6, 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2, 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0xa, 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e, 0x9b, 0x33, 0x4c, 0xe4, 0x1, 0xa9, 0xad, 0x5, 0xe0, 0x48, 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24, 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0xd, 0x9, 0xa1, 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5, 0xba, 0x12, 0xf7, 0x5f},
+ {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59, 0x42, 0xeb, 0xd, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca, 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62, 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0xc, 0xea, 0x43, 0x3b, 0x92, 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe, 0xe7, 0x4e, 0xa8, 0x1, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc, 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2, 0xaa, 0x3, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0xe, 0x91, 0x38, 0xde, 0x77, 0xf, 0xa6, 0x40, 0xe9, 0xb0, 0x19, 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35, 0x4d, 0xe4, 0x2, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52, 0xb, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1, 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x6, 0xaf, 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x7, 0xe1, 0x48, 0x30, 0x99, 0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7, 0xec, 0x45, 0xa3, 0xa, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64, 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9, 0xa1, 0x8, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x5, 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12, 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c, 0x4, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x9, 0xa0},
+ {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91, 0xe0, 0x4a, 0xa9, 0x3, 0x4b, 0xe1, 0x2, 0xa8, 0xd9, 0x73, 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x7, 0x76, 0xdc, 0x3f, 0x95, 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x6, 0xac, 0x96, 0x3c, 0xdf, 0x75, 0x4, 0xae, 0x4d, 0xe7, 0xaf, 0x5, 0xe6, 0x4c, 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, 0xe, 0xa4, 0xec, 0x46, 0xa5, 0xf, 0x7e, 0xd4, 0x37, 0x9d, 0xa7, 0xd, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34, 0xd7, 0x7d, 0xc, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2, 0xa3, 0x9, 0xea, 0x40, 0x8, 0xa2, 0x41, 0xeb, 0x9a, 0x30, 0xd3, 0x79, 0x43, 0xe9, 0xa, 0xa0, 0xd1, 0x7b, 0x98, 0x32, 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0xb, 0xb7, 0x1d, 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d, 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d, 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0, 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2, 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13, 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba, 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca, 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, 0x5d, 0xf7, 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce, 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67, 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc},
+ {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14, 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f, 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15, 0x63, 0xc8, 0x28, 0x83, 0xa6, 0xd, 0xed, 0x46, 0x30, 0x9b, 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x1, 0xaa, 0x4a, 0xe1, 0x95, 0x3e, 0xde, 0x75, 0x3, 0xa8, 0x48, 0xe3, 0xa4, 0xf, 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17, 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27, 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98, 0x78, 0xd3, 0xa5, 0xe, 0xee, 0x45, 0x2, 0xa9, 0x49, 0xe2, 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0xa, 0xea, 0x41, 0x6, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70, 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf, 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13, 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x7, 0xac, 0x4c, 0xe7, 0xa0, 0xb, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x9, 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73, 0x5, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87, 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc, 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x4, 0xaf, 0x4f, 0xe4, 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x8, 0xe8, 0x43},
+ {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe, 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2, 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x2, 0xeb, 0x47, 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0xb, 0xe2, 0x4e, 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93, 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0xd, 0xa1, 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x4, 0xa8, 0xcb, 0x67, 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39, 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 0xc0, 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0xf, 0x65, 0xc9, 0x20, 0x8c, 0xef, 0x43, 0xaa, 0x6, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14, 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35, 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b, 0x8, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x1, 0xad, 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7, 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde, 0xa6, 0xa, 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x3, 0xea, 0x46, 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2, 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x5, 0xa9, 0xc3, 0x6f, 0x86, 0x2a, 0x49, 0xe5, 0xc, 0xa0, 0xfc, 0x50, 0xb9, 0x15, 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, 0x3a, 0x96, 0xee, 0x42, 0xab, 0x7, 0x64, 0xc8, 0x21, 0x8d, 0xe7, 0x4b, 0xa2, 0xe, 0x6d, 0xc1, 0x28, 0x84},
+ {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65, 0x2, 0xaf, 0x45, 0xe8, 0x8c, 0x21, 0xcb, 0x66, 0x3, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67, 0x4, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x5, 0xa8, 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x6, 0xab, 0x41, 0xec, 0x88, 0x25, 0xcf, 0x62, 0x7, 0xaa, 0x40, 0xed, 0x89, 0x24, 0xce, 0x63, 0x8, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c, 0x9, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0xa, 0xa7, 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0xb, 0xa6, 0x4c, 0xe1, 0x85, 0x28, 0xc2, 0x6f, 0xc, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, 0xc5, 0x68, 0xd, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69, 0xe, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0xf, 0xa2, 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa, 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76, 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9, 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff, 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73, 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4, 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0, 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78, 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3, 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5, 0x91, 0x3c, 0xd6, 0x7b},
+ {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74, 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85, 0x6a, 0xc4, 0xa9, 0x7, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b, 0xe6, 0x48, 0xa7, 0x9, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b, 0x4f, 0xe1, 0xe, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66, 0x89, 0x27, 0x4a, 0xe4, 0xb, 0xa5, 0xd1, 0x7f, 0x90, 0x3e, 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0xc, 0x61, 0xcf, 0x20, 0x8e, 0xac, 0x2, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b, 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71, 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x5, 0xab, 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0xf, 0xa1, 0x4e, 0xe0, 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11, 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x8, 0xe7, 0x49, 0x24, 0x8a, 0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x6, 0x6b, 0xc5, 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d, 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c, 0x83, 0x2d, 0x40, 0xee, 0x1, 0xaf, 0x45, 0xeb, 0x4, 0xaa, 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a, 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x3, 0x21, 0x8f, 0x60, 0xce, 0xa3, 0xd, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7, 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, 0xd0, 0x7e, 0xa, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67},
+ {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce, 0xa4, 0xb, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, 0xf6, 0x59, 0x44, 0xeb, 0x7, 0xa8, 0xc2, 0x6d, 0x81, 0x2e, 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9, 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0xc, 0x77, 0xd8, 0x34, 0x9b, 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0xe, 0xa1, 0x4d, 0xe2, 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3, 0xaa, 0x5, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14, 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20, 0x4a, 0xe5, 0x9, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, 0x18, 0xb7, 0xee, 0x41, 0xad, 0x2, 0x68, 0xc7, 0x2b, 0x84, 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0xd, 0xa2, 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0, 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x6, 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54, 0x49, 0xe6, 0xa, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7, 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87, 0xed, 0x42, 0xae, 0x1, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x3, 0xac, 0x40, 0xef, 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x8, 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a, 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, 0x4, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba, 0xe3, 0x4c, 0xa0, 0xf, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d, 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98},
+ {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x2, 0x35, 0x85, 0x48, 0xf8, 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33, 0xfe, 0x4e, 0x79, 0xc9, 0x4, 0xb4, 0x6a, 0xda, 0x17, 0xa7, 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x6, 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92, 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42, 0x8f, 0x3f, 0x8, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19, 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, 0xba, 0xa, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf, 0x71, 0xc1, 0xc, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7, 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0xe, 0xc3, 0x73, 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, 0xb1, 0x1, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8, 0xf9, 0x49, 0x84, 0x34, 0x3, 0xb3, 0x7e, 0xce, 0x10, 0xa0, 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x5, 0xc8, 0x78, 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, 0xdb, 0x6b, 0x7a, 0xca, 0x7, 0xb7, 0x80, 0x30, 0xfd, 0x4d, 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d, 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x9, 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, 0x65, 0xd5, 0xb, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c, 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7, 0x3a, 0x8a, 0xbd, 0xd, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac, 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, 0xf, 0xbf},
+ {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e, 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0, 0xc0, 0x71, 0xbf, 0xe, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0xd, 0xc3, 0x72, 0x7c, 0xcd, 0x3, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c, 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95, 0xa5, 0x14, 0xda, 0x6b, 0xba, 0xb, 0xc5, 0x74, 0x44, 0xf5, 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x5, 0xb4, 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49, 0x87, 0x36, 0x6, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7, 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x8, 0x38, 0x89, 0x47, 0xf6, 0xb6, 0x7, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6, 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7, 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, 0x9, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25, 0xf4, 0x45, 0x8b, 0x3a, 0xa, 0xbb, 0x75, 0xc4, 0xca, 0x7b, 0xb5, 0x4, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5, 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, 0x6c, 0xdd, 0xc, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c, 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x2, 0xd3, 0x62, 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80, 0xb0, 0x1, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1, 0x70, 0xc1, 0xf, 0xbe, 0x8e, 0x3f, 0xf1, 0x40},
+ {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f, 0xc3, 0x71, 0xba, 0x8, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88, 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x3, 0x2c, 0x9e, 0x55, 0xe7, 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2, 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6, 0xd, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46, 0x7f, 0xcd, 0x6, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98, 0xb7, 0x5, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc, 0x37, 0x85, 0xbc, 0xe, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0, 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0xf, 0x36, 0x84, 0x4f, 0xfd, 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x4, 0xe8, 0x5a, 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda, 0xe3, 0x51, 0x9a, 0x28, 0x7, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0xc, 0xbe, 0x75, 0xc7, 0xb0, 0x2, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb, 0x30, 0x82, 0xbb, 0x9, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94, 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, 0x2d, 0x9f, 0x73, 0xc1, 0xa, 0xb8, 0x81, 0x33, 0xf8, 0x4a, 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x1, 0xb3, 0x9c, 0x2e, 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae, 0x97, 0x25, 0xee, 0x5c},
+ {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf, 0xff, 0x4c, 0x84, 0x37, 0x9, 0xba, 0x72, 0xc1, 0xe, 0xbd, 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b, 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22, 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68, 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2, 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb, 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x6, 0xc9, 0x7a, 0xb2, 0x1, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0xf, 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, 0xbb, 0x8, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95, 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7, 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d, 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0xd, 0xc5, 0x76, 0xb9, 0xa, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87, 0xb7, 0x4, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89, 0x46, 0xf5, 0x3d, 0x8e, 0xb0, 0x3, 0xcb, 0x78, 0x70, 0xc3, 0xb, 0xb8, 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, 0xc, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x2, 0xb1, 0x7e, 0xcd, 0x5, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20, 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa, 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3},
+ {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e, 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, 0xd9, 0x6d, 0x3, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28, 0xca, 0x7e, 0xbf, 0xb, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38, 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84, 0xaf, 0x1b, 0xda, 0x6e, 0x6, 0xb2, 0x73, 0xc7, 0xec, 0x58, 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0xe, 0x25, 0x91, 0x50, 0xe4, 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4, 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x5, 0xb1, 0x70, 0xc4, 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0xd, 0x26, 0x92, 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1, 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0xc, 0xb8, 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 0xb0, 0x4, 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61, 0xf, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72, 0xb3, 0x7, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41, 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, 0xd6, 0x62, 0xa, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21, 0xc3, 0x77, 0xb6, 0x2, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31, 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d, 0xa6, 0x12, 0xd3, 0x67, 0x9, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x1, 0x2a, 0x9e, 0x5f, 0xeb, 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 0x4f, 0xfb, 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64},
+ {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x6, 0xb3, 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x5, 0xc7, 0x72, 0x23, 0x96, 0x54, 0xe1, 0xcd, 0x78, 0xba, 0xf, 0xe2, 0x57, 0x95, 0x20, 0xc, 0xb9, 0x7b, 0xce, 0xbc, 0x9, 0xcb, 0x7e, 0x52, 0xe7, 0x25, 0x90, 0x7d, 0xc8, 0xa, 0xbf, 0x93, 0x26, 0xe4, 0x51, 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32, 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b, 0x37, 0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49, 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f, 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9, 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61, 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67, 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d, 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c, 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f, 0xbd, 0x8, 0x24, 0x91, 0x53, 0xe6, 0xb, 0xbe, 0x7c, 0xc9, 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0xe, 0xcc, 0x79, 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0xd, 0xb8, 0xe9, 0x5c, 0x9e, 0x2b, 0x7, 0xb2, 0x70, 0xc5, 0x28, 0x9d, 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x4, 0x76, 0xc3, 0x1, 0xb4, 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x2, 0xc0, 0x75, 0x59, 0xec, 0x2e, 0x9b},
+ {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19, 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x7, 0xb1, 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x9, 0xbf, 0xec, 0x5a, 0x9d, 0x2b, 0xe, 0xb8, 0x7f, 0xc9, 0x35, 0x83, 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41, 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0xb, 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0xc, 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73, 0xb4, 0x2, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb, 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, 0xf9, 0x4f, 0xb3, 0x5, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96, 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e, 0xb9, 0xf, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x8, 0xcf, 0x79, 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x6, 0xc1, 0x77, 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b, 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3, 0xc6, 0x70, 0xb7, 0x1, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, 0x4, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b, 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57, 0x90, 0x26, 0x3, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13, 0x36, 0x80, 0x47, 0xf1, 0xd, 0xbb, 0x7c, 0xca, 0xef, 0x59, 0x9e, 0x28, 0x7b, 0xcd, 0xa, 0xbc, 0x99, 0x2f, 0xe8, 0x5e, 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87},
+ {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x8, 0xcc, 0x7b, 0x59, 0xee, 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c, 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 0xb2, 0x5, 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18, 0x3a, 0x8d, 0x49, 0xfe, 0xd, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x2, 0x20, 0x97, 0x53, 0xe4, 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce, 0xa, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c, 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, 0x30, 0x87, 0x74, 0xc3, 0x7, 0xb0, 0x92, 0x25, 0xe1, 0x56, 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c, 0xb8, 0xf, 0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55, 0x77, 0xc0, 0x4, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, 0xd5, 0x62, 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0xc, 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45, 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7, 0xc5, 0x72, 0xb6, 0x1, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x9, 0xbe, 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x6, 0xc2, 0x75, 0x86, 0x31, 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c, 0xe, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16, 0xe5, 0x52, 0x96, 0x21, 0x3, 0xb4, 0x70, 0xc7, 0x8b, 0x3c, 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e, 0xbc, 0xb, 0xcf, 0x78},
+ {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6, 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e, 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b, 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x4, 0xb, 0xb3, 0x66, 0xde, 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99, 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d, 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x3, 0xbb, 0xb4, 0xc, 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8, 0xbf, 0x7, 0xd2, 0x6a, 0x65, 0xdd, 0x8, 0xb0, 0x16, 0xae, 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25, 0x2a, 0x92, 0x47, 0xff, 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d, 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0xd, 0xb5, 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x2, 0xa4, 0x1c, 0xc9, 0x71, 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x9, 0x6, 0xbe, 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a, 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82, 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35, 0x63, 0xdb, 0xe, 0xb6, 0xb9, 0x1, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23, 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45, 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81, 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0xa, 0xdf, 0x67, 0x68, 0xd0, 0x5, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14},
+ {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89, 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, 0x4f, 0xf6, 0xbe, 0x7, 0xd1, 0x68, 0x60, 0xd9, 0xf, 0xb6, 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58, 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96, 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0xe, 0xb7, 0xbf, 0x6, 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8, 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26, 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x9, 0x1, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88, 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b, 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0xc, 0xb5, 0xbd, 0x4, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34, 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64, 0xb2, 0xb, 0x3, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5, 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab, 0x2, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0xa, 0xfc, 0x45, 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b, 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, 0xac, 0x15, 0xbc, 0x5, 0xd3, 0x6a, 0x62, 0xdb, 0xd, 0xb4, 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a, 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb},
+ {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8, 0x6f, 0xd5, 0x6, 0xbc, 0xbd, 0x7, 0xd4, 0x6e, 0xd6, 0x6c, 0xbf, 0x5, 0x4, 0xbe, 0x6d, 0xd7, 0xde, 0x64, 0xb7, 0xd, 0xc, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0xe, 0xb4, 0xb5, 0xf, 0xdc, 0x66, 0xb1, 0xb, 0xd8, 0x62, 0x63, 0xd9, 0xa, 0xb0, 0x8, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x9, 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2, 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d, 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e, 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa, 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a, 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7, 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33, 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52, 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef, 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44, 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94, 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29, 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23, 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c, 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, 0x4d, 0xf7},
+ {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7, 0x7f, 0xc4, 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 0xce, 0x75, 0xa5, 0x1e, 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49, 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b, 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 0x8a, 0x31, 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98, 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4, 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e, 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0xb, 0xb0, 0xb6, 0xd, 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x1, 0x7, 0xbc, 0x6c, 0xd7, 0xdf, 0x64, 0xb4, 0xf, 0x9, 0xb2, 0x62, 0xd9, 0x6e, 0xd5, 0x5, 0xbe, 0xb8, 0x3, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70, 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27, 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5, 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f, 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89, 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b, 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10, 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, 0xcc, 0x77, 0xbf, 0x4, 0xd4, 0x6f, 0x69, 0xd2, 0x2, 0xb9, 0xe, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x8},
+ {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a, 0xf, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95, 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0xd, 0x97, 0x2b, 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8, 0xdb, 0x67, 0xbe, 0x2, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f, 0xb5, 0x9, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f, 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x6, 0xdf, 0x63, 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, 0x8d, 0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x4, 0xb8, 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18, 0xc1, 0x7d, 0x6e, 0xd2, 0xb, 0xb7, 0x78, 0xc4, 0x1d, 0xa1, 0xb2, 0xe, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x1, 0xd8, 0x64, 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda, 0x3, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36, 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0xc, 0xb0, 0xa3, 0x1f, 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3, 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71, 0xa8, 0x14, 0x7, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92, 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x8, 0xb4, 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49, 0xd3, 0x6f, 0xb6, 0xa, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9, 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x5, 0x16, 0xaa, 0x73, 0xcf},
+ {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95, 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0xb, 0x9e, 0x23, 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4, 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x2, 0xd8, 0x65, 0x71, 0xcc, 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35, 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x9, 0xb4, 0x7c, 0xc1, 0x1b, 0xa6, 0xb2, 0xf, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27, 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x4, 0xb9, 0xad, 0x10, 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6, 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e, 0xa4, 0x19, 0xd, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87, 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x6, 0x12, 0xaf, 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec, 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0xa, 0xd0, 0x6d, 0xe7, 0x5a, 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x1, 0xbc, 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x8, 0xb5, 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53, 0xd9, 0x64, 0xbe, 0x3, 0x17, 0xaa, 0x70, 0xcd, 0x58, 0xe5, 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e, 0x4a, 0xf7, 0x2d, 0x90, 0x5, 0xb8, 0x62, 0xdf, 0xcb, 0x76, 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f, 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0xe, 0xba, 0x7, 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1, 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, 0xc, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30},
+ {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0, 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x8, 0xd7, 0x69, 0x74, 0xca, 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43, 0xc7, 0x79, 0xa6, 0x18, 0x5, 0xbb, 0x64, 0xda, 0x71, 0xcf, 0x10, 0xae, 0xb3, 0xd, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37, 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x2, 0xdd, 0x63, 0x7e, 0xc0, 0x1f, 0xa1, 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38, 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0xa, 0xb4, 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d, 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x7, 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0xf, 0xb1, 0x6e, 0xd0, 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb, 0x4, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23, 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0xc, 0x11, 0xaf, 0x70, 0xce, 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c, 0xc3, 0x7d, 0x60, 0xde, 0x1, 0xbf, 0x14, 0xaa, 0x75, 0xcb, 0xd6, 0x68, 0xb7, 0x9, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x6, 0x1b, 0xa5, 0x7a, 0xc4, 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48, 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0xe, 0xb0, 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x3, 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0xb, 0xb5, 0x31, 0x8f, 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c},
+ {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25, 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0xb, 0xb4, 0x7e, 0xc1, 0x1d, 0xa2, 0xb8, 0x7, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33, 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0xc, 0x16, 0xa9, 0x75, 0xca, 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2, 0xe, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f, 0x5, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98, 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x9, 0xbd, 0x2, 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0, 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0xd, 0xd1, 0x6e, 0xda, 0x65, 0xb9, 0x6, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4, 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47, 0x5d, 0xe2, 0x3e, 0x81, 0xa, 0xb5, 0x69, 0xd6, 0xcc, 0x73, 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x1, 0xbe, 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6, 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x3, 0x88, 0x37, 0xeb, 0x54, 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, 0x83, 0x3c, 0xb7, 0x8, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad, 0x67, 0xd8, 0x4, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49, 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84, 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0xf, 0xb0, 0x6c, 0xd3},
+ {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c, 0x1, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f, 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x2, 0xc2, 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11, 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x3, 0xc3, 0xf7, 0x37, 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17, 0x6d, 0xad, 0xf0, 0x30, 0x4, 0xc4, 0x99, 0x59, 0x23, 0xe3, 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac, 0x98, 0x58, 0x5, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf, 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c, 0x6, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x7, 0xc7, 0x94, 0x54, 0x9, 0xc9, 0xb3, 0x73, 0x2e, 0xee, 0xda, 0x1a, 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x8, 0xc8, 0x95, 0x55, 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0xb, 0xcb, 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed, 0xb0, 0x70, 0xa, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e, 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, 0x64, 0xa4, 0x90, 0x50, 0xd, 0xcd, 0xb7, 0x77, 0x2a, 0xea, 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0xc, 0xcc, 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6, 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, 0xf, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d, 0x29, 0xe9, 0xb4, 0x74, 0xe, 0xce, 0x93, 0x53},
+ {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, 0x30, 0xf1, 0xca, 0xb, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7, 0x5, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82, 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7, 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0xe, 0x50, 0x91, 0xec, 0x2d, 0x73, 0xb2, 0xa, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77, 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47, 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x1, 0x5f, 0x9e, 0xe3, 0x22, 0x7c, 0xbd, 0xf, 0xce, 0x90, 0x51, 0x2c, 0xed, 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34, 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x4, 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a, 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0xc, 0x71, 0xb0, 0xee, 0x2f, 0x98, 0x59, 0x7, 0xc6, 0xbb, 0x7a, 0x24, 0xe5, 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0, 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c, 0x57, 0x96, 0xc8, 0x9, 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x2, 0xc3, 0xbe, 0x7f, 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6, 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99, 0xc7, 0x6, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0xd, 0xcc, 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66, 0x5d, 0x9c, 0xc2, 0x3, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56, 0x8, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f, 0xf2, 0x33, 0x6d, 0xac},
+ {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a, 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0xa, 0xc8, 0xe2, 0x20, 0x7b, 0xb9, 0xcd, 0xf, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e, 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad, 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x8, 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0xd, 0xcf, 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, 0xc0, 0x2, 0x28, 0xea, 0xb1, 0x73, 0x7, 0xc5, 0x9e, 0x5c, 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33, 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48, 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd, 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7, 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30, 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x1, 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x4, 0xc6, 0x50, 0x92, 0xc9, 0xb, 0x7f, 0xbd, 0xe6, 0x24, 0xe, 0xcc, 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 0x18, 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, 0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b, 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4, 0xbf, 0x7d, 0x9, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23, 0x57, 0x95, 0xce, 0xc, 0x9a, 0x58, 0x3, 0xc1, 0xb5, 0x77, 0x2c, 0xee, 0xc4, 0x6, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0},
+ {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4, 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36, 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a, 0x72, 0xb1, 0xc2, 0x1, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7, 0x94, 0x57, 0xf, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf, 0x26, 0xe5, 0xbd, 0x7e, 0xd, 0xce, 0x96, 0x55, 0x70, 0xb3, 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x3, 0xcf, 0xc, 0x54, 0x97, 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x2, 0xc1, 0xb2, 0x71, 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10, 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x9, 0xca, 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x7, 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80, 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9, 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8, 0xcb, 0x8, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, 0x6, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0, 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec, 0xb4, 0x77, 0x4, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21, 0x52, 0x91, 0xc9, 0xa, 0xc6, 0x5, 0x5d, 0x9e, 0xed, 0x2e, 0x76, 0xb5, 0x90, 0x53, 0xb, 0xc8, 0xbb, 0x78, 0x20, 0xe3, 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff, 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f},
+ {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8, 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba, 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61, 0x30, 0xf4, 0x92, 0x56, 0x7, 0xc3, 0xcb, 0xf, 0x5e, 0x9a, 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71, 0x57, 0x93, 0xc2, 0x6, 0x60, 0xa4, 0xf5, 0x31, 0x39, 0xfd, 0xac, 0x68, 0xe, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda, 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x1, 0x50, 0x94, 0x9c, 0x58, 0x9, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea, 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11, 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, 0xc, 0xc8, 0xc0, 0x4, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6, 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8, 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0xb, 0xcf, 0x9e, 0x5a, 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, 0xc7, 0x3, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1, 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d, 0x6c, 0xa8, 0xce, 0xa, 0x5b, 0x9f, 0x97, 0x53, 0x2, 0xc6, 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d, 0x5c, 0x98, 0xc9, 0xd, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6, 0xa7, 0x63, 0x5, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1, 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, 0x4c, 0x88},
+ {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7, 0xcc, 0x9, 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8, 0x99, 0x5c, 0xe, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82, 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea, 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45, 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, 0xd5, 0x10, 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba, 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57, 0x5, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6, 0xc7, 0x2, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0xc, 0x6d, 0xa8, 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0xb, 0xce, 0x9c, 0x59, 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d, 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0, 0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0xf, 0xca, 0xcd, 0x8, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2, 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x6, 0x1, 0xc4, 0x96, 0x53, 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0xa, 0xcf, 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0xd, 0x6c, 0xa9, 0xfb, 0x3e, 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x3, 0x51, 0x94, 0x93, 0x56, 0x4, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee, 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11, 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77},
+ {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x5, 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea, 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d, 0xa, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e, 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, 0xc9, 0xf, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf, 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x7, 0xc1, 0x2b, 0xed, 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x2, 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0xd, 0xcb, 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24, 0xce, 0x8, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76, 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4, 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0xb, 0x5c, 0x9a, 0xf2, 0x34, 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27, 0x31, 0xf7, 0xa0, 0x66, 0xe, 0xc8, 0x9f, 0x59, 0x56, 0x90, 0xc7, 0x1, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f, 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, 0x4, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc, 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0xc, 0x1a, 0xdc, 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x9, 0xcf, 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9, 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x6, 0x51, 0x97, 0x7d, 0xbb, 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x3, 0xc5, 0x92, 0x54, 0x3c, 0xfa, 0xad, 0x6b},
+ {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19, 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d, 0x9, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x2, 0x56, 0x91, 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46, 0x5f, 0x98, 0xcc, 0xb, 0x64, 0xa3, 0xf7, 0x30, 0x97, 0x50, 0x4, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5, 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, 0xd3, 0x14, 0xd, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62, 0x52, 0x95, 0xc1, 0x6, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3, 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea, 0x85, 0x42, 0x16, 0xd1, 0xc8, 0xf, 0x5b, 0x9c, 0xf3, 0x34, 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x8, 0xcf, 0x9b, 0x5c, 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18, 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd, 0x92, 0x55, 0x1, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0xa, 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef, 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab, 0xff, 0x38, 0x57, 0x90, 0xc4, 0x3, 0xa4, 0x63, 0x37, 0xf0, 0x9f, 0x58, 0xc, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27, 0x3e, 0xf9, 0xad, 0x6a, 0x5, 0xc2, 0x96, 0x51, 0x61, 0xa6, 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0xe, 0x17, 0xd0, 0x84, 0x43, 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x7, 0x53, 0x94},
+ {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59, 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a, 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec, 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f, 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c, 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa, 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0xd, 0x4f, 0x87, 0xc2, 0xa, 0x46, 0x8e, 0xcb, 0x3, 0x41, 0x89, 0xcc, 0x4, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16, 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28, 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab, 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0, 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e, 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0xc, 0x49, 0x81, 0xc3, 0xb, 0x4e, 0x86, 0xca, 0x2, 0x47, 0x8f, 0xcd, 0x5, 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2, 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44, 0x1, 0xc9, 0x8b, 0x43, 0x6, 0xce, 0x82, 0x4a, 0xf, 0xc7, 0x85, 0x4d, 0x8, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4, 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72, 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8},
+ {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43, 0xc, 0xc5, 0x83, 0x4a, 0xf, 0xc6, 0x80, 0x49, 0xa, 0xc3, 0x85, 0x4c, 0x9, 0xc0, 0x86, 0x4f, 0x18, 0xd1, 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58, 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57, 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff, 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a, 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d, 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed, 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 0xad, 0x64, 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23, 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3, 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e, 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31, 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99, 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10, 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f, 0x48, 0x81, 0xc7, 0xe, 0x4b, 0x82, 0xc4, 0xd, 0x4e, 0x87, 0xc1, 0x8, 0x4d, 0x84, 0xc2, 0xb, 0x44, 0x8d, 0xcb, 0x2, 0x47, 0x8e, 0xc8, 0x1, 0x42, 0x8b, 0xcd, 0x4, 0x41, 0x88, 0xce, 0x7},
+ {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6, 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61, 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a, 0x44, 0x8e, 0xcd, 0x7, 0x4b, 0x81, 0xc2, 0x8, 0x5a, 0x90, 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3, 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, 0x68, 0xa2, 0xcc, 0x6, 0x45, 0x8f, 0xc3, 0x9, 0x4a, 0x80, 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42, 0x1, 0xcb, 0x87, 0x4d, 0xe, 0xc4, 0x96, 0x5c, 0x1f, 0xd5, 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6, 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29, 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0xb, 0x48, 0x82, 0xce, 0x4, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, 0x59, 0x93, 0x85, 0x4f, 0xc, 0xc6, 0x8a, 0x40, 0x3, 0xc9, 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73, 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4, 0xa8, 0x62, 0x21, 0xeb, 0xd, 0xc7, 0x84, 0x4e, 0x2, 0xc8, 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f, 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5, 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36, 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, 0xed, 0x27, 0x49, 0x83, 0xc0, 0xa, 0x46, 0x8c, 0xcf, 0x5, 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b},
+ {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71, 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85, 0xc5, 0xe, 0x45, 0x8e, 0xce, 0x5, 0x74, 0xbf, 0xff, 0x34, 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb, 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57, 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x1, 0xca, 0x81, 0x4a, 0xa, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5, 0xc4, 0xf, 0x4f, 0x84, 0xcf, 0x4, 0x44, 0x8f, 0xd2, 0x19, 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d, 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a, 0x47, 0x8c, 0xcc, 0x7, 0x4c, 0x87, 0xc7, 0xc, 0x25, 0xee, 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73, 0x38, 0xf3, 0xb3, 0x78, 0x9, 0xc2, 0x82, 0x49, 0x2, 0xc9, 0x89, 0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54, 0xcd, 0x6, 0x46, 0x8d, 0xc6, 0xd, 0x4d, 0x86, 0xdb, 0x10, 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1, 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde, 0x83, 0x48, 0x8, 0xc3, 0x88, 0x43, 0x3, 0xc8, 0xb9, 0x72, 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef, 0xa4, 0x6f, 0x2f, 0xe4},
+ {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70, 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x2, 0x72, 0xbe, 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 0x3d, 0xf1, 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, 0x4, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba, 0xca, 0x6, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1, 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0xa, 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x8, 0xc4, 0x8d, 0x41, 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0xe, 0x47, 0x8b, 0xfb, 0x37, 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0xc, 0xc0, 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x1, 0x48, 0x84, 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a, 0x3, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1, 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x5, 0x5b, 0x97, 0xde, 0x12, 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc, 0x95, 0x59, 0x7, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe, 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, 0xb, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5, 0xc5, 0x9, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b, 0xf, 0xc3, 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68, 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, 0xc1, 0xd, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23},
+ {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x6, 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0xc, 0xc1, 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19, 0x53, 0x9e, 0xc7, 0xa, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8, 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, 0xb9, 0x74, 0xb, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52, 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a, 0xc0, 0xd, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff, 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, 0x7, 0xca, 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0, 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x1, 0x4b, 0x86, 0x5a, 0x97, 0xdd, 0x10, 0x49, 0x84, 0xce, 0x3, 0x7c, 0xb1, 0xfb, 0x36, 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x5, 0xc8, 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69, 0xc2, 0xf, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29, 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x9, 0xc4, 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e, 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x8, 0x3b, 0xf6, 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57, 0xe, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, 0x7b, 0xb6, 0xc9, 0x4, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90, 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48, 0x2, 0xcf, 0x96, 0x5b, 0x11, 0xdc},
+ {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c, 0x42, 0x8c, 0xc3, 0xd, 0x5d, 0x93, 0xdc, 0x12, 0xf8, 0x36, 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x8, 0x47, 0x89, 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x5, 0xcb, 0x9b, 0x55, 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea, 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d, 0x52, 0x9c, 0xcc, 0x2, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde, 0x8e, 0x40, 0xf, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0xa, 0xc4, 0x8b, 0x45, 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7, 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18, 0x48, 0x86, 0xc9, 0x7, 0xc7, 0x9, 0x46, 0x88, 0xd8, 0x16, 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9, 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b, 0x4, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70, 0x20, 0xee, 0xa1, 0x6f, 0x1, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0xc, 0x5c, 0x92, 0xdd, 0x13, 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4, 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b, 0xb, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, 0xc8, 0x6, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38, 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x3, 0x4c, 0x82, 0xec, 0x22, 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1, 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, 0xe, 0xc0},
+ {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3, 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16, 0x41, 0x8e, 0xc2, 0xd, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0xc, 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9, 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d, 0x1, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1, 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, 0x3, 0xcc, 0xc1, 0xe, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96, 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba, 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0xf, 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x2, 0xcd, 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78, 0x47, 0x88, 0xc4, 0xb, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe, 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x6, 0xc9, 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x7, 0xc8, 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c, 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0xa, 0x46, 0x89, 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x8, 0x44, 0x8b, 0x86, 0x49, 0x5, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f, 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e, 0x29, 0xe6, 0xaa, 0x65, 0x4, 0xcb, 0x87, 0x48, 0x1f, 0xd0, 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x9, 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f},
+ {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4, 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45, 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x1, 0x6c, 0xbc, 0xb6, 0x66, 0xb, 0xdb, 0x9e, 0x4e, 0x23, 0xf3, 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34, 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f, 0x2, 0xd2, 0xd8, 0x8, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c, 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5, 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe, 0xd3, 0x3, 0x9, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11, 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0xf, 0xdf, 0xd5, 0x5, 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7, 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3, 0xde, 0xe, 0x4, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0, 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26, 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c, 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae, 0xa4, 0x74, 0x19, 0xc9, 0xd, 0xdd, 0xb0, 0x60, 0x6a, 0xba, 0xd7, 0x7, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57, 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0xc, 0x61, 0xb1, 0xbb, 0x6b, 0x6, 0xd6, 0x12, 0xc2, 0xaf, 0x7f, 0x75, 0xa5, 0xc8, 0x18},
+ {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb, 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86, 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51, 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3, 0x68, 0xb9, 0xd7, 0x6, 0xb, 0xda, 0xb4, 0x65, 0x7e, 0xaf, 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x7, 0xd6, 0xdb, 0xa, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24, 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56, 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x1, 0x6f, 0xbe, 0xb3, 0x62, 0xc, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1, 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc, 0xd2, 0x3, 0xe, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5, 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, 0x1f, 0xce, 0x5, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x8, 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45, 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec, 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e, 0xd5, 0x4, 0x6a, 0xbb, 0xb6, 0x67, 0x9, 0xd8, 0xbd, 0x6c, 0x2, 0xd3, 0xde, 0xf, 0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15, 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7},
+ {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda, 0xa1, 0x73, 0x18, 0xca, 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b, 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c, 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b, 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x7, 0xd5, 0xd1, 0x3, 0x68, 0xba, 0x60, 0xb2, 0xd9, 0xb, 0xf, 0xdd, 0xb6, 0x64, 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13, 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a, 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, 0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44, 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3, 0xd8, 0xa, 0xe, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x6, 0xd4, 0xd0, 0x2, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a, 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32, 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 0x4d, 0x26, 0xf4, 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, 0x97, 0x45, 0xdf, 0xd, 0x66, 0xb4, 0xb0, 0x62, 0x9, 0xdb, 0x1, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x5, 0x7e, 0xac, 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb, 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a, 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d, 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb},
+ {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5, 0xb1, 0x62, 0xa, 0xd9, 0xda, 0x9, 0x61, 0xb2, 0x67, 0xb4, 0xdc, 0xf, 0xc, 0xdf, 0xb7, 0x64, 0x7f, 0xac, 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 0xa9, 0x7a, 0x12, 0xc1, 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b, 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb, 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27, 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82, 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3, 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e, 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34, 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55, 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6, 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c, 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc, 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1, 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b, 0x60, 0xb3, 0xdb, 0x8, 0xb, 0xd8, 0xb0, 0x63, 0xb6, 0x65, 0xd, 0xde, 0xdd, 0xe, 0x66, 0xb5, 0xd1, 0x2, 0x6a, 0xb9, 0xba, 0x69, 0x1, 0xd2, 0x7, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, 0xd7, 0x4},
+ {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15, 0x74, 0xa0, 0xb6, 0x62, 0x3, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e, 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x6, 0xd2, 0xb3, 0x67, 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64, 0x5, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42, 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, 0xf, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4, 0xc, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68, 0x9, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 0xe7, 0x33, 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0xa, 0xde, 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85, 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c, 0x1d, 0xc9, 0xdf, 0xb, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6, 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x8, 0x1e, 0xca, 0xab, 0x7f, 0xd9, 0xd, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf, 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc, 0xad, 0x79, 0x6f, 0xbb, 0xda, 0xe, 0xf6, 0x22, 0x43, 0x97, 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x4, 0x12, 0xc6, 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d, 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x7, 0x66, 0xb2, 0x4a, 0x9e, 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b, 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, 0xd6, 0x2, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d, 0xd5, 0x1, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3},
+ {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x4, 0x66, 0xb3, 0xa2, 0x77, 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26, 0xbf, 0x6a, 0x8, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c, 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0xc, 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x1, 0x10, 0xc5, 0xa7, 0x72, 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 0xb2, 0x67, 0x5, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36, 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x9, 0x6b, 0xbe, 0xaf, 0x7a, 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b, 0xd, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e, 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4, 0xb5, 0x60, 0x2, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x6, 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac, 0xce, 0x1b, 0xa, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd, 0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0xe, 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f, 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x3, 0x61, 0xb4, 0x43, 0x96, 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16, 0x7, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0xb, 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e, 0x7c, 0xa9, 0xb8, 0x6d, 0xf, 0xda, 0x2d, 0xf8, 0x9a, 0x4f, 0x5e, 0x8b, 0xe9, 0x3c},
+ {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6, 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9, 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x7, 0xdf, 0x9, 0x6e, 0xb8, 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26, 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0xe, 0xd8, 0xa3, 0x75, 0x12, 0xc4, 0xdc, 0xa, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a, 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, 0x8c, 0x5a, 0xbc, 0x6a, 0xd, 0xdb, 0xc3, 0x15, 0x72, 0xa4, 0x7c, 0xaa, 0xcd, 0x1b, 0x3, 0xd5, 0xb2, 0x64, 0x82, 0x54, 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa, 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x4, 0x1c, 0xca, 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43, 0xa5, 0x73, 0x14, 0xc2, 0xda, 0xc, 0x6b, 0xbd, 0xba, 0x6c, 0xb, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23, 0x3b, 0xed, 0x8a, 0x5c, 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x5, 0xd3, 0xb4, 0x62, 0x65, 0xb3, 0xd4, 0x2, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d, 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f, 0x87, 0x51, 0x36, 0xe0, 0x6, 0xd0, 0xb7, 0x61, 0x79, 0xaf, 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x1, 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1, 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 0xd9, 0xf, 0x68, 0xbe, 0xa6, 0x70, 0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, 0x8, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20},
+ {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95, 0x8a, 0x5d, 0x39, 0xee, 0x7, 0xd0, 0xb4, 0x63, 0x7c, 0xab, 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0, 0x9, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0xe, 0xd9, 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c, 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0xa, 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0xd, 0xe4, 0x33, 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78, 0x67, 0xb0, 0xd4, 0x3, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2, 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x4, 0xdb, 0xc, 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49, 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, 0xe2, 0x35, 0xdc, 0xb, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3, 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x5, 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x2, 0x66, 0xb1, 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27, 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x6, 0xd1, 0xc9, 0x1e, 0x7a, 0xad, 0xb2, 0x65, 0x1, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b, 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, 0xf, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e, 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17, 0x73, 0xa4, 0xbb, 0x6c, 0x8, 0xdf},
+ {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc, 0x1, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33, 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x2, 0xda, 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9, 0xcb, 0x13, 0x66, 0xbe, 0x3, 0xdb, 0xae, 0x76, 0x44, 0x9c, 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf, 0x4, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52, 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x5, 0xdd, 0xa8, 0x70, 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, 0x61, 0xb9, 0x6, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34, 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x7, 0xdf, 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc, 0xce, 0x16, 0x63, 0xbb, 0x8, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4, 0x9, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f, 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0xa, 0xd2, 0xa7, 0x7f, 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, 0x6e, 0xb6, 0xb, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39, 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0xc, 0xd4, 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7, 0xc5, 0x1d, 0x68, 0xb0, 0xd, 0xd5, 0xa0, 0x78, 0x4a, 0x92, 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1, 0xe, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58, 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0xf, 0xd7, 0xa2, 0x7a, 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, 0x6b, 0xb3},
+ {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8, 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1, 0xd4, 0xd, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, 0xce, 0x17, 0xa4, 0x7d, 0xb, 0xd2, 0xe7, 0x3e, 0x48, 0x91, 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x6, 0xb5, 0x6c, 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80, 0x44, 0x9d, 0xeb, 0x32, 0x7, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60, 0xd3, 0xa, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf, 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96, 0xa3, 0x7a, 0xc, 0xd5, 0x77, 0xae, 0xd8, 0x1, 0x34, 0xed, 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4, 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0xe, 0xd7, 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef, 0xda, 0x3, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, 0xf3, 0x2a, 0xaa, 0x73, 0x5, 0xdc, 0xe9, 0x30, 0x46, 0x9f, 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62, 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b, 0x7e, 0xa7, 0xd1, 0x8, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x9, 0xd0, 0xa6, 0x7f, 0xdd, 0x4, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82, 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98, 0xad, 0x74, 0x2, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca, 0x79, 0xa0, 0xd6, 0xf, 0x3a, 0xe3, 0x95, 0x4c},
+ {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83, 0x42, 0x98, 0xeb, 0x31, 0xd, 0xd7, 0xa4, 0x7e, 0xdc, 0x6, 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10, 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8, 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f, 0xc, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48, 0x74, 0xae, 0xdd, 0x7, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64, 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x1, 0xdb, 0x79, 0xa3, 0xd0, 0xa, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66, 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, 0x6d, 0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x8, 0xaa, 0x70, 0x3, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d, 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba, 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x5, 0x39, 0xe3, 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0xe, 0xd4, 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x4, 0x77, 0xad, 0xf, 0xd5, 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3, 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, 0xc8, 0x12, 0xd3, 0x9, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef, 0x4d, 0x97, 0xe4, 0x3e, 0x2, 0xd8, 0xab, 0x71, 0xf2, 0x28, 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f, 0x23, 0xf9, 0x8a, 0x50},
+ {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad, 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0xa, 0xa7, 0x7c, 0xc, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12, 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68, 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f, 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22, 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58, 0xa6, 0x7d, 0xd, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb, 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0xb, 0x97, 0x4c, 0x3c, 0xe7, 0xdc, 0x7, 0x77, 0xac, 0x1, 0xda, 0xaa, 0x71, 0x4a, 0x91, 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x5, 0x75, 0xae, 0x3, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f, 0xf, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42, 0x79, 0xa2, 0xd2, 0x9, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a, 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b, 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21, 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b, 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd, 0x33, 0xe8, 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x8, 0xa5, 0x7e, 0xe, 0xd5, 0xee, 0x35, 0x45, 0x9e, 0x2, 0xd9, 0xa9, 0x72, 0x49, 0x92, 0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x4, 0x74, 0xaf},
+ {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38, 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x9, 0x70, 0xac, 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x2, 0xc3, 0x1f, 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14, 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99, 0x58, 0x84, 0xfd, 0x21, 0xf, 0xd3, 0xaa, 0x76, 0xf6, 0x2a, 0x53, 0x8f, 0xa1, 0x7d, 0x4, 0xd8, 0x9b, 0x47, 0x3e, 0xe2, 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, 0xc7, 0x1b, 0xda, 0x6, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4, 0x74, 0xa8, 0xd1, 0xd, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee, 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5, 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0xa, 0x24, 0xf8, 0x81, 0x5d, 0xdd, 0x1, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3, 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2, 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88, 0xa6, 0x7a, 0x3, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x8, 0xd4, 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x5, 0x85, 0x59, 0x20, 0xfc, 0xd2, 0xe, 0x77, 0xab, 0x6a, 0xb6, 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd, 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0xc, 0xd0, 0xfe, 0x22, 0x5b, 0x87, 0x7, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29, 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a, 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68},
+ {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x2, 0xdf, 0xa5, 0x78, 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x3, 0xde, 0xa2, 0x7f, 0x5, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x4, 0xd9, 0xa3, 0x7e, 0x57, 0x8a, 0xf0, 0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, 0x7, 0xda, 0x55, 0x88, 0xf2, 0x2f, 0x6, 0xdb, 0xa1, 0x7c, 0x59, 0x84, 0xfe, 0x23, 0xa, 0xd7, 0xad, 0x70, 0xff, 0x22, 0x58, 0x85, 0xac, 0x71, 0xb, 0xd6, 0x8, 0xd5, 0xaf, 0x72, 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x9, 0xd4, 0xfd, 0x20, 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0xf, 0xd2, 0x5d, 0x80, 0xfa, 0x27, 0xe, 0xd3, 0xa9, 0x74, 0xaa, 0x77, 0xd, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0xc, 0xd1, 0xab, 0x76, 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d, 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98, 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c, 0x10, 0xcd, 0xb7, 0x6a, 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68, 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36, 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37, 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35, 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32, 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62, 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, 0x4a, 0x97},
+ {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf, 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x1, 0x7e, 0xa0, 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x3, 0x23, 0xfd, 0x82, 0x5c, 0xa3, 0x7d, 0x2, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3, 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6, 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, 0xd9, 0x7, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x6, 0xd8, 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85, 0xfa, 0x24, 0x4, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a, 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, 0xc4, 0x1a, 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x5, 0x7a, 0xa4, 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0xf, 0x91, 0x4f, 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31, 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, 0xe, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd, 0x53, 0x8d, 0xf2, 0x2c, 0xc, 0xd2, 0xad, 0x73, 0x8c, 0x52, 0x2d, 0xf3, 0xd3, 0xd, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d, 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, 0x48, 0x96, 0x8, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28, 0xd7, 0x9, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7, 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0xb, 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35, 0xab, 0x75, 0xa, 0xd4, 0xf4, 0x2a, 0x55, 0x8b},
+ {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0xd, 0x2a, 0xf5, 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0, 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b, 0xf7, 0x28, 0xf, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef, 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, 0xdd, 0x2, 0xd9, 0x6, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe, 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77, 0xb, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62, 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x9, 0x75, 0xaa, 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23, 0x5f, 0x80, 0xa7, 0x78, 0x4, 0xdb, 0xaf, 0x70, 0xc, 0xd3, 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, 0xe1, 0x3e, 0xde, 0x1, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9, 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92, 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87, 0xa0, 0x7f, 0x3, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0xe, 0x72, 0xad, 0x76, 0xa9, 0xd5, 0xa, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f, 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7, 0x7, 0xd8, 0xa4, 0x7b, 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3, 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x5, 0xe5, 0x3a, 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f, 0x8, 0xd7, 0xab, 0x74},
+ {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9, 0xa6, 0x46, 0x7b, 0x9b, 0x1, 0xe1, 0xdc, 0x3c, 0xf5, 0x15, 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c, 0xf6, 0x16, 0x2b, 0xcb, 0x2, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d, 0xa4, 0x44, 0x79, 0x99, 0x3, 0xe3, 0xde, 0x3e, 0xa2, 0x42, 0x7f, 0x9f, 0x5, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc, 0x56, 0xb6, 0x8b, 0x6b, 0x4, 0xe4, 0xd9, 0x39, 0xa3, 0x43, 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd, 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40, 0x7d, 0x9d, 0x7, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68, 0xf2, 0x12, 0x2f, 0xcf, 0x6, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3, 0xa, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f, 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91, 0xb, 0xeb, 0xd6, 0x36, 0x8, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1, 0xae, 0x4e, 0x73, 0x93, 0x9, 0xe9, 0xd4, 0x34, 0xfd, 0x1d, 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6, 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0xf, 0xef, 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7, 0xe, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a, 0x77, 0x97, 0xd, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4, 0x5e, 0xbe, 0x83, 0x63, 0xc, 0xec, 0xd1, 0x31, 0xab, 0x4b, 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5},
+ {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88, 0x15, 0xf4, 0xca, 0x2b, 0xed, 0xc, 0x32, 0xd3, 0x4e, 0xaf, 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0xd, 0xec, 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26, 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2, 0x3f, 0xde, 0xe0, 0x1, 0xe2, 0x3, 0x3d, 0xdc, 0x41, 0xa0, 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24, 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0xf, 0xee, 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad, 0x30, 0xd1, 0xef, 0xe, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8, 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x2, 0xe3, 0xd9, 0x38, 0x6, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc, 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0xa, 0x97, 0x76, 0x48, 0xa9, 0xa8, 0x49, 0x77, 0x96, 0xb, 0xea, 0xd4, 0x35, 0xf3, 0x12, 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20, 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x7, 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x5, 0x98, 0x79, 0x47, 0xa6, 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c, 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x9, 0xe8, 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x8, 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c, 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46, 0x78, 0x99, 0x4, 0xe5, 0xdb, 0x3a},
+ {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12, 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3, 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69, 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, 0xe1, 0x3, 0xd4, 0x36, 0xd, 0xef, 0x7b, 0x99, 0xa2, 0x40, 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83, 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f, 0xb, 0xe9, 0xd2, 0x30, 0xe7, 0x5, 0x3e, 0xdc, 0x48, 0xaa, 0x91, 0x73, 0x33, 0xd1, 0xea, 0x8, 0x9c, 0x7e, 0x45, 0xa7, 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x6, 0xe4, 0xb5, 0x57, 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd, 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x9, 0x32, 0xd0, 0x7, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93, 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63, 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e, 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, 0x60, 0x82, 0xd3, 0x31, 0xa, 0xe8, 0x7c, 0x9e, 0xa5, 0x47, 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x4, 0x66, 0x84, 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e, 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x2, 0x39, 0xdb, 0x4f, 0xad, 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0xc, 0xee, 0xd5, 0x37, 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x1, 0xe3, 0x34, 0xd6, 0xed, 0xf, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca, 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, 0xc4, 0x26},
+ {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8, 0x96, 0x75, 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x5, 0xdd, 0x3e, 0x6, 0xe5, 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x9, 0x9a, 0x79, 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0xa, 0xe9, 0xa7, 0x44, 0x7c, 0x9f, 0xc, 0xef, 0xd7, 0x34, 0xec, 0xf, 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a, 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67, 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0, 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20, 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d, 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c, 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a, 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66, 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80, 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10, 0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0xd, 0xee, 0xd6, 0x35, 0xed, 0xe, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e, 0x30, 0xd3, 0xeb, 0x8, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98, 0xa0, 0x43, 0xd0, 0x33, 0xb, 0xe8, 0x97, 0x74, 0x4c, 0xaf, 0x3c, 0xdf, 0xe7, 0x4, 0xdc, 0x3f, 0x7, 0xe4, 0x77, 0x94, 0xac, 0x4f, 0x1, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92, 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x2, 0x3a, 0xd9},
+ {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x2, 0x33, 0xd7, 0x51, 0xb5, 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13, 0xd1, 0x35, 0x4, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46, 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x6, 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x8, 0xec, 0xdd, 0x39, 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd, 0x8c, 0x68, 0xee, 0xa, 0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b, 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, 0xc, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b, 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0xe, 0xfb, 0x1f, 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52, 0xd4, 0x30, 0x1, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x3, 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56, 0x67, 0x83, 0x5, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0, 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x7, 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1, 0xdc, 0x38, 0x9, 0xed, 0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b, 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0xb, 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, 0x2b, 0xcf, 0xd, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b, 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0xf, 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9, 0x2f, 0xcb, 0xfa, 0x1e},
+ {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa, 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68, 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0xc, 0xf1, 0x14, 0x26, 0xc3, 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, 0xee, 0xb, 0x7, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86, 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a, 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6, 0x37, 0xd2, 0xe0, 0x5, 0x9, 0xec, 0xde, 0x3b, 0xba, 0x5f, 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3, 0xe, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90, 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca, 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, 0xe7, 0x2, 0xe3, 0x6, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62, 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0, 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c, 0xdd, 0x38, 0xa, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0xd, 0xe8, 0xe4, 0x1, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a, 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e, 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, 0x3, 0xe6, 0xea, 0xf, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b, 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x8, 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4, 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x4, 0xe1},
+ {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb, 0xc6, 0x20, 0x17, 0xf1, 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19, 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1, 0x86, 0x60, 0xe8, 0xe, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x3, 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x8, 0x80, 0x66, 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x5, 0x32, 0xd4, 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c, 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99, 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x6, 0xe0, 0xb, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98, 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a, 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 0x8f, 0x7, 0xe1, 0xd6, 0x30, 0xdb, 0x3d, 0xa, 0xec, 0x64, 0x82, 0xb5, 0x53, 0xef, 0x9, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a, 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x4, 0x29, 0xcf, 0xf8, 0x1e, 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9, 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61, 0x56, 0xb0, 0x38, 0xde, 0xe9, 0xf, 0xe4, 0x2, 0x35, 0xd3, 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x1, 0xe7, 0x6f, 0x89, 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0xc, 0xea, 0xdd, 0x3b, 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93, 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd},
+ {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4, 0xd6, 0x31, 0x5, 0xe2, 0x6d, 0x8a, 0xbe, 0x59, 0xbd, 0x5a, 0x6e, 0x89, 0x6, 0xe1, 0xd5, 0x32, 0xb1, 0x56, 0x62, 0x85, 0xa, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x9, 0xee, 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, 0xf, 0xe8, 0xc, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83, 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 0x14, 0xf3, 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d, 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41, 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff, 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47, 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a, 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4, 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b, 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16, 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66, 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0xe, 0xea, 0xd, 0x39, 0xde, 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0xb, 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x8, 0x87, 0x60, 0x54, 0xb3, 0x30, 0xd7, 0xe3, 0x4, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc, 0x88, 0x6f, 0xe0, 0x7, 0x33, 0xd4, 0xe6, 0x1, 0x35, 0xd2, 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, 0xe5, 0x2},
+ {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce, 0xeb, 0x3, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10, 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, 0x6, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd, 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x5, 0x20, 0xc8, 0x79, 0x91, 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd, 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0xc, 0xe4, 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c, 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0xf, 0xd4, 0x3c, 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0xa, 0xe2, 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, 0xb8, 0x50, 0xe1, 0x9, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43, 0x2d, 0xc5, 0xe0, 0x8, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6, 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0xb, 0xe3, 0xc6, 0x2e, 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0xe, 0x2b, 0xc3, 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf, 0x8a, 0x62, 0xc0, 0x28, 0xd, 0xe5, 0x54, 0xbc, 0x99, 0x71, 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x4, 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68, 0x4d, 0xa5, 0x7, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc, 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x2, 0x27, 0xcf, 0x6d, 0x85, 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d, 0xcc, 0x24, 0x1, 0xe9, 0x4b, 0xa3, 0x86, 0x6e},
+ {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0xb, 0xae, 0x47, 0x61, 0x88, 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x6, 0x20, 0xc9, 0x77, 0x9e, 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c, 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, 0xd, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d, 0xc3, 0x2a, 0xc, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x7, 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3, 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0xa, 0x82, 0x6b, 0x4d, 0xa4, 0x1, 0xe8, 0xce, 0x27, 0x99, 0x70, 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b, 0x2e, 0xc7, 0xe1, 0x8, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e, 0x80, 0x69, 0x4f, 0xa6, 0x3, 0xea, 0xcc, 0x25, 0xc1, 0x28, 0xe, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc, 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, 0xbb, 0x52, 0xec, 0x5, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49, 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87, 0xa1, 0x48, 0xed, 0x4, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65, 0xc0, 0x29, 0xf, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc, 0x2, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6, 0xe0, 0x9, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12, 0xb7, 0x5e, 0x78, 0x91},
+ {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf, 0x6, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x5, 0xef, 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0xc, 0xe6, 0xc5, 0x2f, 0x83, 0x69, 0x4a, 0xa0, 0xf, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, 0x49, 0xa3, 0xa, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6, 0x9, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2, 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 0x38, 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1, 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd, 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31, 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c, 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc, 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16, 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93, 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3, 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0xb, 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x8, 0xa4, 0x4e, 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0xd, 0xa1, 0x4b, 0x68, 0x82, 0x2d, 0xc7, 0xe4, 0xe, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce, 0xed, 0x7, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x4, 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x1, 0xad, 0x47, 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x2, 0xae, 0x44, 0x67, 0x8d},
+ {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36, 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0xc, 0xa7, 0x4c, 0x6c, 0x87, 0x27, 0xcc, 0xec, 0x7, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1, 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11, 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8, 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0xe, 0xe5, 0x45, 0xae, 0x8e, 0x65, 0xce, 0x25, 0x5, 0xee, 0x74, 0x9f, 0xbf, 0x54, 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x2, 0x22, 0xc9, 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x9, 0x29, 0xc2, 0xb0, 0x5b, 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b, 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, 0xe6, 0xd, 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x6, 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c, 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa, 0x1, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0xa, 0xe1, 0xc1, 0x2a, 0xe8, 0x3, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43, 0xe3, 0x8, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15, 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5, 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0xf, 0xe4, 0x4f, 0xa4, 0x84, 0x6f, 0xcf, 0x24, 0x4, 0xef, 0x44, 0xaf, 0x8f, 0x64, 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32, 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72},
+ {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8, 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x7, 0xeb, 0xcc, 0x20, 0x9, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6, 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0xe, 0xe2, 0xcb, 0x27, 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a, 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x8, 0xe3, 0xf, 0x26, 0xca, 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7, 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x1, 0x28, 0xc4, 0x2f, 0xc3, 0xea, 0x6, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35, 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0xd, 0xb3, 0x5f, 0x76, 0x9a, 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0xa, 0x23, 0xcf, 0x42, 0xae, 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2, 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x4, 0x2d, 0xc1, 0x7f, 0x93, 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x3, 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e, 0x57, 0xbb, 0x5, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88, 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, 0xa6, 0x4a, 0xc7, 0x2b, 0x2, 0xee, 0x50, 0xbc, 0x95, 0x79, 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0xc, 0xe0, 0x6d, 0x81, 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11, 0xaf, 0x43, 0x6a, 0x86, 0xb, 0xe7, 0xce, 0x22, 0x9c, 0x70, 0x59, 0xb5},
+ {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b, 0xb1, 0x5c, 0xe5, 0x8, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67, 0xde, 0x33, 0x19, 0xf4, 0xec, 0x1, 0x2b, 0xc6, 0x7f, 0x92, 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e, 0x9a, 0x77, 0x5d, 0xb0, 0x9, 0xe4, 0xce, 0x23, 0xa1, 0x4c, 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x2, 0xef, 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0xa, 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4, 0xee, 0x3, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38, 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, 0xb, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd, 0x97, 0x7a, 0x50, 0xbd, 0x4, 0xe9, 0xc3, 0x2e, 0xac, 0x41, 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0xc, 0x26, 0xcb, 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x5, 0x2f, 0xc2, 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0xd, 0xe0, 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c, 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, 0x6, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0, 0x24, 0xc9, 0xe3, 0xe, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2, 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94, 0x2d, 0xc0, 0xea, 0x7, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, 0xd1, 0x3c, 0xc8, 0x25, 0xf, 0xe2, 0x5b, 0xb6, 0x9c, 0x71, 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a},
+ {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5, 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41, 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0xb, 0xe5, 0x55, 0xbb, 0x94, 0x7a, 0xe9, 0x7, 0x28, 0xc6, 0x76, 0x98, 0xb7, 0x59, 0x5, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5, 0x26, 0xc8, 0xe7, 0x9, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad, 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f, 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a, 0xcf, 0x21, 0xe, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x2, 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0xa, 0xe4, 0xcb, 0x25, 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x6, 0xb6, 0x58, 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc, 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68, 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a, 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x1, 0xef, 0x5f, 0xb1, 0x9e, 0x70, 0xe3, 0xd, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53, 0xf, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2, 0xed, 0x3, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66, 0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33, 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b, 0x4, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x8, 0x27, 0xc9, 0x79, 0x97, 0xb8, 0x56},
+ {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c, 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0xe, 0xe1, 0x7d, 0x92, 0xbe, 0x51, 0xe6, 0x9, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80, 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d, 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa, 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42, 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f, 0xe9, 0x6, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d, 0x1, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93, 0x24, 0xcb, 0xe7, 0x8, 0x94, 0x7b, 0x57, 0xb8, 0xf, 0xe0, 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d, 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33, 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb, 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0xa, 0xbd, 0x52, 0x7e, 0x91, 0xd, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba, 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x4, 0x28, 0xc7, 0x5b, 0xb4, 0x98, 0x77, 0xc0, 0x2f, 0x3, 0xec, 0xcf, 0x20, 0xc, 0xe3, 0x54, 0xbb, 0x97, 0x78, 0xe4, 0xb, 0x27, 0xc8, 0x7f, 0x90, 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x2, 0xed, 0xc1, 0x2e, 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x5, 0x63, 0x8c, 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64, 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9},
+ {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6, 0x5c, 0xac, 0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81, 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20, 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x3, 0xf3, 0xfe, 0xe, 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, 0xcc, 0x3c, 0x5, 0xf5, 0xf8, 0x8, 0xe2, 0x12, 0x1f, 0xef, 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e, 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0, 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, 0x74, 0x84, 0x6, 0xf6, 0xfb, 0xb, 0xe1, 0x11, 0x1c, 0xec, 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41, 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f, 0x85, 0x75, 0x78, 0x88, 0xa, 0xfa, 0xf7, 0x7, 0xed, 0x1d, 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33, 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x9, 0xf9, 0xf4, 0x4, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c, 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d, 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c, 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0xf, 0xff, 0xf2, 0x2, 0xe8, 0x18, 0x15, 0xe5, 0xc, 0xfc, 0xf1, 0x1, 0xeb, 0x1b, 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35, 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94, 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e},
+ {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46, 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba, 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e, 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x3, 0xf2, 0xfc, 0xd, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6, 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc, 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33, 0xde, 0x2f, 0x21, 0xd0, 0xe6, 0x17, 0x19, 0xe8, 0x5, 0xf4, 0xfa, 0xb, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30, 0x6, 0xf7, 0xf9, 0x8, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87, 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3, 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x9, 0xf8, 0xf6, 0x7, 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0, 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74, 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c, 0xa, 0xfb, 0xf5, 0x4, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56, 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72, 0x9f, 0x6e, 0x60, 0x91, 0xc, 0xfd, 0xf3, 0x2, 0xef, 0x1e, 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a, 0xec, 0x1d, 0x13, 0xe2, 0xf, 0xfe, 0xf0, 0x1, 0x37, 0xc6, 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49, 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, 0x80, 0x71},
+ {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69, 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53, 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, 0x3d, 0xcf, 0xe8, 0x1a, 0x11, 0xe3, 0x7, 0xf5, 0xfe, 0xc, 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81, 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d, 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29, 0xe, 0xfc, 0xf7, 0x5, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f, 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5, 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x9, 0xfb, 0xf0, 0x2, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1, 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d, 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c, 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x6, 0xd, 0xff, 0x1b, 0xe9, 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63, 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee, 0xe5, 0x17, 0xf3, 0x1, 0xa, 0xf8, 0xdf, 0x2d, 0x26, 0xd4, 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x8, 0x3, 0xf1, 0x15, 0xe7, 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd, 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50, 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda, 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0xf, 0x4, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae, 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d},
+ {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3, 0xb, 0xf8, 0xf0, 0x3, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33, 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88, 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0xe, 0x6, 0xf5, 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e, 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e, 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x5, 0xd, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35, 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 0x5d, 0xae, 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24, 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0xc, 0xff, 0xf7, 0x4, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44, 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4, 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4, 0x7, 0xf4, 0xfc, 0xf, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84, 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x2, 0xa, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9, 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2, 0xfa, 0x9, 0x1, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49, 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79, 0x9a, 0x69, 0x61, 0x92},
+ {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5, 0xfb, 0xf, 0xe, 0xfa, 0xc, 0xf8, 0xf9, 0xd, 0x8, 0xfc, 0xfd, 0x9, 0xff, 0xb, 0xa, 0xfe, 0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55},
+ {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa, 0xeb, 0x1e, 0x1c, 0xe9, 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, 0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a, 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5, 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9, 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81, 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e, 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42, 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a, 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0xb, 0xfe, 0xfc, 0x9, 0xf8, 0xd, 0xf, 0xfa, 0xf0, 0x5, 0x7, 0xf2, 0x3, 0xf6, 0xf4, 0x1, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea, 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce, 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29, 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71, 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e, 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92, 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41, 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae, 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa},
+ {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23, 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d, 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f, 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b, 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e, 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97, 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, 0x7d, 0x8b, 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18, 0x3, 0xf5, 0xf2, 0x4, 0xfc, 0xa, 0xd, 0xfb, 0x3b, 0xcd, 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf, 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d, 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58, 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a, 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e, 0x5, 0xf3, 0xf4, 0x2, 0xfa, 0xc, 0xb, 0xfd, 0xdd, 0x2b, 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39, 0xc1, 0x37, 0x30, 0xc6, 0x6, 0xf0, 0xf7, 0x1, 0xf9, 0xf, 0x8, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d, 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63, 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa, 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, 0x40, 0xb6},
+ {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c, 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24, 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f, 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c, 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0xb, 0xfc, 0xf8, 0xf, 0xf0, 0x7, 0x3, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f, 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77, 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f, 0x6b, 0x9c, 0x98, 0x6f, 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f, 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0xa, 0xe, 0xf9, 0x6, 0xf1, 0xf5, 0x2, 0xdd, 0x2a, 0x2e, 0xd9, 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, 0x3e, 0xc9, 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62, 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1, 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9, 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, 0x15, 0xe2, 0xf6, 0x1, 0x5, 0xf2, 0xd, 0xfa, 0xfe, 0x9, 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca, 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92, 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2, 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49},
+ {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x4, 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a, 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d, 0x8, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58, 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, 0xf4, 0xc, 0xec, 0x14, 0x1, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e, 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f, 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x5, 0x44, 0xbc, 0xa9, 0x51, 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, 0xb0, 0x48, 0x9, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb, 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca, 0xdf, 0x27, 0xf5, 0xd, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0, 0x2, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, 0x7c, 0x84, 0xfe, 0x6, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c, 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b, 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35, 0xe7, 0x1f, 0xa, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, 0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0xe, 0xdc, 0x24, 0x31, 0xc9, 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x3, 0xfb, 0xba, 0x42, 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x7, 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d, 0xcc, 0x34, 0x21, 0xd9, 0xb, 0xf3, 0xe6, 0x1e, 0x64, 0x9c, 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0xf, 0x1a, 0xe2, 0x30, 0xc8, 0xdd, 0x25},
+ {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e, 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x7, 0xfe, 0xb0, 0x49, 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40, 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0xe, 0xf7, 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8, 0xe6, 0x1f, 0x9, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55, 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21, 0xf4, 0xd, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0xa, 0xdf, 0x26, 0x30, 0xc9, 0xfa, 0x3, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98, 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7, 0x12, 0xeb, 0xfd, 0x4, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90, 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0xb, 0x6e, 0x97, 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0xc, 0x1a, 0xe3, 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x5, 0xd0, 0x29, 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d, 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x2, 0x14, 0xed, 0xa3, 0x5a, 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x6, 0xff, 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x1, 0xf8, 0xee, 0x17, 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46, 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32, 0xe7, 0x1e, 0x8, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, 0xb8, 0x41, 0xf, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda},
+ {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x8, 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x3, 0x10, 0xea, 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7, 0xc4, 0x3e, 0xe2, 0x18, 0xb, 0xf1, 0xae, 0x54, 0x47, 0xbd, 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, 0x4a, 0xb0, 0xef, 0x15, 0x6, 0xfc, 0x20, 0xda, 0xc9, 0x33, 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0xe, 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49, 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, 0xff, 0x5, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d, 0xc2, 0x38, 0x2b, 0xd1, 0xd, 0xf7, 0xe4, 0x1e, 0xd8, 0x22, 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x4, 0x5b, 0xa1, 0xb2, 0x48, 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0xc, 0xf6, 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c, 0xee, 0x14, 0x7, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97, 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0xf, 0x1c, 0xe6, 0x3a, 0xc0, 0xd3, 0x29, 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68, 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x2, 0x11, 0xeb, 0xaf, 0x55, 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f, 0xe3, 0x19, 0xa, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, 0xa4, 0x5e, 0x1, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd, 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0, 0xf3, 0x9, 0xd5, 0x2f, 0x3c, 0xc6},
+ {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50, 0xb, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0, 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed, 0xfd, 0x6, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d, 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0xd, 0xd6, 0x2d, 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d, 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0xc, 0xf7, 0xa7, 0x5c, 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37, 0xec, 0x17, 0x7, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0xa, 0x1a, 0xe1, 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca, 0xda, 0x21, 0xfa, 0x1, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa, 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x8, 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23, 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x3, 0x4e, 0xb5, 0xa5, 0x5e, 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0xe, 0xf5, 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e, 0xce, 0x35, 0x25, 0xde, 0x5, 0xfe, 0xee, 0x15, 0x74, 0x8f, 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x4, 0x14, 0xef, 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, 0x5f, 0xa4, 0xf4, 0xf, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f, 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12, 0x2, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79, 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x9, 0xf2, 0x29, 0xd2, 0xc2, 0x39},
+ {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87, 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1, 0x1f, 0xe3, 0xfa, 0x6, 0xf6, 0xa, 0x13, 0xef, 0x21, 0xdd, 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b, 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2, 0xdb, 0x27, 0xe9, 0x15, 0xc, 0xf0, 0xf1, 0xd, 0x14, 0xe8, 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44, 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0xb, 0xf7, 0x7, 0xfb, 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad, 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x1, 0xff, 0x3, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0, 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d, 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, 0x5, 0xf9, 0x9, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7, 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e, 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8, 0x16, 0xea, 0xf3, 0xf, 0xe, 0xf2, 0xeb, 0x17, 0xd9, 0x25, 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73, 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a, 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x8, 0xf8, 0x4, 0x1d, 0xe1, 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d, 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x2, 0xfe},
+ {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x3, 0xfe, 0xe4, 0x19, 0xd6, 0x2b, 0x31, 0xcc, 0x5, 0xf8, 0xe2, 0x1f, 0x6d, 0x90, 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7, 0x6e, 0x93, 0x89, 0x74, 0x6, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78, 0xa, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27, 0x3d, 0xc0, 0x9, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b, 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0xf, 0xf2, 0xe8, 0x15, 0xc, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a, 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65, 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, 0xf0, 0xd, 0x14, 0xe9, 0xf3, 0xe, 0xc7, 0x3a, 0x20, 0xdd, 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54, 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x8, 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, 0xf6, 0xb, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0, 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x7, 0x75, 0x88, 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf, 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x4, 0xcd, 0x30, 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x2, 0xcb, 0x36, 0x2c, 0xd1, 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e, 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2, 0x1b, 0xe6, 0xfc, 0x1},
+ {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63, 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x6, 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9, 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0xa, 0xca, 0x34, 0x2b, 0xd5, 0xed, 0x13, 0xc, 0xf2, 0x32, 0xcc, 0xd3, 0x2d, 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f, 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd, 0xd, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0xb, 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49, 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x7, 0x64, 0x9a, 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83, 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, 0x1, 0xff, 0xe2, 0x1c, 0x3, 0xfd, 0x3d, 0xc3, 0xdc, 0x22, 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47, 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x5, 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, 0x6a, 0x94, 0xf7, 0x9, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37, 0xf, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52, 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c, 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, 0xe, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x8, 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb, 0xc4, 0x3a, 0xfa, 0x4, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99, 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x2, 0xfc, 0xe3, 0x1d},
+ {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57, 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x3, 0xfc, 0x3b, 0xc4, 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51, 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x5, 0xfa, 0xdd, 0x22, 0x3e, 0xc1, 0x6, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a, 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, 0x9, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d, 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e, 0x32, 0xcd, 0xa, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb, 0x7c, 0x83, 0x9f, 0x60, 0xc, 0xf3, 0xef, 0x10, 0xd7, 0x28, 0x34, 0xcb, 0xec, 0x13, 0xf, 0xf0, 0x37, 0xc8, 0xd4, 0x2b, 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d, 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5, 0x12, 0xed, 0xf1, 0xe, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0xd, 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45, 0xf4, 0xb, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0, 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3, 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x8, 0xcf, 0x30, 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94, 0xf8, 0x7, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7, 0xfb, 0x4, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf, 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, 0xfd, 0x2, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9, 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda, 0xc6, 0x39, 0xfe, 0x1, 0x1d, 0xe2}}
+
+var mulTableLow = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+ {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
+ {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e},
+ {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11},
+ {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c},
+ {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33},
+ {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22},
+ {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d},
+ {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78},
+ {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77},
+ {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66},
+ {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69},
+ {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44},
+ {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b},
+ {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a},
+ {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55},
+ {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0},
+ {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
+ {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee},
+ {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1},
+ {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc},
+ {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3},
+ {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2},
+ {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd},
+ {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88},
+ {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87},
+ {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96},
+ {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99},
+ {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4},
+ {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb},
+ {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa},
+ {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5},
+ {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd},
+ {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2},
+ {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3},
+ {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec},
+ {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1},
+ {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce},
+ {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf},
+ {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0},
+ {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85},
+ {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a},
+ {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b},
+ {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94},
+ {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9},
+ {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6},
+ {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7},
+ {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8},
+ {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd},
+ {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2},
+ {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13},
+ {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c},
+ {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31},
+ {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e},
+ {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f},
+ {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20},
+ {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75},
+ {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a},
+ {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b},
+ {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64},
+ {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49},
+ {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46},
+ {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57},
+ {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58},
+ {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7},
+ {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8},
+ {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9},
+ {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6},
+ {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb},
+ {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4},
+ {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5},
+ {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca},
+ {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f},
+ {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90},
+ {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81},
+ {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e},
+ {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3},
+ {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac},
+ {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd},
+ {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2},
+ {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17},
+ {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18},
+ {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9},
+ {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6},
+ {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b},
+ {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24},
+ {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35},
+ {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a},
+ {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f},
+ {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60},
+ {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71},
+ {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e},
+ {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53},
+ {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c},
+ {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d},
+ {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42},
+ {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a},
+ {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15},
+ {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4},
+ {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb},
+ {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26},
+ {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29},
+ {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38},
+ {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37},
+ {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62},
+ {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d},
+ {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c},
+ {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73},
+ {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e},
+ {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51},
+ {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40},
+ {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f},
+ {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea},
+ {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5},
+ {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4},
+ {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb},
+ {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6},
+ {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9},
+ {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8},
+ {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7},
+ {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92},
+ {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d},
+ {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c},
+ {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83},
+ {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae},
+ {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1},
+ {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0},
+ {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf},
+ {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3},
+ {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc},
+ {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd},
+ {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2},
+ {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef},
+ {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0},
+ {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1},
+ {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe},
+ {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab},
+ {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4},
+ {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5},
+ {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba},
+ {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97},
+ {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98},
+ {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89},
+ {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86},
+ {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23},
+ {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c},
+ {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d},
+ {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32},
+ {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f},
+ {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10},
+ {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1},
+ {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe},
+ {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b},
+ {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54},
+ {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45},
+ {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a},
+ {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67},
+ {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68},
+ {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79},
+ {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76},
+ {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e},
+ {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21},
+ {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30},
+ {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f},
+ {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12},
+ {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d},
+ {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc},
+ {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3},
+ {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56},
+ {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59},
+ {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48},
+ {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47},
+ {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a},
+ {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65},
+ {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74},
+ {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b},
+ {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde},
+ {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1},
+ {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0},
+ {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf},
+ {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2},
+ {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed},
+ {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc},
+ {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3},
+ {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6},
+ {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9},
+ {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8},
+ {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7},
+ {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a},
+ {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95},
+ {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84},
+ {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b},
+ {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34},
+ {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b},
+ {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a},
+ {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25},
+ {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8},
+ {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7},
+ {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16},
+ {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19},
+ {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c},
+ {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43},
+ {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52},
+ {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d},
+ {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70},
+ {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f},
+ {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e},
+ {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61},
+ {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4},
+ {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb},
+ {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda},
+ {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5},
+ {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8},
+ {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7},
+ {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6},
+ {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9},
+ {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc},
+ {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3},
+ {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2},
+ {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad},
+ {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80},
+ {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f},
+ {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e},
+ {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91},
+ {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9},
+ {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6},
+ {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7},
+ {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8},
+ {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5},
+ {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa},
+ {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb},
+ {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4},
+ {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1},
+ {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe},
+ {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf},
+ {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0},
+ {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d},
+ {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82},
+ {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93},
+ {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c},
+ {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39},
+ {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36},
+ {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27},
+ {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28},
+ {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5},
+ {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa},
+ {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b},
+ {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14},
+ {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41},
+ {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e},
+ {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f},
+ {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50},
+ {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d},
+ {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72},
+ {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63},
+ {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c}}
+var mulTableHigh = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+ {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0},
+ {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd},
+ {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd},
+ {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7},
+ {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17},
+ {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a},
+ {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea},
+ {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3},
+ {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23},
+ {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e},
+ {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde},
+ {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34},
+ {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4},
+ {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9},
+ {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39},
+ {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb},
+ {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b},
+ {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46},
+ {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6},
+ {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c},
+ {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac},
+ {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1},
+ {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51},
+ {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68},
+ {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98},
+ {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95},
+ {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65},
+ {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f},
+ {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f},
+ {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72},
+ {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82},
+ {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b},
+ {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b},
+ {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96},
+ {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66},
+ {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c},
+ {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c},
+ {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71},
+ {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81},
+ {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8},
+ {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48},
+ {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45},
+ {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5},
+ {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f},
+ {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf},
+ {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2},
+ {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52},
+ {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0},
+ {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20},
+ {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d},
+ {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd},
+ {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37},
+ {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7},
+ {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca},
+ {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a},
+ {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3},
+ {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3},
+ {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe},
+ {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe},
+ {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4},
+ {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14},
+ {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19},
+ {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9},
+ {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6},
+ {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26},
+ {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b},
+ {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb},
+ {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31},
+ {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1},
+ {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc},
+ {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c},
+ {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5},
+ {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5},
+ {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8},
+ {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8},
+ {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2},
+ {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12},
+ {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f},
+ {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef},
+ {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d},
+ {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d},
+ {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90},
+ {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60},
+ {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a},
+ {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a},
+ {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77},
+ {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87},
+ {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe},
+ {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e},
+ {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43},
+ {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3},
+ {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59},
+ {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9},
+ {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4},
+ {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54},
+ {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd},
+ {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d},
+ {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40},
+ {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0},
+ {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a},
+ {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa},
+ {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7},
+ {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57},
+ {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e},
+ {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e},
+ {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93},
+ {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63},
+ {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89},
+ {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79},
+ {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74},
+ {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84},
+ {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6},
+ {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6},
+ {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb},
+ {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb},
+ {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1},
+ {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11},
+ {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c},
+ {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec},
+ {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5},
+ {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25},
+ {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28},
+ {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8},
+ {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32},
+ {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2},
+ {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf},
+ {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f},
+ {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1},
+ {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41},
+ {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c},
+ {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc},
+ {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56},
+ {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6},
+ {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab},
+ {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b},
+ {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62},
+ {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92},
+ {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f},
+ {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f},
+ {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85},
+ {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75},
+ {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78},
+ {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88},
+ {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa},
+ {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa},
+ {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7},
+ {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7},
+ {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed},
+ {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d},
+ {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10},
+ {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0},
+ {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9},
+ {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29},
+ {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24},
+ {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4},
+ {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e},
+ {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce},
+ {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3},
+ {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33},
+ {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda},
+ {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a},
+ {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27},
+ {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7},
+ {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d},
+ {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd},
+ {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0},
+ {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30},
+ {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9},
+ {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9},
+ {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4},
+ {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4},
+ {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee},
+ {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e},
+ {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13},
+ {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3},
+ {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61},
+ {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91},
+ {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c},
+ {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c},
+ {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86},
+ {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76},
+ {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b},
+ {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b},
+ {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2},
+ {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42},
+ {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f},
+ {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf},
+ {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55},
+ {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5},
+ {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8},
+ {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58},
+ {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67},
+ {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97},
+ {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a},
+ {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a},
+ {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80},
+ {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70},
+ {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d},
+ {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d},
+ {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4},
+ {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44},
+ {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49},
+ {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9},
+ {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53},
+ {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3},
+ {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae},
+ {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e},
+ {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc},
+ {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c},
+ {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21},
+ {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1},
+ {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b},
+ {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb},
+ {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6},
+ {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36},
+ {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
+ {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
+ {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2},
+ {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2},
+ {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8},
+ {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18},
+ {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15},
+ {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5},
+ {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc},
+ {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc},
+ {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1},
+ {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1},
+ {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb},
+ {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b},
+ {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16},
+ {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6},
+ {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf},
+ {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f},
+ {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22},
+ {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2},
+ {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38},
+ {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8},
+ {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5},
+ {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35},
+ {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7},
+ {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47},
+ {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a},
+ {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba},
+ {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50},
+ {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0},
+ {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad},
+ {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d},
+ {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64},
+ {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94},
+ {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99},
+ {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69},
+ {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83},
+ {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73},
+ {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e},
+ {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e}}
+
+// galMultiply multiplies two elements of the field.
+// It uses the full 256x256 lookup table, which is roughly 40% faster
+// than the log/exp-table variant (kept below for reference).
+func galMultiply(a, b byte) byte {
+	return mulTable[a][b]
+}
+
+// Original function:
+/*
+// galMultiply multiplies two elements of the field.
+func galMultiply(a, b byte) byte {
+ if a == 0 || b == 0 {
+ return 0
+ }
+ logA := int(logTable[a])
+ logB := int(logTable[b])
+ return expTable[logA+logB]
+}
+*/
+
+// galDivide returns a divided by b in the Galois field; it is the
+// inverse of galMultiply. A zero dividend yields zero (even when the
+// divisor is also zero); a zero divisor otherwise panics.
+func galDivide(a, b byte) byte {
+	if a == 0 {
+		return 0
+	}
+	if b == 0 {
+		panic("Argument 'divisor' is 0")
+	}
+	// Division is subtraction of discrete logs, wrapped into [0, 255).
+	diff := int(logTable[a]) - int(logTable[b])
+	if diff < 0 {
+		diff += 255
+	}
+	return expTable[diff]
+}
+
+// galExp computes a**n in the field.
+//
+// The result is the same as multiplying a by itself n times.
+func galExp(a byte, n int) byte {
+	switch {
+	case n == 0:
+		return 1
+	case a == 0:
+		return 0
+	}
+
+	// Exponentiation is multiplication of the discrete log,
+	// reduced modulo 255 by repeated subtraction.
+	logResult := n * int(logTable[a])
+	for logResult >= 255 {
+		logResult -= 255
+	}
+	return expTable[logResult]
+}
+
+// genAvx2Matrix expands matrixRows into the interleaved coefficient
+// layout consumed by the AVX2 code-generated kernels.
+// Each (input, output) coefficient occupies 64 bytes: the 16-byte
+// low-nibble table twice, then the 16-byte high-nibble table twice
+// (presumably so one 32-byte YMM load serves both lanes of a nibble
+// shuffle — confirm against the generated assembly).
+// dst is reused when its capacity suffices; the (possibly
+// reallocated) slice is returned.
+func genAvx2Matrix(matrixRows [][]byte, inputs, outputs int, dst []byte) []byte {
+	if !avx2CodeGen {
+		panic("codegen not enabled")
+	}
+	total := inputs * outputs
+
+	// Duplicated in+out
+	wantBytes := total * 32 * 2
+	if cap(dst) < wantBytes {
+		dst = make([]byte, wantBytes)
+	} else {
+		dst = dst[:wantBytes]
+	}
+	for i, row := range matrixRows[:outputs] {
+		for j, idx := range row[:inputs] {
+			// Input-major layout: entry for (input j, output i)
+			// lives at 64-byte slot j*outputs+i.
+			dstIdx := (j*outputs + i) * 64
+			lo := mulTableLow[idx][:]
+			hi := mulTableHigh[idx][:]
+			copy(dst[dstIdx:], lo)
+			copy(dst[dstIdx+16:], lo)
+			copy(dst[dstIdx+32:], hi)
+			copy(dst[dstIdx+48:], hi)
+		}
+	}
+	return dst
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
new file mode 100644
index 0000000..720196f
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
@@ -0,0 +1,338 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2019, Minio, Inc.
+
+package reedsolomon
+
+import (
+ "sync"
+)
+
+// The _galMulAVX512Parallel8x routines are implemented in
+// galoisAvx512_amd64.s. //go:noescape asserts their slice arguments do
+// not escape to the heap.
+
+//go:noescape
+func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
+
+//go:noescape
+func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
+
+//go:noescape
+func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
+
+const (
+	dimIn        = 8                            // Number of input rows processed simultaneously
+	dimOut81     = 1                            // Number of output rows processed simultaneously for x1 routine
+	dimOut82     = 2                            // Number of output rows processed simultaneously for x2 routine
+	dimOut84     = 4                            // Number of output rows processed simultaneously for x4 routine
+	matrixSize81 = (16 + 16) * dimIn * dimOut81 // Dimension of slice of matrix coefficient passed into x1 routine
+	matrixSize82 = (16 + 16) * dimIn * dimOut82 // Dimension of slice of matrix coefficient passed into x2 routine
+	matrixSize84 = (16 + 16) * dimIn * dimOut84 // Dimension of slice of matrix coefficient passed into x4 routine
+)
+
+// Construct block of matrix coefficients for single output row in parallel.
+//
+// For each (input, output) pair the 16-byte low-nibble and 16-byte
+// high-nibble multiplication tables of the coefficient are copied into
+// a 32-byte slot in the order the assembly kernel reads them. Inputs
+// beyond the end of a matrix row are zeroed so they contribute nothing.
+func setupMatrix81(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize81]byte) {
+	offset := 0
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut81; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				coeff := matrixRows[iRow][c]
+				copy(matrix[offset*32:], mulTableLow[coeff][:])
+				copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+			} else {
+				// coefficients not used for this input shard (so null out)
+				v := matrix[offset*32 : offset*32+32]
+				for i := range v {
+					v[i] = 0
+				}
+			}
+			// Interleave slots: stride by dimIn, then wrap back into the
+			// next position within the dimIn*dimOut81 block.
+			offset += dimIn
+			if offset >= dimIn*dimOut81 {
+				offset -= dimIn*dimOut81 - 1
+			}
+		}
+	}
+}
+
+// Construct block of matrix coefficients for 2 output rows in parallel.
+//
+// Same layout as setupMatrix81, but interleaving coefficients for two
+// output rows (dimOut82 slots per input) for the x2 assembly kernel.
+func setupMatrix82(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize82]byte) {
+	offset := 0
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				coeff := matrixRows[iRow][c]
+				copy(matrix[offset*32:], mulTableLow[coeff][:])
+				copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+			} else {
+				// coefficients not used for this input shard (so null out)
+				v := matrix[offset*32 : offset*32+32]
+				for i := range v {
+					v[i] = 0
+				}
+			}
+			// Interleave slots: stride by dimIn, wrapping within the
+			// dimIn*dimOut82 block.
+			offset += dimIn
+			if offset >= dimIn*dimOut82 {
+				offset -= dimIn*dimOut82 - 1
+			}
+		}
+	}
+}
+
+// Construct block of matrix coefficients for 4 output rows in parallel.
+//
+// Same layout as setupMatrix81, but interleaving coefficients for four
+// output rows (dimOut84 slots per input) for the x4 assembly kernel.
+func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize84]byte) {
+	offset := 0
+	for c := inputOffset; c < inputOffset+dimIn; c++ {
+		for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				coeff := matrixRows[iRow][c]
+				copy(matrix[offset*32:], mulTableLow[coeff][:])
+				copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+			} else {
+				// coefficients not used for this input shard (so null out)
+				v := matrix[offset*32 : offset*32+32]
+				for i := range v {
+					v[i] = 0
+				}
+			}
+			// Interleave slots: stride by dimIn, wrapping within the
+			// dimIn*dimOut84 block.
+			offset += dimIn
+			if offset >= dimIn*dimOut84 {
+				offset -= dimIn*dimOut84 - 1
+			}
+		}
+	}
+}
+
+// Invoke AVX512 routine for single output row in parallel.
+//
+// The assembly kernel handles whole 64-byte chunks of [start, stop);
+// any trailing bytes are finished by the scalar galMulAVX512LastInput.
+func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) {
+	done := stop - start
+	if done <= 0 {
+		return
+	}
+
+	// Clamp the input/output windows to the shards actually present.
+	inputEnd := inputOffset + dimIn
+	if inputEnd > len(in) {
+		inputEnd = len(in)
+	}
+	outputEnd := outputOffset + dimOut81
+	if outputEnd > len(out) {
+		outputEnd = len(out)
+	}
+
+	// We know the max size, alloc temp array.
+	var inTmp [dimIn][]byte
+	for i, v := range in[inputOffset:inputEnd] {
+		inTmp[i] = v[start:stop]
+	}
+	var outTmp [dimOut81][]byte
+	for i, v := range out[outputOffset:outputEnd] {
+		outTmp[i] = v[start:stop]
+	}
+
+	addTo := inputOffset != 0 // Except for the first input column, add to previous results
+	_galMulAVX512Parallel81(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix81, addTo)
+
+	// The kernel covered done&^63 bytes; process the remainder in Go.
+	done = start + ((done >> 6) << 6)
+	if done < stop {
+		galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
+	}
+}
+
+// Invoke AVX512 routine for 2 output rows in parallel.
+//
+// Same structure as galMulAVX512Parallel81, but driving the x2 kernel;
+// trailing bytes below a 64-byte multiple fall back to the scalar path.
+func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) {
+	done := stop - start
+	if done <= 0 {
+		return
+	}
+
+	// Clamp the input/output windows to the shards actually present.
+	inputEnd := inputOffset + dimIn
+	if inputEnd > len(in) {
+		inputEnd = len(in)
+	}
+	outputEnd := outputOffset + dimOut82
+	if outputEnd > len(out) {
+		outputEnd = len(out)
+	}
+
+	// We know the max size, alloc temp array.
+	var inTmp [dimIn][]byte
+	for i, v := range in[inputOffset:inputEnd] {
+		inTmp[i] = v[start:stop]
+	}
+	var outTmp [dimOut82][]byte
+	for i, v := range out[outputOffset:outputEnd] {
+		outTmp[i] = v[start:stop]
+	}
+
+	addTo := inputOffset != 0 // Except for the first input column, add to previous results
+	_galMulAVX512Parallel82(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix82, addTo)
+
+	// The kernel covered done&^63 bytes; process the remainder in Go.
+	done = start + ((done >> 6) << 6)
+	if done < stop {
+		galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
+	}
+}
+
+// Invoke AVX512 routine for 4 output rows in parallel.
+//
+// Same structure as galMulAVX512Parallel81, but driving the x4 kernel;
+// trailing bytes below a 64-byte multiple fall back to the scalar path.
+func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) {
+	done := stop - start
+	if done <= 0 {
+		return
+	}
+
+	// Clamp the input/output windows to the shards actually present.
+	inputEnd := inputOffset + dimIn
+	if inputEnd > len(in) {
+		inputEnd = len(in)
+	}
+	outputEnd := outputOffset + dimOut84
+	if outputEnd > len(out) {
+		outputEnd = len(out)
+	}
+
+	// We know the max size, alloc temp array.
+	var inTmp [dimIn][]byte
+	for i, v := range in[inputOffset:inputEnd] {
+		inTmp[i] = v[start:stop]
+	}
+	var outTmp [dimOut84][]byte
+	for i, v := range out[outputOffset:outputEnd] {
+		outTmp[i] = v[start:stop]
+	}
+
+	addTo := inputOffset != 0 // Except for the first input column, add to previous results
+	_galMulAVX512Parallel84(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix84, addTo)
+
+	// The kernel covered done&^63 bytes; process the remainder in Go.
+	done = start + ((done >> 6) << 6)
+	if done < stop {
+		galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
+	}
+}
+
+// galMulAVX512LastInput is the scalar fallback for the AVX512 kernels:
+// it processes bytes [done, stop) of each shard — the tail that did not
+// fill a whole 64-byte chunk — with plain table lookups.
+func galMulAVX512LastInput(inputOffset int, inputEnd int, outputOffset int, outputEnd int, matrixRows [][]byte, done int, stop int, out [][]byte, in [][]byte) {
+	for c := inputOffset; c < inputEnd; c++ {
+		for iRow := outputOffset; iRow < outputEnd; iRow++ {
+			if c < len(matrixRows[iRow]) {
+				mt := mulTable[matrixRows[iRow][c]][:256]
+				for i := done; i < stop; i++ {
+					if c == 0 { // only set value for first input column
+						out[iRow][i] = mt[in[c][i]]
+					} else { // and add for all others
+						out[iRow][i] ^= mt[in[c][i]]
+					}
+				}
+			}
+		}
+	}
+}
+
+// Perform the same as codeSomeShards, but taking advantage of
+// AVX512 parallelism for up to 4x faster execution as compared to AVX2.
+//
+// Work proceeds in rounds of r.o.perRound bytes. Within each round,
+// output rows are consumed greedily: batches of 4 first, then one batch
+// of 2, then a final single row for uneven parity counts.
+func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	// Process using no goroutines
+	start, end := 0, r.o.perRound
+	if end > byteCount {
+		end = byteCount
+	}
+	for start < byteCount {
+		// Coefficient buffers rebuilt per round by the setupMatrix* calls.
+		matrix84 := [matrixSize84]byte{}
+		matrix82 := [matrixSize82]byte{}
+		matrix81 := [matrixSize81]byte{}
+
+		outputRow := 0
+		// First process (multiple) batches of 4 output rows in parallel
+		if outputRow+dimOut84 <= outputCount {
+			for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
+				for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+					setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
+					galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix84)
+				}
+			}
+		}
+		// Then process a (single) batch of 2 output rows in parallel
+		if outputRow+dimOut82 <= outputCount {
+			for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+				setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
+				galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix82)
+			}
+			outputRow += dimOut82
+		}
+		// Lastly, we may have a single output row left (for uneven parity)
+		if outputRow < outputCount {
+			for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+				setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
+				galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix81)
+			}
+		}
+
+		// Advance to the next round, clamping at byteCount.
+		start = end
+		end += r.o.perRound
+		if end > byteCount {
+			end = byteCount
+		}
+	}
+}
+
+// Perform the same as codeSomeShards, but taking advantage of
+// AVX512 parallelism for up to 4x faster execution as compared to AVX2.
+//
+// Parallel variant: the byte range is split into 64-byte-aligned spans,
+// one goroutine per span (bounded by r.o.maxGoroutines / minSplitSize).
+// Goroutines never share output bytes, so no locking is needed.
+func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 64
+	do = (do + 63) & (^63)
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(grStart, grStop int) {
+			start, stop := grStart, grStart+r.o.perRound
+			if stop > grStop {
+				stop = grStop
+			}
+			// Loop for each round.
+			// Coefficient buffers are per-goroutine (stack-local) and are
+			// refilled by the setupMatrix* calls inside the loop.
+			matrix84 := [matrixSize84]byte{}
+			matrix82 := [matrixSize82]byte{}
+			matrix81 := [matrixSize81]byte{}
+			for start < grStop {
+				outputRow := 0
+				// First process (multiple) batches of 4 output rows in parallel
+				if outputRow+dimOut84 <= outputCount {
+					// 1K matrix buffer
+					for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
+						for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+							setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
+							galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix84)
+						}
+					}
+				}
+				// Then process a (single) batch of 2 output rows in parallel
+				if outputRow+dimOut82 <= outputCount {
+					// 512B matrix buffer
+					for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+						setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
+						galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix82)
+					}
+					outputRow += dimOut82
+				}
+				// Lastly, we may have a single output row left (for uneven parity)
+				if outputRow < outputCount {
+					for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+						setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
+						galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix81)
+					}
+				}
+				// Advance to the next round within this goroutine's span.
+				start = stop
+				stop += r.o.perRound
+				if stop > grStop {
+					stop = grStop
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
new file mode 100644
index 0000000..97ad420
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
@@ -0,0 +1,400 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2019, Minio, Inc.
+
+// LOAD fetches the data pointer of the input shard whose slice header
+// sits at OFFSET in the header table (SI), reads 64 bytes at index R11,
+// and splits each byte into low (Z0) and high (Z1) nibbles.
+#define LOAD(OFFSET) \
+	MOVQ      OFFSET(SI), BX  \
+	VMOVDQU64 (BX)(R11*1), Z0 \
+	VPSRLQ    $4, Z0, Z1      \ // high input
+	VPANDQ    Z2, Z0, Z0      \ // low input
+	VPANDQ    Z2, Z1, Z1      // high input
+
+// GALOIS_MUL looks up the low/high nibble products via VPSHUFB and
+// folds them into OUT (VPTERNLOGD imm 0x96 is a three-way XOR).
+#define GALOIS_MUL(MUL_LO, MUL_HI, LO, HI, OUT) \
+	VPSHUFB    Z0, MUL_LO, LO \ // mul low part
+	VPSHUFB    Z1, MUL_HI, HI \ // mul high part
+	VPTERNLOGD $0x96, LO, HI, OUT
+
+// GALOIS broadcasts the 128-bit table lanes selected by C1/C2 out of IN
+// and then performs GALOIS_MUL with them.
+#define GALOIS(C1, C2, IN, LO, HI, OUT) \
+	VSHUFI64X2 $C1, IN, IN, LO \
+	VSHUFI64X2 $C2, IN, IN, HI \
+	GALOIS_MUL(LO, HI, LO, HI, OUT)
+
+//
+// Process single output row from a total of 8 input rows
+//
+// func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
+//
+// Z16-Z31 hold the coefficient tables for up to 8 inputs. addTo drives
+// mask K1: addTo=1 gives AX=-1 (all-ones mask, previous output is
+// loaded and accumulated), addTo=0 gives AX=0 so VMOVDQU64.Z zeroes Z4.
+// The CMPQ/JE ladder skips inputs beyond len(in).
+TEXT ·_galMulAVX512Parallel81(SB), 7, $0
+	MOVQ  in+0(FP), SI
+	MOVQ  8(SI), R9 // R9: len(in)
+	SHRQ  $6, R9    // len(in) / 64
+	TESTQ R9, R9
+	JZ    done_avx512_parallel81
+
+	MOVQ      matrix+48(FP), SI
+	VMOVDQU64 0x000(SI), Z16
+	VMOVDQU64 0x040(SI), Z17
+	VMOVDQU64 0x080(SI), Z18
+	VMOVDQU64 0x0c0(SI), Z19
+
+	// Initialize multiplication constants
+	VSHUFI64X2 $0x55, Z16, Z16, Z20
+	VSHUFI64X2 $0xaa, Z16, Z16, Z24
+	VSHUFI64X2 $0xff, Z16, Z16, Z28
+	VSHUFI64X2 $0x00, Z16, Z16, Z16
+
+	VSHUFI64X2 $0x55, Z17, Z17, Z21
+	VSHUFI64X2 $0xaa, Z17, Z17, Z25
+	VSHUFI64X2 $0xff, Z17, Z17, Z29
+	VSHUFI64X2 $0x00, Z17, Z17, Z17
+
+	VSHUFI64X2 $0x55, Z18, Z18, Z22
+	VSHUFI64X2 $0xaa, Z18, Z18, Z26
+	VSHUFI64X2 $0xff, Z18, Z18, Z30
+	VSHUFI64X2 $0x00, Z18, Z18, Z18
+
+	VSHUFI64X2 $0x55, Z19, Z19, Z23
+	VSHUFI64X2 $0xaa, Z19, Z19, Z27
+	VSHUFI64X2 $0xff, Z19, Z19, Z31
+	VSHUFI64X2 $0x00, Z19, Z19, Z19
+
+	MOVQ         $15, BX
+	VPBROADCASTB BX, Z2
+
+	MOVB  addTo+56(FP), AX
+	IMULQ $-0x1, AX
+	KMOVQ AX, K1
+	MOVQ  in+0(FP), SI     // SI: &in
+	MOVQ  in_len+8(FP), AX // number of inputs
+	XORQ  R11, R11
+	MOVQ  out+24(FP), DX
+	MOVQ  (DX), DX         // DX: &out[0][0]
+
+loopback_avx512_parallel81:
+	VMOVDQU64.Z (DX), K1, Z4
+
+	LOAD(0x00) // &in[0][0]
+	GALOIS_MUL(Z16, Z20, Z14, Z15, Z4)
+
+	CMPQ AX, $1
+	JE   skip_avx512_parallel81
+
+	LOAD(0x18) // &in[1][0]
+	GALOIS_MUL(Z24, Z28, Z14, Z15, Z4)
+
+	CMPQ AX, $2
+	JE   skip_avx512_parallel81
+
+	LOAD(0x30) // &in[2][0]
+	GALOIS_MUL(Z17, Z21, Z14, Z15, Z4)
+
+	CMPQ AX, $3
+	JE   skip_avx512_parallel81
+
+	LOAD(0x48) // &in[3][0]
+	GALOIS_MUL(Z25, Z29, Z14, Z15, Z4)
+
+	CMPQ AX, $4
+	JE   skip_avx512_parallel81
+
+	LOAD(0x60) // &in[4][0]
+	GALOIS_MUL(Z18, Z22, Z14, Z15, Z4)
+
+	CMPQ AX, $5
+	JE   skip_avx512_parallel81
+
+	LOAD(0x78) // &in[5][0]
+	GALOIS_MUL(Z26, Z30, Z14, Z15, Z4)
+
+	CMPQ AX, $6
+	JE   skip_avx512_parallel81
+
+	LOAD(0x90) // &in[6][0]
+	GALOIS_MUL(Z19, Z23, Z14, Z15, Z4)
+
+	CMPQ AX, $7
+	JE   skip_avx512_parallel81
+
+	LOAD(0xa8) // &in[7][0]
+	GALOIS_MUL(Z27, Z31, Z14, Z15, Z4)
+
+skip_avx512_parallel81:
+	VMOVDQU64 Z4, (DX)
+
+	ADDQ $64, R11 // in4+=64
+
+	ADDQ $64, DX // out+=64
+
+	SUBQ $1, R9
+	JNZ  loopback_avx512_parallel81
+
+done_avx512_parallel81:
+	VZEROUPPER
+	RET
+
+//
+// Process 2 output rows in parallel from a total of 8 input rows
+//
+// func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
+//
+// Like the x1 routine but maintaining two accumulators (Z4, Z5). Tables
+// for the first four inputs are pre-splatted; inputs 4-7 reuse Z18/Z19
+// (and Z22/Z23) via the lane-selecting GALOIS macro to save registers.
+TEXT ·_galMulAVX512Parallel82(SB), 7, $0
+	MOVQ  in+0(FP), SI
+	MOVQ  8(SI), R9 // R9: len(in)
+	SHRQ  $6, R9    // len(in) / 64
+	TESTQ R9, R9
+	JZ    done_avx512_parallel82
+
+	MOVQ      matrix+48(FP), SI
+	VMOVDQU64 0x000(SI), Z16
+	VMOVDQU64 0x040(SI), Z17
+	VMOVDQU64 0x080(SI), Z18
+	VMOVDQU64 0x0c0(SI), Z19
+	VMOVDQU64 0x100(SI), Z20
+	VMOVDQU64 0x140(SI), Z21
+	VMOVDQU64 0x180(SI), Z22
+	VMOVDQU64 0x1c0(SI), Z23
+
+	// Initialize multiplication constants
+	VSHUFI64X2 $0x55, Z16, Z16, Z24
+	VSHUFI64X2 $0xaa, Z16, Z16, Z25
+	VSHUFI64X2 $0xff, Z16, Z16, Z26
+	VSHUFI64X2 $0x00, Z16, Z16, Z16
+
+	VSHUFI64X2 $0x55, Z20, Z20, Z27
+	VSHUFI64X2 $0xaa, Z20, Z20, Z28
+	VSHUFI64X2 $0xff, Z20, Z20, Z29
+	VSHUFI64X2 $0x00, Z20, Z20, Z20
+
+	VSHUFI64X2 $0x55, Z17, Z17, Z30
+	VSHUFI64X2 $0xaa, Z17, Z17, Z31
+	VSHUFI64X2 $0xff, Z17, Z17, Z11
+	VSHUFI64X2 $0x00, Z17, Z17, Z17
+
+	VSHUFI64X2 $0x55, Z21, Z21, Z8
+	VSHUFI64X2 $0xaa, Z21, Z21, Z9
+	VSHUFI64X2 $0xff, Z21, Z21, Z10
+	VSHUFI64X2 $0x00, Z21, Z21, Z21
+
+	MOVQ         $15, BX
+	VPBROADCASTB BX, Z2
+
+	MOVB  addTo+56(FP), AX
+	IMULQ $-0x1, AX
+	KMOVQ AX, K1
+	MOVQ  in+0(FP), SI     // SI: &in
+	MOVQ  in_len+8(FP), AX // number of inputs
+	XORQ  R11, R11
+	MOVQ  out+24(FP), DX
+	MOVQ  24(DX), CX       // CX: &out[1][0]
+	MOVQ  (DX), DX         // DX: &out[0][0]
+
+loopback_avx512_parallel82:
+	VMOVDQU64.Z (DX), K1, Z4
+	VMOVDQU64.Z (CX), K1, Z5
+
+	LOAD(0x00) // &in[0][0]
+	GALOIS_MUL(Z16, Z24, Z14, Z15, Z4)
+	GALOIS_MUL(Z20, Z27, Z12, Z13, Z5)
+
+	CMPQ AX, $1
+	JE   skip_avx512_parallel82
+
+	LOAD(0x18) // &in[1][0]
+	GALOIS_MUL(Z25, Z26, Z14, Z15, Z4)
+	GALOIS_MUL(Z28, Z29, Z12, Z13, Z5)
+
+	CMPQ AX, $2
+	JE   skip_avx512_parallel82
+
+	LOAD(0x30) // &in[2][0]
+	GALOIS_MUL(Z17, Z30, Z14, Z15, Z4)
+	GALOIS_MUL(Z21, Z8, Z12, Z13, Z5)
+
+	CMPQ AX, $3
+	JE   skip_avx512_parallel82
+
+	LOAD(0x48) // &in[3][0]
+	GALOIS_MUL(Z31, Z11, Z14, Z15, Z4)
+	GALOIS_MUL(Z9, Z10, Z12, Z13, Z5)
+
+	CMPQ AX, $4
+	JE   skip_avx512_parallel82
+
+	LOAD(0x60) // &in[4][0]
+	GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
+	GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
+
+	CMPQ AX, $5
+	JE   skip_avx512_parallel82
+
+	LOAD(0x78) // &in[5][0]
+	GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
+	GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
+
+	CMPQ AX, $6
+	JE   skip_avx512_parallel82
+
+	LOAD(0x90) // &in[6][0]
+	GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
+	GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
+
+	CMPQ AX, $7
+	JE   skip_avx512_parallel82
+
+	LOAD(0xa8) // &in[7][0]
+	GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
+	GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
+
+skip_avx512_parallel82:
+	VMOVDQU64 Z4, (DX)
+	VMOVDQU64 Z5, (CX)
+
+	ADDQ $64, R11 // in4+=64
+
+	ADDQ $64, DX // out+=64
+	ADDQ $64, CX // out2+=64
+
+	SUBQ $1, R9
+	JNZ  loopback_avx512_parallel82
+
+done_avx512_parallel82:
+	VZEROUPPER
+	RET
+
+//
+// Process 4 output rows in parallel from a total of 8 input rows
+//
+// func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
+//
+// Four accumulators (Z4-Z7), one per output row. All 16 coefficient
+// table registers (Z16-Z31) are loaded raw and lane-selected per input
+// by the GALOIS macro, since no registers remain for pre-splatting.
+TEXT ·_galMulAVX512Parallel84(SB), 7, $0
+	MOVQ  in+0(FP), SI
+	MOVQ  8(SI), R9 // R9: len(in)
+	SHRQ  $6, R9    // len(in) / 64
+	TESTQ R9, R9
+	JZ    done_avx512_parallel84
+
+	MOVQ      matrix+48(FP), SI
+	VMOVDQU64 0x000(SI), Z16
+	VMOVDQU64 0x040(SI), Z17
+	VMOVDQU64 0x080(SI), Z18
+	VMOVDQU64 0x0c0(SI), Z19
+	VMOVDQU64 0x100(SI), Z20
+	VMOVDQU64 0x140(SI), Z21
+	VMOVDQU64 0x180(SI), Z22
+	VMOVDQU64 0x1c0(SI), Z23
+	VMOVDQU64 0x200(SI), Z24
+	VMOVDQU64 0x240(SI), Z25
+	VMOVDQU64 0x280(SI), Z26
+	VMOVDQU64 0x2c0(SI), Z27
+	VMOVDQU64 0x300(SI), Z28
+	VMOVDQU64 0x340(SI), Z29
+	VMOVDQU64 0x380(SI), Z30
+	VMOVDQU64 0x3c0(SI), Z31
+
+	MOVQ         $15, BX
+	VPBROADCASTB BX, Z2
+
+	MOVB  addTo+56(FP), AX
+	IMULQ $-0x1, AX
+	KMOVQ AX, K1
+	MOVQ  in+0(FP), SI     // SI: &in
+	MOVQ  in_len+8(FP), AX // number of inputs
+	XORQ  R11, R11
+	MOVQ  out+24(FP), DX
+	MOVQ  24(DX), CX       // CX: &out[1][0]
+	MOVQ  48(DX), R10      // R10: &out[2][0]
+	MOVQ  72(DX), R12      // R12: &out[3][0]
+	MOVQ  (DX), DX         // DX: &out[0][0]
+
+loopback_avx512_parallel84:
+	VMOVDQU64.Z (DX), K1, Z4
+	VMOVDQU64.Z (CX), K1, Z5
+	VMOVDQU64.Z (R10), K1, Z6
+	VMOVDQU64.Z (R12), K1, Z7
+
+	LOAD(0x00) // &in[0][0]
+	GALOIS(0x00, 0x55, Z16, Z14, Z15, Z4)
+	GALOIS(0x00, 0x55, Z20, Z12, Z13, Z5)
+	GALOIS(0x00, 0x55, Z24, Z10, Z11, Z6)
+	GALOIS(0x00, 0x55, Z28, Z8, Z9, Z7)
+
+	CMPQ AX, $1
+	JE   skip_avx512_parallel84
+
+	LOAD(0x18) // &in[1][0]
+	GALOIS(0xaa, 0xff, Z16, Z14, Z15, Z4)
+	GALOIS(0xaa, 0xff, Z20, Z12, Z13, Z5)
+	GALOIS(0xaa, 0xff, Z24, Z10, Z11, Z6)
+	GALOIS(0xaa, 0xff, Z28, Z8, Z9, Z7)
+
+	CMPQ AX, $2
+	JE   skip_avx512_parallel84
+
+	LOAD(0x30) // &in[2][0]
+	GALOIS(0x00, 0x55, Z17, Z14, Z15, Z4)
+	GALOIS(0x00, 0x55, Z21, Z12, Z13, Z5)
+	GALOIS(0x00, 0x55, Z25, Z10, Z11, Z6)
+	GALOIS(0x00, 0x55, Z29, Z8, Z9, Z7)
+
+	CMPQ AX, $3
+	JE   skip_avx512_parallel84
+
+	LOAD(0x48) // &in[3][0]
+	GALOIS(0xaa, 0xff, Z17, Z14, Z15, Z4)
+	GALOIS(0xaa, 0xff, Z21, Z12, Z13, Z5)
+	GALOIS(0xaa, 0xff, Z25, Z10, Z11, Z6)
+	GALOIS(0xaa, 0xff, Z29, Z8, Z9, Z7)
+
+	CMPQ AX, $4
+	JE   skip_avx512_parallel84
+
+	LOAD(0x60) // &in[4][0]
+	GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
+	GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
+	GALOIS(0x00, 0x55, Z26, Z10, Z11, Z6)
+	GALOIS(0x00, 0x55, Z30, Z8, Z9, Z7)
+
+	CMPQ AX, $5
+	JE   skip_avx512_parallel84
+
+	LOAD(0x78) // &in[5][0]
+	GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
+	GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
+	GALOIS(0xaa, 0xff, Z26, Z10, Z11, Z6)
+	GALOIS(0xaa, 0xff, Z30, Z8, Z9, Z7)
+
+	CMPQ AX, $6
+	JE   skip_avx512_parallel84
+
+	LOAD(0x90) // &in[6][0]
+	GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
+	GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
+	GALOIS(0x00, 0x55, Z27, Z10, Z11, Z6)
+	GALOIS(0x00, 0x55, Z31, Z8, Z9, Z7)
+
+	CMPQ AX, $7
+	JE   skip_avx512_parallel84
+
+	LOAD(0xa8) // &in[7][0]
+	GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
+	GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
+	GALOIS(0xaa, 0xff, Z27, Z10, Z11, Z6)
+	GALOIS(0xaa, 0xff, Z31, Z8, Z9, Z7)
+
+skip_avx512_parallel84:
+	VMOVDQU64 Z4, (DX)
+	VMOVDQU64 Z5, (CX)
+	VMOVDQU64 Z6, (R10)
+	VMOVDQU64 Z7, (R12)
+
+	ADDQ $64, R11 // in4+=64
+
+	ADDQ $64, DX  // out+=64
+	ADDQ $64, CX  // out2+=64
+	ADDQ $64, R10 // out3+=64
+	ADDQ $64, R12 // out4+=64
+
+	SUBQ $1, R9
+	JNZ  loopback_avx512_parallel84
+
+done_avx512_parallel84:
+	VZEROUPPER
+	RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
new file mode 100644
index 0000000..f757f9d
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
@@ -0,0 +1,138 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+// Assembly routines implemented in galois_amd64.s. The _64 variants
+// process 64 bytes per iteration instead of 16 (SSSE3/SSE2) or 32 (AVX2).
+// //go:noescape asserts the slice arguments do not escape to the heap.
+
+//go:noescape
+func galMulSSSE3(low, high, in, out []byte)
+
+//go:noescape
+func galMulSSSE3Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2(low, high, in, out []byte)
+
+//go:noescape
+func sSE2XorSlice(in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor_64(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2_64(low, high, in, out []byte)
+
+//go:noescape
+func sSE2XorSlice_64(in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulSSSE3(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] = low[l] ^ high[h]
+ }
+}
+
+func galMulSSSE3Xor(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] ^= low[l] ^ high[h]
+ }
+}
+*/
+
+// bigSwitchover is the input size (in bytes) at or above which the
+// 64-bytes-per-loop (_64) assembler routines are used.
+const bigSwitchover = 128
+
+// galMulSlice sets out[i] = c * in[i] (GF(2^8)) for every byte of in.
+// Fast paths: AVX2 in 64- and 32-byte chunks, or SSSE3 in 16-byte
+// chunks; whatever remains is finished by the scalar lookup loop.
+func galMulSlice(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		// Multiplying by one is a plain copy.
+		copy(out, in)
+		return
+	}
+	if o.useAVX2 {
+		if len(in) >= bigSwitchover {
+			galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 6) << 6
+			in = in[done:]
+			out = out[done:]
+		}
+		// NOTE(review): galMulSliceXor uses `>= 32` here; with `> 32` an
+		// exactly-32-byte remainder falls through to the scalar loop.
+		// Harmless but inconsistent — confirm which was intended.
+		if len(in) > 32 {
+			galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 5) << 5
+			in = in[done:]
+			out = out[done:]
+		}
+	} else if o.useSSSE3 {
+		galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done := (len(in) >> 4) << 4
+		in = in[done:]
+		out = out[done:]
+	}
+	// Scalar tail: plain table lookup for whatever the SIMD paths left.
+	out = out[:len(in)]
+	mt := mulTable[c][:256]
+	for i := range in {
+		out[i] = mt[in[i]]
+	}
+}
+
+// galMulSliceXor computes out[i] ^= c * in[i] (GF(2^8)) for every byte
+// of in, using the same AVX2/SSSE3 fast paths as galMulSlice with a
+// scalar lookup loop for the tail.
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		// Multiplying by one: XOR-accumulate in directly.
+		sliceXor(in, out, o)
+		return
+	}
+
+	if o.useAVX2 {
+		if len(in) >= bigSwitchover {
+			galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 6) << 6
+			in = in[done:]
+			out = out[done:]
+		}
+		if len(in) >= 32 {
+			galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+			done := (len(in) >> 5) << 5
+			in = in[done:]
+			out = out[done:]
+		}
+	} else if o.useSSSE3 {
+		galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+		done := (len(in) >> 4) << 4
+		in = in[done:]
+		out = out[done:]
+	}
+	// Scalar tail: XOR table lookups for whatever the SIMD paths left.
+	out = out[:len(in)]
+	mt := mulTable[c][:256]
+	for i := range in {
+		out[i] ^= mt[in[i]]
+	}
+}
+
+// sliceXor XORs in into out (slice galois add), using SSE2 fast paths
+// for 64- and 16-byte chunks when available, then a scalar tail loop.
+func sliceXor(in, out []byte, o *options) {
+	if o.useSSE2 {
+		if len(in) >= bigSwitchover {
+			sSE2XorSlice_64(in, out)
+			done := (len(in) >> 6) << 6
+			in = in[done:]
+			out = out[done:]
+		}
+		if len(in) >= 16 {
+			sSE2XorSlice(in, out)
+			done := (len(in) >> 4) << 4
+			in = in[done:]
+			out = out[done:]
+		}
+	}
+	// Scalar tail.
+	out = out[:len(in)]
+	for i := range in {
+		out[i] ^= in[i]
+	}
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
new file mode 100644
index 0000000..3501110
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
@@ -0,0 +1,368 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
+// and http://jerasure.org/jerasure/gf-complete/tree/master
+
+// func galMulSSSE3Xor(low, high, in, out []byte)
+//
+// out[i] ^= mul(in[i]) in 16-byte blocks via nibble-table PSHUFB.
+// Takes the MOVOA (aligned) loop when both in and out are 16-byte
+// aligned; tail bytes beyond len/16 blocks are left to the Go caller.
+TEXT ·galMulSSSE3Xor(SB), 7, $0
+	MOVQ   low+0(FP), SI     // SI: &low
+	MOVQ   high+24(FP), DX   // DX: &high
+	MOVOU  (SI), X6          // X6 low
+	MOVOU  (DX), X7          // X7: high
+	MOVQ   $15, BX           // BX: low mask
+	MOVQ   BX, X8
+	PXOR   X5, X5
+	MOVQ   in+48(FP), SI     // R11: &in
+	MOVQ   in_len+56(FP), R9 // R9: len(in)
+	MOVQ   out+72(FP), DX    // DX: &out
+	PSHUFB X5, X8            // X8: lomask (unpacked)
+	SHRQ   $4, R9            // len(in) / 16
+	MOVQ   SI, AX
+	MOVQ   DX, BX
+	ANDQ   $15, AX
+	ANDQ   $15, BX
+	CMPQ   R9, $0
+	JEQ    done_xor
+	ORQ    AX, BX
+	CMPQ   BX, $0
+	JNZ    loopback_xor
+
+loopback_xor_aligned:
+	MOVOA  (SI), X0 // in[x]
+	MOVOA  (DX), X4 // out[x]
+	MOVOA  X0, X1   // in[x]
+	MOVOA  X6, X2   // low copy
+	MOVOA  X7, X3   // high copy
+	PSRLQ  $4, X1   // X1: high input
+	PAND   X8, X0   // X0: low input
+	PAND   X8, X1   // X0: high input
+	PSHUFB X0, X2   // X2: mul low part
+	PSHUFB X1, X3   // X3: mul high part
+	PXOR   X2, X3   // X3: Result
+	PXOR   X4, X3   // X3: Result xor existing out
+	MOVOA  X3, (DX) // Store
+	ADDQ   $16, SI  // in+=16
+	ADDQ   $16, DX  // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_xor_aligned
+	JMP    done_xor
+
+loopback_xor:
+	MOVOU  (SI), X0 // in[x]
+	MOVOU  (DX), X4 // out[x]
+	MOVOU  X0, X1   // in[x]
+	MOVOU  X6, X2   // low copy
+	MOVOU  X7, X3   // high copy
+	PSRLQ  $4, X1   // X1: high input
+	PAND   X8, X0   // X0: low input
+	PAND   X8, X1   // X0: high input
+	PSHUFB X0, X2   // X2: mul low part
+	PSHUFB X1, X3   // X3: mul high part
+	PXOR   X2, X3   // X3: Result
+	PXOR   X4, X3   // X3: Result xor existing out
+	MOVOU  X3, (DX) // Store
+	ADDQ   $16, SI  // in+=16
+	ADDQ   $16, DX  // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_xor
+
+done_xor:
+	RET
+
+// func galMulSSSE3(low, high, in, out []byte)
+//
+// out[i] = mul(in[i]) in 16-byte blocks via nibble-table PSHUFB.
+// Uses MOVOA (aligned) loads when both in and out are 16-byte aligned.
+TEXT ·galMulSSSE3(SB), 7, $0
+	MOVQ   low+0(FP), SI     // SI: &low
+	MOVQ   high+24(FP), DX   // DX: &high
+	MOVOU  (SI), X6          // X6 low
+	MOVOU  (DX), X7          // X7: high
+	MOVQ   $15, BX           // BX: low mask
+	MOVQ   BX, X8
+	PXOR   X5, X5
+	MOVQ   in+48(FP), SI     // R11: &in
+	MOVQ   in_len+56(FP), R9 // R9: len(in)
+	MOVQ   out+72(FP), DX    // DX: &out
+	PSHUFB X5, X8            // X8: lomask (unpacked)
+	MOVQ   SI, AX
+	MOVQ   DX, BX
+	SHRQ   $4, R9            // len(in) / 16
+	ANDQ   $15, AX
+	ANDQ   $15, BX
+	CMPQ   R9, $0
+	JEQ    done
+	ORQ    AX, BX
+	CMPQ   BX, $0
+	JNZ    loopback
+
+loopback_aligned:
+	MOVOA  (SI), X0 // in[x]
+	MOVOA  X0, X1   // in[x]
+	MOVOA  X6, X2   // low copy
+	MOVOA  X7, X3   // high copy
+	PSRLQ  $4, X1   // X1: high input
+	PAND   X8, X0   // X0: low input
+	PAND   X8, X1   // X0: high input
+	PSHUFB X0, X2   // X2: mul low part
+	PSHUFB X1, X3   // X3: mul high part
+	PXOR   X2, X3   // X3: Result
+	MOVOA  X3, (DX) // Store
+	ADDQ   $16, SI  // in+=16
+	ADDQ   $16, DX  // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback_aligned
+	JMP    done
+
+loopback:
+	MOVOU  (SI), X0 // in[x]
+	MOVOU  X0, X1   // in[x]
+	MOVOA  X6, X2   // low copy
+	MOVOA  X7, X3   // high copy
+	PSRLQ  $4, X1   // X1: high input
+	PAND   X8, X0   // X0: low input
+	PAND   X8, X1   // X0: high input
+	PSHUFB X0, X2   // X2: mul low part
+	PSHUFB X1, X3   // X3: mul high part
+	PXOR   X2, X3   // X3: Result
+	MOVOU  X3, (DX) // Store
+	ADDQ   $16, SI  // in+=16
+	ADDQ   $16, DX  // out+=16
+	SUBQ   $1, R9
+	JNZ    loopback
+
+done:
+	RET
+
+// func galMulAVX2Xor(low, high, in, out []byte)
+//
+// out[i] ^= mul(in[i]), 32 bytes per iteration. The 16-byte nibble
+// tables are duplicated into both YMM lanes with VINSERTI128.
+TEXT ·galMulAVX2Xor(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $5, R9          // len(in) / 32
+	MOVQ  out+72(FP), DX  // DX: &out
+	MOVQ  in+48(FP), SI   // SI: &in
+	TESTQ R9, R9
+	JZ    done_xor_avx2
+
+loopback_xor_avx2:
+	VMOVDQU (SI), Y0
+	VMOVDQU (DX), Y4
+	VPSRLQ  $4, Y0, Y1  // Y1: high input
+	VPAND   Y8, Y0, Y0  // Y0: low input
+	VPAND   Y8, Y1, Y1  // Y1: high input
+	VPSHUFB Y0, Y6, Y2  // Y2: mul low part
+	VPSHUFB Y1, Y7, Y3  // Y3: mul high part
+	VPXOR   Y3, Y2, Y3  // Y3: Result
+	VPXOR   Y4, Y3, Y4  // Y4: Result
+	VMOVDQU Y4, (DX)
+
+	ADDQ $32, SI // in+=32
+	ADDQ $32, DX // out+=32
+	SUBQ $1, R9
+	JNZ  loopback_xor_avx2
+
+done_xor_avx2:
+	VZEROUPPER
+	RET
+
+// func galMulAVX2(low, high, in, out []byte)
+//
+// out[i] = mul(in[i]), 32 bytes per iteration (no accumulate).
+TEXT ·galMulAVX2(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $5, R9         // len(in) / 32
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_avx2
+
+loopback_avx2:
+	VMOVDQU (SI), Y0
+	VPSRLQ  $4, Y0, Y1 // Y1: high input
+	VPAND   Y8, Y0, Y0 // Y0: low input
+	VPAND   Y8, Y1, Y1 // Y1: high input
+	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+	VPXOR   Y3, Y2, Y4 // Y4: Result
+	VMOVDQU Y4, (DX)
+
+	ADDQ $32, SI // in+=32
+	ADDQ $32, DX // out+=32
+	SUBQ $1, R9
+	JNZ  loopback_avx2
+
+done_avx2:
+	VZEROUPPER
+	RET
+
+// func sSE2XorSlice(in, out []byte)
+//
+// out ^= in, 16 bytes per iteration; remainder handled by the Go caller.
+TEXT ·sSE2XorSlice(SB), 7, $0
+	MOVQ in+0(FP), SI     // SI: &in
+	MOVQ in_len+8(FP), R9 // R9: len(in)
+	MOVQ out+24(FP), DX   // DX: &out
+	SHRQ $4, R9           // len(in) / 16
+	CMPQ R9, $0
+	JEQ  done_xor_sse2
+
+loopback_xor_sse2:
+	MOVOU (SI), X0 // in[x]
+	MOVOU (DX), X1 // out[x]
+	PXOR  X0, X1
+	MOVOU X1, (DX)
+	ADDQ  $16, SI // in+=16
+	ADDQ  $16, DX // out+=16
+	SUBQ  $1, R9
+	JNZ   loopback_xor_sse2
+
+done_xor_sse2:
+	RET
+
+// func galMulAVX2Xor_64(low, high, in, out []byte)
+//
+// out[i] ^= mul(in[i]), 64 bytes per iteration as two interleaved
+// 32-byte halves (Y0.. / Y10..).
+TEXT ·galMulAVX2Xor_64(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $6, R9         // len(in) / 64
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_xor_avx2_64
+
+loopback_xor_avx2_64:
+	VMOVDQU (SI), Y0
+	VMOVDQU 32(SI), Y10
+	VMOVDQU (DX), Y4
+	VMOVDQU 32(DX), Y14
+	VPSRLQ  $4, Y0, Y1    // Y1: high input
+	VPSRLQ  $4, Y10, Y11  // Y11: high input 2
+	VPAND   Y8, Y0, Y0    // Y0: low input
+	VPAND   Y8, Y10, Y10  // Y10: low input 2
+	VPAND   Y8, Y1, Y1    // Y11: high input
+	VPAND   Y8, Y11, Y11  // Y11: high input 2
+	VPSHUFB Y0, Y6, Y2    // Y2: mul low part
+	VPSHUFB Y10, Y6, Y12  // Y12: mul low part 2
+	VPSHUFB Y1, Y7, Y3    // Y3: mul high part
+	VPSHUFB Y11, Y7, Y13  // Y13: mul high part 2
+	VPXOR   Y3, Y2, Y3    // Y3: Result
+	VPXOR   Y13, Y12, Y13 // Y13: Result 2
+	VPXOR   Y4, Y3, Y4    // Y4: Result
+	VPXOR   Y14, Y13, Y14 // Y4: Result 2
+	VMOVDQU Y4, (DX)
+	VMOVDQU Y14, 32(DX)
+
+	ADDQ $64, SI // in+=64
+	ADDQ $64, DX // out+=64
+	SUBQ $1, R9
+	JNZ  loopback_xor_avx2_64
+
+done_xor_avx2_64:
+	VZEROUPPER
+	RET
+
+// func galMulAVX2_64(low, high, in, out []byte)
+//
+// out[i] = mul(in[i]), 64 bytes per iteration as two interleaved
+// 32-byte halves (no accumulate).
+TEXT ·galMulAVX2_64(SB), 7, $0
+	MOVQ  low+0(FP), SI     // SI: &low
+	MOVQ  high+24(FP), DX   // DX: &high
+	MOVQ  $15, BX           // BX: low mask
+	MOVQ  BX, X5
+	MOVOU (SI), X6          // X6: low
+	MOVOU (DX), X7          // X7: high
+	MOVQ  in_len+56(FP), R9 // R9: len(in)
+
+	VINSERTI128  $1, X6, Y6, Y6 // low
+	VINSERTI128  $1, X7, Y7, Y7 // high
+	VPBROADCASTB X5, Y8         // Y8: lomask (unpacked)
+
+	SHRQ  $6, R9         // len(in) / 64
+	MOVQ  out+72(FP), DX // DX: &out
+	MOVQ  in+48(FP), SI  // SI: &in
+	TESTQ R9, R9
+	JZ    done_avx2_64
+
+loopback_avx2_64:
+	VMOVDQU (SI), Y0
+	VMOVDQU 32(SI), Y10
+	VPSRLQ  $4, Y0, Y1    // Y1: high input
+	VPSRLQ  $4, Y10, Y11  // Y11: high input 2
+	VPAND   Y8, Y0, Y0    // Y0: low input
+	VPAND   Y8, Y10, Y10  // Y10: low input
+	VPAND   Y8, Y1, Y1    // Y1: high input
+	VPAND   Y8, Y11, Y11  // Y11: high input 2
+	VPSHUFB Y0, Y6, Y2    // Y2: mul low part
+	VPSHUFB Y10, Y6, Y12  // Y12: mul low part 2
+	VPSHUFB Y1, Y7, Y3    // Y3: mul high part
+	VPSHUFB Y11, Y7, Y13  // Y13: mul high part 2
+	VPXOR   Y3, Y2, Y4    // Y4: Result
+	VPXOR   Y13, Y12, Y14 // Y14: Result 2
+	VMOVDQU Y4, (DX)
+	VMOVDQU Y14, 32(DX)
+
+	ADDQ $64, SI // in+=64
+	ADDQ $64, DX // out+=64
+	SUBQ $1, R9
+	JNZ  loopback_avx2_64
+
+done_avx2_64:
+	VZEROUPPER
+	RET
+
+// func sSE2XorSlice_64(in, out []byte)
+//
+// out ^= in, 64 bytes per iteration (four 16-byte XMM lanes).
+TEXT ·sSE2XorSlice_64(SB), 7, $0
+	MOVQ in+0(FP), SI     // SI: &in
+	MOVQ in_len+8(FP), R9 // R9: len(in)
+	MOVQ out+24(FP), DX   // DX: &out
+	SHRQ $6, R9           // len(in) / 64
+	CMPQ R9, $0
+	JEQ  done_xor_sse2_64
+
+loopback_xor_sse2_64:
+	MOVOU (SI), X0   // in[x]
+	MOVOU 16(SI), X2 // in[x]
+	MOVOU 32(SI), X4 // in[x]
+	MOVOU 48(SI), X6 // in[x]
+	MOVOU (DX), X1   // out[x]
+	MOVOU 16(DX), X3 // out[x]
+	MOVOU 32(DX), X5 // out[x]
+	MOVOU 48(DX), X7 // out[x]
+	PXOR  X0, X1
+	PXOR  X2, X3
+	PXOR  X4, X5
+	PXOR  X6, X7
+	MOVOU X1, (DX)
+	MOVOU X3, 16(DX)
+	MOVOU X5, 32(DX)
+	MOVOU X7, 48(DX)
+	ADDQ  $64, SI // in+=64
+	ADDQ  $64, DX // out+=64
+	SUBQ  $1, R9
+	JNZ   loopback_xor_sse2_64
+
+done_xor_sse2_64:
+	RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
new file mode 100644
index 0000000..23a1dd2
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
@@ -0,0 +1,67 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func galMulNEON(low, high, in, out []byte)
+
+//go:noescape
+func galMulXorNEON(low, high, in, out []byte)
+
+//go:noescape
+func galXorNEON(in, out []byte)
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ copy(out, in)
+ return
+ }
+ var done int
+ galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done = (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mulTable[c][:256]
+ for i := done; i < len(in); i++ {
+ out[i] = mt[in[i]]
+ }
+ }
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ sliceXor(in, out, o)
+ return
+ }
+ var done int
+ galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done = (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mulTable[c][:256]
+ for i := done; i < len(in); i++ {
+ out[i] ^= mt[in[i]]
+ }
+ }
+}
+
+// slice galois add
+func sliceXor(in, out []byte, o *options) {
+
+ galXorNEON(in, out)
+ done := (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ for i := done; i < len(in); i++ {
+ out[i] ^= in[i]
+ }
+ }
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
new file mode 100644
index 0000000..d2cac2c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
@@ -0,0 +1,125 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+#define LOAD(LO1, LO2, HI1, HI2) \
+ VLD1.P 32(R1), [LO1.B16, LO2.B16] \
+ \
+ \ // Get low input and high input
+ VUSHR $4, LO1.B16, HI1.B16 \
+ VUSHR $4, LO2.B16, HI2.B16 \
+ VAND V8.B16, LO1.B16, LO1.B16 \
+ VAND V8.B16, LO2.B16, LO2.B16
+
+#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
+ \ // Mul low part and mul high part
+ VTBL V0.B16, [MUL_LO.B16], OUT1.B16 \
+ VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
+ VTBL V1.B16, [MUL_LO.B16], TMP1.B16 \
+ VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
+ \
+ \ // Combine results
+ VEOR OUT2.B16, OUT1.B16, OUT1.B16 \
+ VEOR TMP2.B16, TMP1.B16, OUT2.B16
+
+// func galMulNEON(low, high, in, out []byte)
+TEXT ·galMulNEON(SB), 7, $0
+ MOVD in_base+48(FP), R1
+ MOVD in_len+56(FP), R2 // length of message
+ MOVD out_base+72(FP), R5
+ SUBS $32, R2
+ BMI complete
+
+ MOVD low+0(FP), R10 // R10: &low
+ MOVD high+24(FP), R11 // R11: &high
+ VLD1 (R10), [V6.B16]
+ VLD1 (R11), [V7.B16]
+
+ //
+ // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
+ // WORD $0x4e010c68 // dup v8.16b, w3
+ //
+ MOVD $0x0f, R3
+ VMOV R3, V8.B[0]
+ VDUP V8.B[0], V8.B16
+
+loop:
+ // Main loop
+ LOAD(V0, V1, V10, V11)
+ GALOIS_MUL(V6, V7, V4, V5, V14, V15)
+
+ // Store result
+ VST1.P [V4.D2, V5.D2], 32(R5)
+
+ SUBS $32, R2
+ BPL loop
+
+complete:
+ RET
+
+// func galMulXorNEON(low, high, in, out []byte)
+TEXT ·galMulXorNEON(SB), 7, $0
+ MOVD in_base+48(FP), R1
+ MOVD in_len+56(FP), R2 // length of message
+ MOVD out_base+72(FP), R5
+ SUBS $32, R2
+ BMI completeXor
+
+ MOVD low+0(FP), R10 // R10: &low
+ MOVD high+24(FP), R11 // R11: &high
+ VLD1 (R10), [V6.B16]
+ VLD1 (R11), [V7.B16]
+
+ //
+ // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
+ // WORD $0x4e010c68 // dup v8.16b, w3
+ //
+ MOVD $0x0f, R3
+ VMOV R3, V8.B[0]
+ VDUP V8.B[0], V8.B16
+
+loopXor:
+ // Main loop
+ VLD1 (R5), [V20.B16, V21.B16]
+
+ LOAD(V0, V1, V10, V11)
+ GALOIS_MUL(V6, V7, V4, V5, V14, V15)
+
+ VEOR V20.B16, V4.B16, V4.B16
+ VEOR V21.B16, V5.B16, V5.B16
+
+ // Store result
+ VST1.P [V4.D2, V5.D2], 32(R5)
+
+ SUBS $32, R2
+ BPL loopXor
+
+completeXor:
+ RET
+
+// func galXorNEON(in, out []byte)
+TEXT ·galXorNEON(SB), 7, $0
+ MOVD in_base+0(FP), R1
+ MOVD in_len+8(FP), R2 // length of message
+ MOVD out_base+24(FP), R5
+ SUBS $32, R2
+ BMI completeXor
+
+loopXor:
+ // Main loop
+ VLD1.P 32(R1), [V0.B16, V1.B16]
+ VLD1 (R5), [V20.B16, V21.B16]
+
+ VEOR V20.B16, V0.B16, V4.B16
+ VEOR V21.B16, V1.B16, V5.B16
+
+ // Store result
+ VST1.P [V4.D2, V5.D2], 32(R5)
+
+ SUBS $32, R2
+ BPL loopXor
+
+completeXor:
+ RET
+
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
new file mode 100644
index 0000000..edd6376
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
@@ -0,0 +1,408 @@
+// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build !nogen
+// +build gc
+
+package reedsolomon
+
+// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x8 takes 10 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
new file mode 100644
index 0000000..c76db3c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
@@ -0,0 +1,18526 @@
+// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build !nogen
+// +build gc
+
+// func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), BX
+
+mulAvxTwo_1x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (CX)(BX*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y3, Y4, Y4
+ VPAND Y3, Y5, Y5
+ VPSHUFB Y4, Y1, Y4
+ VPSHUFB Y5, Y2, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(BX*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, BX
+ DECQ AX
+ JNZ mulAvxTwo_1x1_loop
+ VZEROUPPER
+
+mulAvxTwo_1x1_end:
+ RET
+
+// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x2(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), BP
+
+mulAvxTwo_1x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (CX)(BP*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VPSHUFB Y9, Y2, Y7
+ VPSHUFB Y10, Y3, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VPSHUFB Y9, Y4, Y7
+ VPSHUFB Y10, Y5, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(BP*1)
+ VMOVDQU Y1, (DX)(BP*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, BP
+ DECQ AX
+ JNZ mulAvxTwo_1x2_loop
+ VZEROUPPER
+
+mulAvxTwo_1x2_end:
+ RET
+
+// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x3(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X9
+ VPBROADCASTB X9, Y9
+ MOVQ start+72(FP), SI
+
+mulAvxTwo_1x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (CX)(SI*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VPSHUFB Y12, Y3, Y10
+ VPSHUFB Y13, Y4, Y11
+ VPXOR Y10, Y11, Y10
+ VPXOR Y10, Y0, Y0
+ VPSHUFB Y12, Y5, Y10
+ VPSHUFB Y13, Y6, Y11
+ VPXOR Y10, Y11, Y10
+ VPXOR Y10, Y1, Y1
+ VPSHUFB Y12, Y7, Y10
+ VPSHUFB Y13, Y8, Y11
+ VPXOR Y10, Y11, Y10
+ VPXOR Y10, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(SI*1)
+ VMOVDQU Y1, (BP)(SI*1)
+ VMOVDQU Y2, (DX)(SI*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, SI
+ DECQ AX
+ JNZ mulAvxTwo_1x3_loop
+ VZEROUPPER
+
+mulAvxTwo_1x3_end:
+ RET
+
+// func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), DI
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_1x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (DI)(R8*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R8*1)
+ VMOVDQU Y1, (BP)(R8*1)
+ VMOVDQU Y2, (SI)(R8*1)
+ VMOVDQU Y3, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_1x4_loop
+ VZEROUPPER
+
+mulAvxTwo_1x4_end:
+ RET
+
+// func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R8
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_1x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R8)(R9*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R9*1)
+ VMOVDQU Y1, (BP)(R9*1)
+ VMOVDQU Y2, (SI)(R9*1)
+ VMOVDQU Y3, (DI)(R9*1)
+ VMOVDQU Y4, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_1x5_loop
+ VZEROUPPER
+
+mulAvxTwo_1x5_end:
+ RET
+
+// func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), DX
+ MOVQ in_base+24(FP), R9
+ MOVQ (R9), R9
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_1x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (R9)(R10*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (BP)(R10*1)
+ VMOVDQU Y2, (SI)(R10*1)
+ VMOVDQU Y3, (DI)(R10*1)
+ VMOVDQU Y4, (R8)(R10*1)
+ VMOVDQU Y5, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_1x6_loop
+ VZEROUPPER
+
+mulAvxTwo_1x6_end:
+ RET
+
+// func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R10
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_1x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R10)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (BP)(R11*1)
+ VMOVDQU Y2, (SI)(R11*1)
+ VMOVDQU Y3, (DI)(R11*1)
+ VMOVDQU Y4, (R8)(R11*1)
+ VMOVDQU Y5, (R9)(R11*1)
+ VMOVDQU Y6, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_1x7_loop
+ VZEROUPPER
+
+mulAvxTwo_1x7_end:
+ RET
+
+// func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 29 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), R10
+ MOVQ 168(DX), DX
+ MOVQ in_base+24(FP), R11
+ MOVQ (R11), R11
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_1x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (R11)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (SI)(R12*1)
+ VMOVDQU Y3, (DI)(R12*1)
+ VMOVDQU Y4, (R8)(R12*1)
+ VMOVDQU Y5, (R9)(R12*1)
+ VMOVDQU Y6, (R10)(R12*1)
+ VMOVDQU Y7, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_1x8_loop
+ VZEROUPPER
+
+mulAvxTwo_1x8_end:
+ RET
+
+// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), CX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), BP
+
+mulAvxTwo_2x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(BP*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y1, Y6
+ VPSHUFB Y7, Y2, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (CX)(BP*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y3, Y6
+ VPSHUFB Y7, Y4, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(BP*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, BP
+ DECQ AX
+ JNZ mulAvxTwo_2x1_loop
+ VZEROUPPER
+
+mulAvxTwo_2x1_end:
+ RET
+
+// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x2(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 15 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BP
+ MOVQ 24(CX), CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X10
+ VPBROADCASTB X10, Y10
+ MOVQ start+72(FP), SI
+
+mulAvxTwo_2x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BP)(SI*1), Y13
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y2, Y11
+ VPSHUFB Y14, Y3, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y0, Y0
+ VPSHUFB Y13, Y4, Y11
+ VPSHUFB Y14, Y5, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (CX)(SI*1), Y13
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y6, Y11
+ VPSHUFB Y14, Y7, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y0, Y0
+ VPSHUFB Y13, Y8, Y11
+ VPSHUFB Y14, Y9, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(SI*1)
+ VMOVDQU Y1, (DX)(SI*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, SI
+ DECQ AX
+ JNZ mulAvxTwo_2x2_loop
+ VZEROUPPER
+
+mulAvxTwo_2x2_end:
+ RET
+
+// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), SI
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_2x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R8*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI)(R8*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R8*1)
+ VMOVDQU Y1, (BP)(R8*1)
+ VMOVDQU Y2, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_2x3_loop
+ VZEROUPPER
+
+mulAvxTwo_2x3_end:
+ RET
+
+// func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 25 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_2x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R9*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (DI)(R9*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R9*1)
+ VMOVDQU Y1, (BP)(R9*1)
+ VMOVDQU Y2, (SI)(R9*1)
+ VMOVDQU Y3, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_2x4_loop
+ VZEROUPPER
+
+mulAvxTwo_2x4_end:
+ RET
+
+// func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_2x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R9)(R10*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (R8)(R10*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (BP)(R10*1)
+ VMOVDQU Y2, (SI)(R10*1)
+ VMOVDQU Y3, (DI)(R10*1)
+ VMOVDQU Y4, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_2x5_loop
+ VZEROUPPER
+
+mulAvxTwo_2x5_end:
+ RET
+
+// func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), DX
+ MOVQ in_base+24(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_2x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (R10)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (R9)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (BP)(R11*1)
+ VMOVDQU Y2, (SI)(R11*1)
+ VMOVDQU Y3, (DI)(R11*1)
+ VMOVDQU Y4, (R8)(R11*1)
+ VMOVDQU Y5, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_2x6_loop
+ VZEROUPPER
+
+mulAvxTwo_2x6_end:
+ RET
+
+// func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_2x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R11)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (R10)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (SI)(R12*1)
+ VMOVDQU Y3, (DI)(R12*1)
+ VMOVDQU Y4, (R8)(R12*1)
+ VMOVDQU Y5, (R9)(R12*1)
+ VMOVDQU Y6, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_2x7_loop
+ VZEROUPPER
+
+mulAvxTwo_2x7_end:
+ RET
+
+// func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 45 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), R10
+ MOVQ 168(DX), DX
+ MOVQ in_base+24(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_2x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (R12)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (R11)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ VMOVDQU Y0, (BX)(R13*1)
+ VMOVDQU Y1, (BP)(R13*1)
+ VMOVDQU Y2, (SI)(R13*1)
+ VMOVDQU Y3, (DI)(R13*1)
+ VMOVDQU Y4, (R8)(R13*1)
+ VMOVDQU Y5, (R9)(R13*1)
+ VMOVDQU Y6, (R10)(R13*1)
+ VMOVDQU Y7, (DX)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_2x8_loop
+ VZEROUPPER
+
+mulAvxTwo_2x8_end:
+ RET
+
+// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), BP
+ MOVQ 48(CX), CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), SI
+
+mulAvxTwo_3x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(SI*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y1, Y8
+ VPSHUFB Y9, Y2, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BP)(SI*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y3, Y8
+ VPSHUFB Y9, Y4, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (CX)(SI*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y5, Y8
+ VPSHUFB Y9, Y6, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(SI*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, SI
+ DECQ AX
+ JNZ mulAvxTwo_3x1_loop
+ VZEROUPPER
+
+mulAvxTwo_3x1_end:
+ RET
+
+// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 19 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), BP
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X2
+ VPBROADCASTB X2, Y2
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_3x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R8*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R8*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (BP)(R8*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R8*1)
+ VMOVDQU Y1, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_3x2_loop
+ VZEROUPPER
+
+mulAvxTwo_3x2_end:
+ RET
+
+// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_3x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R9*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (R8)(R9*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (SI)(R9*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R9*1)
+ VMOVDQU Y1, (BP)(R9*1)
+ VMOVDQU Y2, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_3x3_loop
+ VZEROUPPER
+
+mulAvxTwo_3x3_end:
+ RET
+
+// func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 33 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_3x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R10*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (R9)(R10*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI)(R10*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (BP)(R10*1)
+ VMOVDQU Y2, (SI)(R10*1)
+ VMOVDQU Y3, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_3x4_loop
+ VZEROUPPER
+
+mulAvxTwo_3x4_end:
+ RET
+
+// func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R8
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_3x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R9)(R11*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (R10)(R11*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (R8)(R11*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (BP)(R11*1)
+ VMOVDQU Y2, (SI)(R11*1)
+ VMOVDQU Y3, (DI)(R11*1)
+ VMOVDQU Y4, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_3x5_loop
+ VZEROUPPER
+
+mulAvxTwo_3x5_end:
+ RET
+
+// func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 47 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 3 input slices into 6 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x6_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), DX
+	MOVQ  in_base+24(FP), R9
+	MOVQ  (R9), R10
+	MOVQ  24(R9), R11
+	MOVQ  48(R9), R9
+	MOVQ  $0x0000000f, R12
+	MOVQ  R12, X6
+	VPBROADCASTB X6, Y6 // Y6 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R12
+
+mulAvxTwo_3x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (R10)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (R11)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (R9)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Store 6 outputs
+	VMOVDQU Y0, (BX)(R12*1)
+	VMOVDQU Y1, (BP)(R12*1)
+	VMOVDQU Y2, (SI)(R12*1)
+	VMOVDQU Y3, (DI)(R12*1)
+	VMOVDQU Y4, (R8)(R12*1)
+	VMOVDQU Y5, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_3x6_loop
+	VZEROUPPER
+
+mulAvxTwo_3x6_end:
+	RET
+
+// func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 54 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 3 input slices into 7 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x7_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), R9
+	MOVQ  144(DX), DX
+	MOVQ  in_base+24(FP), R10
+	MOVQ  (R10), R11
+	MOVQ  24(R10), R12
+	MOVQ  48(R10), R10
+	MOVQ  $0x0000000f, R13
+	MOVQ  R13, X7
+	VPBROADCASTB X7, Y7 // Y7 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R13
+
+mulAvxTwo_3x7_loop:
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (R11)(R13*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (R12)(R13*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (R10)(R13*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Store 7 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (BP)(R13*1)
+	VMOVDQU Y2, (SI)(R13*1)
+	VMOVDQU Y3, (DI)(R13*1)
+	VMOVDQU Y4, (R8)(R13*1)
+	VMOVDQU Y5, (R9)(R13*1)
+	VMOVDQU Y6, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ  mulAvxTwo_3x7_loop
+	VZEROUPPER
+
+mulAvxTwo_3x7_end:
+	RET
+
+// func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 61 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 3 input slices into 8 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x8_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), R9
+	MOVQ  144(DX), R10
+	MOVQ  168(DX), DX
+	MOVQ  in_base+24(FP), R11
+	MOVQ  (R11), R12
+	MOVQ  24(R11), R13
+	MOVQ  48(R11), R11
+	MOVQ  $0x0000000f, R14
+	MOVQ  R14, X8
+	VPBROADCASTB X8, Y8 // Y8 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R14
+
+mulAvxTwo_3x8_loop:
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (R12)(R14*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (R13)(R14*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (R11)(R14*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Store 8 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (SI)(R14*1)
+	VMOVDQU Y3, (DI)(R14*1)
+	VMOVDQU Y4, (R8)(R14*1)
+	VMOVDQU Y5, (R9)(R14*1)
+	VMOVDQU Y6, (R10)(R14*1)
+	VMOVDQU Y7, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ  mulAvxTwo_3x8_loop
+	VZEROUPPER
+
+mulAvxTwo_3x8_end:
+	RET
+
+// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x1(SB), $0-88
+	// Loading all tables to registers
+	// Full registers estimated 12 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 4 input slices into 1 output slice,
+	// 32 bytes per iteration. With a single output the 8 lookup tables
+	// (low/high nibble for each of the 4 inputs) fit in Y1-Y8, so they are
+	// loaded once up front instead of re-read from memory every iteration.
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ    n+80(FP), AX
+	MOVQ    matrix_base+0(FP), CX
+	SHRQ    $0x05, AX
+	TESTQ   AX, AX
+	JZ      mulAvxTwo_4x1_end
+	MOVQ    out_base+48(FP), DX
+	MOVQ    (DX), DX
+	VMOVDQU (CX), Y1
+	VMOVDQU 32(CX), Y2
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	MOVQ    in_base+24(FP), CX
+	MOVQ    (CX), BX
+	MOVQ    24(CX), BP
+	MOVQ    48(CX), SI
+	MOVQ    72(CX), CX
+	MOVQ    $0x0000000f, DI
+	MOVQ    DI, X9
+	VPBROADCASTB X9, Y9 // Y9 = 0x0f in every byte (nibble mask)
+	MOVQ    start+72(FP), DI
+
+mulAvxTwo_4x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BX)(DI*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y9, Y10, Y10
+	VPAND   Y9, Y11, Y11
+	VPSHUFB Y10, Y1, Y10
+	VPSHUFB Y11, Y2, Y11
+	VPXOR   Y10, Y11, Y10
+	VPXOR   Y10, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (BP)(DI*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y9, Y10, Y10
+	VPAND   Y9, Y11, Y11
+	VPSHUFB Y10, Y3, Y10
+	VPSHUFB Y11, Y4, Y11
+	VPXOR   Y10, Y11, Y10
+	VPXOR   Y10, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (SI)(DI*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y9, Y10, Y10
+	VPAND   Y9, Y11, Y11
+	VPSHUFB Y10, Y5, Y10
+	VPSHUFB Y11, Y6, Y11
+	VPXOR   Y10, Y11, Y10
+	VPXOR   Y10, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (CX)(DI*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y9, Y10, Y10
+	VPAND   Y9, Y11, Y11
+	VPSHUFB Y10, Y7, Y10
+	VPSHUFB Y11, Y8, Y11
+	VPXOR   Y10, Y11, Y10
+	VPXOR   Y10, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(DI*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, DI
+	DECQ AX
+	JNZ  mulAvxTwo_4x1_loop
+	VZEROUPPER
+
+mulAvxTwo_4x1_end:
+	RET
+
+// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 23 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 4 input slices into 2 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x2_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), DX
+	MOVQ  in_base+24(FP), BP
+	MOVQ  (BP), SI
+	MOVQ  24(BP), DI
+	MOVQ  48(BP), R8
+	MOVQ  72(BP), BP
+	MOVQ  $0x0000000f, R9
+	MOVQ  R9, X2
+	VPBROADCASTB X2, Y2 // Y2 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R9
+
+mulAvxTwo_4x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R9*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R9*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R9*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (BP)(R9*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R9*1)
+	VMOVDQU Y1, (DX)(R9*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R9
+	DECQ AX
+	JNZ  mulAvxTwo_4x2_loop
+	VZEROUPPER
+
+mulAvxTwo_4x2_end:
+	RET
+
+// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 32 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 4 input slices into 3 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x3_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), DX
+	MOVQ  in_base+24(FP), SI
+	MOVQ  (SI), DI
+	MOVQ  24(SI), R8
+	MOVQ  48(SI), R9
+	MOVQ  72(SI), SI
+	MOVQ  $0x0000000f, R10
+	MOVQ  R10, X3
+	VPBROADCASTB X3, Y3 // Y3 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R10
+
+mulAvxTwo_4x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R10*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R10*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R10*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (SI)(R10*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R10*1)
+	VMOVDQU Y1, (BP)(R10*1)
+	VMOVDQU Y2, (DX)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ  mulAvxTwo_4x3_loop
+	VZEROUPPER
+
+mulAvxTwo_4x3_end:
+	RET
+
+// func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 41 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 4 input slices into 4 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x4_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DX
+	MOVQ  in_base+24(FP), DI
+	MOVQ  (DI), R8
+	MOVQ  24(DI), R9
+	MOVQ  48(DI), R10
+	MOVQ  72(DI), DI
+	MOVQ  $0x0000000f, R11
+	MOVQ  R11, X4
+	VPBROADCASTB X4, Y4 // Y4 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R11
+
+mulAvxTwo_4x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (R8)(R11*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (R9)(R11*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (R10)(R11*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (DI)(R11*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Store 4 outputs
+	VMOVDQU Y0, (BX)(R11*1)
+	VMOVDQU Y1, (BP)(R11*1)
+	VMOVDQU Y2, (SI)(R11*1)
+	VMOVDQU Y3, (DX)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ  mulAvxTwo_4x4_loop
+	VZEROUPPER
+
+mulAvxTwo_4x4_end:
+	RET
+
+// func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 50 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 4 input slices into 5 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x5_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), DX
+	MOVQ  in_base+24(FP), R8
+	MOVQ  (R8), R9
+	MOVQ  24(R8), R10
+	MOVQ  48(R8), R11
+	MOVQ  72(R8), R8
+	MOVQ  $0x0000000f, R12
+	MOVQ  R12, X5
+	VPBROADCASTB X5, Y5 // Y5 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R12
+
+mulAvxTwo_4x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R11)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R8)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R12*1)
+	VMOVDQU Y1, (BP)(R12*1)
+	VMOVDQU Y2, (SI)(R12*1)
+	VMOVDQU Y3, (DI)(R12*1)
+	VMOVDQU Y4, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_4x5_loop
+	VZEROUPPER
+
+mulAvxTwo_4x5_end:
+	RET
+
+// func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 59 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 4 input slices into 6 output slices,
+	// 32 bytes per iteration, via the low/high-nibble VPSHUFB table-lookup
+	// technique; tables stay in memory (too many to keep in registers).
+	// NOTE(review): machine-generated (avo) — regenerate rather than hand-edit.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x6_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), DX
+	MOVQ  in_base+24(FP), R9
+	MOVQ  (R9), R10
+	MOVQ  24(R9), R11
+	MOVQ  48(R9), R12
+	MOVQ  72(R9), R9
+	MOVQ  $0x0000000f, R13
+	MOVQ  R13, X6
+	VPBROADCASTB X6, Y6 // Y6 = 0x0f in every byte (nibble mask)
+	MOVQ  start+72(FP), R13
+
+mulAvxTwo_4x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (R10)(R13*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (R11)(R13*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (R12)(R13*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R9)(R13*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Store 6 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (BP)(R13*1)
+	VMOVDQU Y2, (SI)(R13*1)
+	VMOVDQU Y3, (DI)(R13*1)
+	VMOVDQU Y4, (R8)(R13*1)
+	VMOVDQU Y5, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ  mulAvxTwo_4x6_loop
+	VZEROUPPER
+
+mulAvxTwo_4x6_end:
+	RET
+
+// func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 4 input slices into 7 output slices, 32 bytes per loop
+// iteration. Each input byte is split into low/high nibbles (VPAND with the
+// 0x0f mask / VPSRLQ $4 then VPAND); each nibble indexes a 32-byte VPSHUFB
+// lookup table streamed from the matrix (CX) at consecutive 32-byte offsets,
+// and the two lookups are XORed into the output accumulators.
+// Y0..Y6 accumulate the 7 outputs, Y7 holds the broadcast nibble mask,
+// Y10/Y11 the current nibbles, Y8/Y9 are table scratch.
+TEXT ·mulAvxTwo_4x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 68 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x7_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), R9
+	MOVQ  144(DX), DX
+	MOVQ  in_base+24(FP), R10
+	MOVQ  (R10), R11
+	MOVQ  24(R10), R12
+	MOVQ  48(R10), R13
+	MOVQ  72(R10), R10
+	MOVQ  $0x0000000f, R14
+	MOVQ  R14, X7
+	VPBROADCASTB X7, Y7
+	// R14 is reused as the running byte offset, starting at `start`.
+	MOVQ  start+72(FP), R14
+
+mulAvxTwo_4x7_loop:
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (R11)(R14*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (R12)(R14*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (R13)(R14*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 3 to 7 outputs
+	VMOVDQU (R10)(R14*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 1344(CX), Y8
+	VMOVDQU 1376(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 1408(CX), Y8
+	VMOVDQU 1440(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1472(CX), Y8
+	VMOVDQU 1504(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1536(CX), Y8
+	VMOVDQU 1568(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1600(CX), Y8
+	VMOVDQU 1632(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1664(CX), Y8
+	VMOVDQU 1696(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1728(CX), Y8
+	VMOVDQU 1760(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Store 7 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (SI)(R14*1)
+	VMOVDQU Y3, (DI)(R14*1)
+	VMOVDQU Y4, (R8)(R14*1)
+	VMOVDQU Y5, (R9)(R14*1)
+	VMOVDQU Y6, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ  mulAvxTwo_4x7_loop
+	VZEROUPPER
+
+mulAvxTwo_4x7_end:
+	RET
+
+
+// func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 4 input slices into 8 output slices, 32 bytes per loop
+// iteration. Tables are streamed from the matrix (CX) at consecutive
+// 32-byte offsets (low-nibble then high-nibble table per output).
+// Y0..Y7 accumulate the 8 outputs, Y8 holds the broadcast 0x0f nibble
+// mask, Y11/Y12 the current nibbles, Y9/Y10 are table scratch. This shape
+// uses every general-purpose register; R15 doubles as the mask staging
+// register and then the running byte offset.
+TEXT ·mulAvxTwo_4x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 77 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_4x8_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), R9
+	MOVQ  144(DX), R10
+	MOVQ  168(DX), DX
+	MOVQ  in_base+24(FP), R11
+	MOVQ  (R11), R12
+	MOVQ  24(R11), R13
+	MOVQ  48(R11), R14
+	MOVQ  72(R11), R11
+	MOVQ  $0x0000000f, R15
+	MOVQ  R15, X8
+	VPBROADCASTB X8, Y8
+	MOVQ  start+72(FP), R15
+
+mulAvxTwo_4x8_loop:
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (R12)(R15*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (R13)(R15*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (R14)(R15*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 3 to 8 outputs
+	VMOVDQU (R11)(R15*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 1536(CX), Y9
+	VMOVDQU 1568(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 1600(CX), Y9
+	VMOVDQU 1632(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 1664(CX), Y9
+	VMOVDQU 1696(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 1728(CX), Y9
+	VMOVDQU 1760(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 1792(CX), Y9
+	VMOVDQU 1824(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 1856(CX), Y9
+	VMOVDQU 1888(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 1920(CX), Y9
+	VMOVDQU 1952(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 1984(CX), Y9
+	VMOVDQU 2016(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Store 8 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (SI)(R15*1)
+	VMOVDQU Y3, (DI)(R15*1)
+	VMOVDQU Y4, (R8)(R15*1)
+	VMOVDQU Y5, (R9)(R15*1)
+	VMOVDQU Y6, (R10)(R15*1)
+	VMOVDQU Y7, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ  mulAvxTwo_4x8_loop
+	VZEROUPPER
+
+mulAvxTwo_4x8_end:
+	RET
+
+
+// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 5 input slices into 1 output slice, 32 bytes per loop
+// iteration. All 10 lookup tables (2 per input: low-nibble and high-nibble)
+// fit in registers Y1..Y10, so no table loads occur inside the loop.
+// Each input byte is split into its low nibble (VPAND with the broadcast
+// 0x0f mask in Y11) and high nibble (VPSRLQ $4 then VPAND); each nibble
+// selects a table byte via VPSHUFB and the two results are XORed into the
+// accumulator Y0. n is pre-divided by 32 (SHRQ $5) to give the loop count.
+TEXT ·mulAvxTwo_5x1(SB), $0-88
+	// Loading all tables to registers
+	// Full registers estimated 14 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_5x1_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), DX
+	VMOVDQU (CX), Y1
+	VMOVDQU 32(CX), Y2
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	// Input slice base pointers: BX, BP, SI, DI, CX (CX is reused once the
+	// table pointer is no longer needed — all tables are already in YMMs).
+	MOVQ in_base+24(FP), CX
+	MOVQ (CX), BX
+	MOVQ 24(CX), BP
+	MOVQ 48(CX), SI
+	MOVQ 72(CX), DI
+	MOVQ 96(CX), CX
+	// Broadcast the 0x0f nibble mask into every byte of Y11.
+	MOVQ         $0x0000000f, R8
+	MOVQ         R8, X11
+	VPBROADCASTB X11, Y11
+	// R8 is reused as the running byte offset, starting at `start`.
+	MOVQ start+72(FP), R8
+
+mulAvxTwo_5x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BX)(R8*1), Y12
+	VPSRLQ  $0x04, Y12, Y13
+	VPAND   Y11, Y12, Y12
+	VPAND   Y11, Y13, Y13
+	VPSHUFB Y12, Y1, Y12
+	VPSHUFB Y13, Y2, Y13
+	VPXOR   Y12, Y13, Y12
+	VPXOR   Y12, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (BP)(R8*1), Y12
+	VPSRLQ  $0x04, Y12, Y13
+	VPAND   Y11, Y12, Y12
+	VPAND   Y11, Y13, Y13
+	VPSHUFB Y12, Y3, Y12
+	VPSHUFB Y13, Y4, Y13
+	VPXOR   Y12, Y13, Y12
+	VPXOR   Y12, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (SI)(R8*1), Y12
+	VPSRLQ  $0x04, Y12, Y13
+	VPAND   Y11, Y12, Y12
+	VPAND   Y11, Y13, Y13
+	VPSHUFB Y12, Y5, Y12
+	VPSHUFB Y13, Y6, Y13
+	VPXOR   Y12, Y13, Y12
+	VPXOR   Y12, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (DI)(R8*1), Y12
+	VPSRLQ  $0x04, Y12, Y13
+	VPAND   Y11, Y12, Y12
+	VPAND   Y11, Y13, Y13
+	VPSHUFB Y12, Y7, Y12
+	VPSHUFB Y13, Y8, Y13
+	VPXOR   Y12, Y13, Y12
+	VPXOR   Y12, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (CX)(R8*1), Y12
+	VPSRLQ  $0x04, Y12, Y13
+	VPAND   Y11, Y12, Y12
+	VPAND   Y11, Y13, Y13
+	VPSHUFB Y12, Y9, Y12
+	VPSHUFB Y13, Y10, Y13
+	VPXOR   Y12, Y13, Y12
+	VPXOR   Y12, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R8*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R8
+	DECQ AX
+	JNZ  mulAvxTwo_5x1_loop
+	VZEROUPPER
+
+mulAvxTwo_5x1_end:
+	RET
+
+
+// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 5 input slices into 2 output slices, 32 bytes per loop
+// iteration. Too many tables to keep resident (5 inputs x 2 outputs x 2
+// nibble tables = 20), so tables are re-loaded from the matrix (CX) each
+// iteration. Y0/Y1 accumulate the two outputs, Y2 holds the broadcast 0x0f
+// nibble mask, Y5/Y6 the low/high nibbles, Y3/Y4 are table scratch.
+TEXT ·mulAvxTwo_5x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 27 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_5x2_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), DX
+	MOVQ  in_base+24(FP), BP
+	MOVQ  (BP), SI
+	MOVQ  24(BP), DI
+	MOVQ  48(BP), R8
+	MOVQ  72(BP), R9
+	MOVQ  96(BP), BP
+	MOVQ  $0x0000000f, R10
+	MOVQ  R10, X2
+	VPBROADCASTB X2, Y2
+	// R10 is reused as the running byte offset, starting at `start`.
+	MOVQ  start+72(FP), R10
+
+mulAvxTwo_5x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R10*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R10*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R10*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R10*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (BP)(R10*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R10*1)
+	VMOVDQU Y1, (DX)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ  mulAvxTwo_5x2_loop
+	VZEROUPPER
+
+mulAvxTwo_5x2_end:
+	RET
+
+
+// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 5 input slices into 3 output slices, 32 bytes per loop
+// iteration. Tables are streamed from the matrix (CX) each iteration at
+// consecutive 32-byte offsets (two tables per input/output pair: one for
+// the low nibble, one for the high nibble). Y0..Y2 accumulate outputs,
+// Y3 holds the broadcast 0x0f mask, Y6/Y7 the nibbles, Y4/Y5 are scratch.
+TEXT ·mulAvxTwo_5x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 38 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_5x3_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), DX
+	MOVQ  in_base+24(FP), SI
+	MOVQ  (SI), DI
+	MOVQ  24(SI), R8
+	MOVQ  48(SI), R9
+	MOVQ  72(SI), R10
+	MOVQ  96(SI), SI
+	MOVQ  $0x0000000f, R11
+	MOVQ  R11, X3
+	VPBROADCASTB X3, Y3
+	// R11 is reused as the running byte offset, starting at `start`.
+	MOVQ  start+72(FP), R11
+
+mulAvxTwo_5x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R11*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R11*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R11*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R10)(R11*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (SI)(R11*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R11*1)
+	VMOVDQU Y1, (BP)(R11*1)
+	VMOVDQU Y2, (DX)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ  mulAvxTwo_5x3_loop
+	VZEROUPPER
+
+mulAvxTwo_5x3_end:
+	RET
+
+
+// func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 5 input slices into 4 output slices, 32 bytes per loop
+// iteration. Tables are streamed from the matrix (CX) at consecutive
+// 32-byte offsets. Y0..Y3 accumulate outputs, Y4 holds the broadcast
+// 0x0f nibble mask, Y7/Y8 the low/high nibbles, Y5/Y6 are table scratch.
+TEXT ·mulAvxTwo_5x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 49 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_5x4_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DX
+	MOVQ  in_base+24(FP), DI
+	MOVQ  (DI), R8
+	MOVQ  24(DI), R9
+	MOVQ  48(DI), R10
+	MOVQ  72(DI), R11
+	MOVQ  96(DI), DI
+	MOVQ  $0x0000000f, R12
+	MOVQ  R12, X4
+	VPBROADCASTB X4, Y4
+	// R12 is reused as the running byte offset, starting at `start`.
+	MOVQ  start+72(FP), R12
+
+mulAvxTwo_5x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (R8)(R12*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (R9)(R12*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (R10)(R12*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R11)(R12*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (DI)(R12*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Store 4 outputs
+	VMOVDQU Y0, (BX)(R12*1)
+	VMOVDQU Y1, (BP)(R12*1)
+	VMOVDQU Y2, (SI)(R12*1)
+	VMOVDQU Y3, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_5x4_loop
+	VZEROUPPER
+
+mulAvxTwo_5x4_end:
+	RET
+
+
+// func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Multiplies 5 input slices into 5 output slices, 32 bytes per loop
+// iteration. Tables are streamed from the matrix (CX) at consecutive
+// 32-byte offsets. Y0..Y4 accumulate outputs, Y5 holds the broadcast
+// 0x0f nibble mask, Y8/Y9 the low/high nibbles, Y6/Y7 are table scratch.
+TEXT ·mulAvxTwo_5x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 60 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_5x5_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), DX
+	MOVQ  in_base+24(FP), R8
+	MOVQ  (R8), R9
+	MOVQ  24(R8), R10
+	MOVQ  48(R8), R11
+	MOVQ  72(R8), R12
+	MOVQ  96(R8), R8
+	MOVQ  $0x0000000f, R13
+	MOVQ  R13, X5
+	VPBROADCASTB X5, Y5
+	// R13 is reused as the running byte offset, starting at `start`.
+	MOVQ  start+72(FP), R13
+
+mulAvxTwo_5x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R13*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R13*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R11)(R13*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R12)(R13*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R8)(R13*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (BP)(R13*1)
+	VMOVDQU Y2, (SI)(R13*1)
+	VMOVDQU Y3, (DI)(R13*1)
+	VMOVDQU Y4, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ  mulAvxTwo_5x5_loop
+	VZEROUPPER
+
+mulAvxTwo_5x5_end:
+	RET
+
+
+// func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_5x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 71 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), DX
+ MOVQ in_base+24(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R9
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_5x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (R10)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (R11)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (R12)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R13)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ VMOVDQU Y0, (BX)(R14*1)
+ VMOVDQU Y1, (BP)(R14*1)
+ VMOVDQU Y2, (SI)(R14*1)
+ VMOVDQU Y3, (DI)(R14*1)
+ VMOVDQU Y4, (R8)(R14*1)
+ VMOVDQU Y5, (DX)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_5x6_loop
+ VZEROUPPER
+
+mulAvxTwo_5x6_end:
+ RET
+
+// func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Computes 7 output shards from 5 input shards, 32 bytes per iteration.
+// Nibble-split each input block, VPSHUFB each nibble through a 32-byte
+// table streamed from matrix, and XOR the halves into accumulators Y0-Y6
+// (4-bit split-table constant multiplication in GF(2^8)).
+// Generated code - do not hand-edit.
+TEXT ·mulAvxTwo_5x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 82 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R10
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R15
+
+mulAvxTwo_5x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R11)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (R12)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (R13)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R14)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R10)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R15*1)
+ VMOVDQU Y1, (BP)(R15*1)
+ VMOVDQU Y2, (SI)(R15*1)
+ VMOVDQU Y3, (DI)(R15*1)
+ VMOVDQU Y4, (R8)(R15*1)
+ VMOVDQU Y5, (R9)(R15*1)
+ VMOVDQU Y6, (DX)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_5x7_loop
+ VZEROUPPER
+
+mulAvxTwo_5x7_end:
+ RET
+
+// func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Computes 8 output shards from 5 input shards, 32 bytes per iteration,
+// using accumulators Y0-Y7 and matrix tables streamed from CX (4-bit
+// split-table GF(2^8) multiplication). Register pressure is high enough
+// here that the output slice pointers are re-read from out_base on every
+// store instead of being kept in registers across the loop.
+// Generated code - do not hand-edit.
+TEXT ·mulAvxTwo_5x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 93 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), BX
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_5x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (BX)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R10
+ VMOVDQU Y0, (R10)(R9*1)
+ MOVQ 24(DX), R10
+ VMOVDQU Y1, (R10)(R9*1)
+ MOVQ 48(DX), R10
+ VMOVDQU Y2, (R10)(R9*1)
+ MOVQ 72(DX), R10
+ VMOVDQU Y3, (R10)(R9*1)
+ MOVQ 96(DX), R10
+ VMOVDQU Y4, (R10)(R9*1)
+ MOVQ 120(DX), R10
+ VMOVDQU Y5, (R10)(R9*1)
+ MOVQ 144(DX), R10
+ VMOVDQU Y6, (R10)(R9*1)
+ MOVQ 168(DX), R10
+ VMOVDQU Y7, (R10)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_5x8_loop
+ VZEROUPPER
+
+mulAvxTwo_5x8_end:
+ RET
+
+// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// One output shard from six input shards. For every 32-byte block, each
+// input byte is split into low/high nibbles; each nibble indexes a 32-byte
+// lookup table with VPSHUFB (two tables per input, all 12 preloaded into
+// Y1-Y12 from matrix) and the two results are XORed into the output
+// accumulator. This is the 4-bit split-table scheme for constant GF(2^8)
+// multiplication (table layout is defined by the Go-side caller —
+// NOTE(review): confirm against the generator / galois_amd64.go).
+// Generated code - do not hand-edit.
+TEXT ·mulAvxTwo_6x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 16 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n / 32: number of 32-byte blocks to process; bail out on zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ // Preload the 12 lookup tables (low/high-nibble pair per input) into Y1..Y12.
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ // Base pointers of the six input shards.
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), BP
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ // Y13 = 0x0f in every byte lane: nibble mask for VPSHUFB indexing.
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X13
+ VPBROADCASTB X13, Y13
+ // R9 = byte offset into every shard; advances by 32 per iteration.
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_6x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(R9*1), Y14
+ // Y14 = low nibbles, Y15 = high nibbles of the 32 source bytes.
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ // Table lookups for both nibbles; XOR of the two halves is the product.
+ VPSHUFB Y14, Y1, Y14
+ VPSHUFB Y15, Y2, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BP)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y3, Y14
+ VPSHUFB Y15, Y4, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y5, Y14
+ VPSHUFB Y15, Y6, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y7, Y14
+ VPSHUFB Y15, Y8, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R8)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y9, Y14
+ VPSHUFB Y15, Y10, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (CX)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y11, Y14
+ VPSHUFB Y15, Y12, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_6x1_loop
+ // Clear upper YMM halves to avoid AVX/SSE transition penalties on return.
+ VZEROUPPER
+
+mulAvxTwo_6x1_end:
+ RET
+
+// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Two output shards from six input shards, 32 bytes per iteration. Too many
+// tables to keep resident (6 inputs x 2 outputs x 2 nibble tables = 24), so
+// each 32-byte table is reloaded from matrix (CX) at every use. Per input
+// block: split bytes into low/high nibbles, VPSHUFB each nibble through its
+// table, XOR the halves, then XOR into the matching output accumulator
+// (Y0 or Y1) — the 4-bit split-table method for constant GF(2^8)
+// multiplication. Generated code - do not hand-edit.
+TEXT ·mulAvxTwo_6x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 31 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n / 32: number of 32-byte blocks; nothing to do if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x2_end
+ // Base pointers of the two output shards.
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ // Base pointers of the six input shards.
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), R8
+ MOVQ 72(BP), R9
+ MOVQ 96(BP), R10
+ MOVQ 120(BP), BP
+ // Y2 = 0x0f in every byte lane: nibble mask for VPSHUFB indexing.
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X2
+ VPBROADCASTB X2, Y2
+ // R11 = byte offset into every shard; advances by 32 per iteration.
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_6x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R11*1), Y5
+ // Y5 = low nibbles, Y6 = high nibbles of the 32 source bytes.
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (R8)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R9)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R10)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (BP)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x2_loop
+ // Clear upper YMM halves to avoid AVX/SSE transition penalties on return.
+ VZEROUPPER
+
+mulAvxTwo_6x2_end:
+ RET
+
+// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Computes 3 output shards from 6 input shards, 32 bytes per iteration.
+// Nibble-split each input block, VPSHUFB each nibble through a 32-byte
+// table streamed from matrix, and XOR the halves into accumulators Y0-Y2
+// (4-bit split-table constant multiplication in GF(2^8)).
+// Generated code - do not hand-edit.
+TEXT ·mulAvxTwo_6x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 44 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), SI
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_6x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (R8)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (R9)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R10)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R11)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (SI)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_6x3_loop
+ VZEROUPPER
+
+mulAvxTwo_6x3_end:
+ RET
+
+// func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Computes 4 output shards from 6 input shards, 32 bytes per iteration.
+// Nibble-split each input block, VPSHUFB each nibble through a 32-byte
+// table streamed from matrix, and XOR the halves into accumulators Y0-Y3
+// (4-bit split-table constant multiplication in GF(2^8)).
+// Generated code - do not hand-edit.
+TEXT ·mulAvxTwo_6x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 57 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), DI
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_6x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (R9)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (R10)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R11)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R12)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (DI)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R13*1)
+ VMOVDQU Y1, (BP)(R13*1)
+ VMOVDQU Y2, (SI)(R13*1)
+ VMOVDQU Y3, (DX)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_6x4_loop
+ VZEROUPPER
+
+mulAvxTwo_6x4_end:
+ RET
+
+// func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_6x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 70 YMM used
+	// NOTE(review): the fully repetitive structure suggests this file is
+	// machine generated — regenerate rather than hand-edit.
+	// Multiplies 6 input slices by a 6x5 GF(2^8) coefficient matrix and
+	// XOR-accumulates into 5 output slices, 32 bytes per loop iteration.
+	// Each coefficient is stored in `matrix` as two consecutive 32-byte
+	// PSHUFB lookup tables (low nibble, high nibble), so table offsets
+	// advance by 64 per (input, output) pair; the last pair sits at
+	// 64*(6*5-1) = 1856.
+	// AX = n/32 = number of 32-byte blocks to process.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_6x5_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DI
+	MOVQ 96(DX), DX
+	MOVQ in_base+24(FP), R8
+	MOVQ (R8), R9
+	MOVQ 24(R8), R10
+	MOVQ 48(R8), R11
+	MOVQ 72(R8), R12
+	MOVQ 96(R8), R13
+	MOVQ 120(R8), R8
+	// Broadcast the 0x0f mask into Y5; it splits each byte into its
+	// low/high 4-bit halves for the nibble table lookups.
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X5
+	VPBROADCASTB X5, Y5
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_6x5_loop:
+	// R14 is the shared byte offset into every input/output slice.
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R11)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R12)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R13)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R8)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (SI)(R14*1)
+	VMOVDQU Y3, (DI)(R14*1)
+	VMOVDQU Y4, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_6x5_loop
+	VZEROUPPER
+
+mulAvxTwo_6x5_end:
+	RET
+
+// func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_6x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 83 YMM used
+	// Multiplies 6 input slices by a 6x6 GF(2^8) coefficient matrix and
+	// XOR-accumulates into 6 output slices, 32 bytes per loop iteration.
+	// Each coefficient is two consecutive 32-byte PSHUFB lookup tables
+	// (low nibble, high nibble) in `matrix`; offsets advance by 64 per
+	// (input, output) pair, last pair at 64*(6*6-1) = 2240.
+	// AX = n/32 = number of 32-byte blocks to process.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_6x6_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DI
+	MOVQ 96(DX), R8
+	MOVQ 120(DX), DX
+	MOVQ in_base+24(FP), R9
+	MOVQ (R9), R10
+	MOVQ 24(R9), R11
+	MOVQ 48(R9), R12
+	MOVQ 72(R9), R13
+	MOVQ 96(R9), R14
+	MOVQ 120(R9), R9
+	// Broadcast the 0x0f mask into Y6; it splits each byte into its
+	// low/high 4-bit halves for the nibble table lookups.
+	MOVQ $0x0000000f, R15
+	MOVQ R15, X6
+	VPBROADCASTB X6, Y6
+	MOVQ start+72(FP), R15
+
+mulAvxTwo_6x6_loop:
+	// R15 is the shared byte offset into every input/output slice.
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (R10)(R15*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (R11)(R15*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (R12)(R15*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R13)(R15*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 4 to 6 outputs
+	VMOVDQU (R14)(R15*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1536(CX), Y7
+	VMOVDQU 1568(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1600(CX), Y7
+	VMOVDQU 1632(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1664(CX), Y7
+	VMOVDQU 1696(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1728(CX), Y7
+	VMOVDQU 1760(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1792(CX), Y7
+	VMOVDQU 1824(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1856(CX), Y7
+	VMOVDQU 1888(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 5 to 6 outputs
+	VMOVDQU (R9)(R15*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1920(CX), Y7
+	VMOVDQU 1952(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1984(CX), Y7
+	VMOVDQU 2016(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 2048(CX), Y7
+	VMOVDQU 2080(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 2112(CX), Y7
+	VMOVDQU 2144(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 2176(CX), Y7
+	VMOVDQU 2208(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 2240(CX), Y7
+	VMOVDQU 2272(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Store 6 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (SI)(R15*1)
+	VMOVDQU Y3, (DI)(R15*1)
+	VMOVDQU Y4, (R8)(R15*1)
+	VMOVDQU Y5, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ mulAvxTwo_6x6_loop
+	VZEROUPPER
+
+mulAvxTwo_6x6_end:
+	RET
+
+// func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_6x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 96 YMM used
+	// Multiplies 6 input slices by a 6x7 GF(2^8) coefficient matrix and
+	// XOR-accumulates into 7 output slices, 32 bytes per loop iteration.
+	// Each coefficient is two consecutive 32-byte PSHUFB lookup tables
+	// (low nibble, high nibble) in `matrix`; offsets advance by 64 per
+	// (input, output) pair, last pair at 64*(6*7-1) = 2624.
+	// Unlike the smaller kernels, only the out slice header (DX) is kept
+	// in a register; the 7 output pointers are reloaded at each store.
+	// AX = n/32 = number of 32-byte blocks to process.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_6x7_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), BX
+	// Broadcast the 0x0f mask into Y7; it splits each byte into its
+	// low/high 4-bit halves for the nibble table lookups.
+	MOVQ $0x0000000f, R10
+	MOVQ R10, X7
+	VPBROADCASTB X7, Y7
+	MOVQ start+72(FP), R10
+
+mulAvxTwo_6x7_loop:
+	// R10 is the shared byte offset into every input/output slice.
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (BP)(R10*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (SI)(R10*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (DI)(R10*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 3 to 7 outputs
+	VMOVDQU (R8)(R10*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 1344(CX), Y8
+	VMOVDQU 1376(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 1408(CX), Y8
+	VMOVDQU 1440(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 1472(CX), Y8
+	VMOVDQU 1504(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 1536(CX), Y8
+	VMOVDQU 1568(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 1600(CX), Y8
+	VMOVDQU 1632(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 1664(CX), Y8
+	VMOVDQU 1696(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 1728(CX), Y8
+	VMOVDQU 1760(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 4 to 7 outputs
+	VMOVDQU (R9)(R10*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 1792(CX), Y8
+	VMOVDQU 1824(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 1856(CX), Y8
+	VMOVDQU 1888(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 1920(CX), Y8
+	VMOVDQU 1952(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 1984(CX), Y8
+	VMOVDQU 2016(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 2048(CX), Y8
+	VMOVDQU 2080(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 2112(CX), Y8
+	VMOVDQU 2144(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 2176(CX), Y8
+	VMOVDQU 2208(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 5 to 7 outputs
+	VMOVDQU (BX)(R10*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 2240(CX), Y8
+	VMOVDQU 2272(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 2304(CX), Y8
+	VMOVDQU 2336(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 2368(CX), Y8
+	VMOVDQU 2400(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 2432(CX), Y8
+	VMOVDQU 2464(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 2496(CX), Y8
+	VMOVDQU 2528(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 2560(CX), Y8
+	VMOVDQU 2592(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 2624(CX), Y8
+	VMOVDQU 2656(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Store 7 outputs
+	// Too few free GP registers to pin 7 output pointers; reload each
+	// from the out slice header (DX) every iteration.
+	MOVQ (DX), R11
+	VMOVDQU Y0, (R11)(R10*1)
+	MOVQ 24(DX), R11
+	VMOVDQU Y1, (R11)(R10*1)
+	MOVQ 48(DX), R11
+	VMOVDQU Y2, (R11)(R10*1)
+	MOVQ 72(DX), R11
+	VMOVDQU Y3, (R11)(R10*1)
+	MOVQ 96(DX), R11
+	VMOVDQU Y4, (R11)(R10*1)
+	MOVQ 120(DX), R11
+	VMOVDQU Y5, (R11)(R10*1)
+	MOVQ 144(DX), R11
+	VMOVDQU Y6, (R11)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ mulAvxTwo_6x7_loop
+	VZEROUPPER
+
+mulAvxTwo_6x7_end:
+	RET
+
+// func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_6x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 109 YMM used
+	// Multiplies 6 input slices by a 6x8 GF(2^8) coefficient matrix and
+	// XOR-accumulates into 8 output slices, 32 bytes per loop iteration.
+	// Each coefficient is two consecutive 32-byte PSHUFB lookup tables
+	// (low nibble, high nibble) in `matrix`; offsets advance by 64 per
+	// (input, output) pair, last pair at 64*(6*8-1) = 3008.
+	// Only the out slice header (DX) is kept in a register; the 8 output
+	// pointers are reloaded at each store.
+	// AX = n/32 = number of 32-byte blocks to process.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_6x8_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), BX
+	// Broadcast the 0x0f mask into Y8; it splits each byte into its
+	// low/high 4-bit halves for the nibble table lookups.
+	MOVQ $0x0000000f, R10
+	MOVQ R10, X8
+	VPBROADCASTB X8, Y8
+	MOVQ start+72(FP), R10
+
+mulAvxTwo_6x8_loop:
+	// R10 is the shared byte offset into every input/output slice.
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (BP)(R10*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (SI)(R10*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (DI)(R10*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 3 to 8 outputs
+	VMOVDQU (R8)(R10*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 1536(CX), Y9
+	VMOVDQU 1568(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 1600(CX), Y9
+	VMOVDQU 1632(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 1664(CX), Y9
+	VMOVDQU 1696(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 1728(CX), Y9
+	VMOVDQU 1760(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 1792(CX), Y9
+	VMOVDQU 1824(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 1856(CX), Y9
+	VMOVDQU 1888(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 1920(CX), Y9
+	VMOVDQU 1952(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 1984(CX), Y9
+	VMOVDQU 2016(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 4 to 8 outputs
+	VMOVDQU (R9)(R10*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 2048(CX), Y9
+	VMOVDQU 2080(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 2112(CX), Y9
+	VMOVDQU 2144(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 2176(CX), Y9
+	VMOVDQU 2208(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 2240(CX), Y9
+	VMOVDQU 2272(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 2304(CX), Y9
+	VMOVDQU 2336(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 2368(CX), Y9
+	VMOVDQU 2400(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 2432(CX), Y9
+	VMOVDQU 2464(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 2496(CX), Y9
+	VMOVDQU 2528(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 5 to 8 outputs
+	VMOVDQU (BX)(R10*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 2560(CX), Y9
+	VMOVDQU 2592(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 2624(CX), Y9
+	VMOVDQU 2656(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 2688(CX), Y9
+	VMOVDQU 2720(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 2752(CX), Y9
+	VMOVDQU 2784(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 2816(CX), Y9
+	VMOVDQU 2848(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 2880(CX), Y9
+	VMOVDQU 2912(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 2944(CX), Y9
+	VMOVDQU 2976(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 3008(CX), Y9
+	VMOVDQU 3040(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Store 8 outputs
+	// Too few free GP registers to pin 8 output pointers; reload each
+	// from the out slice header (DX) every iteration.
+	MOVQ (DX), R11
+	VMOVDQU Y0, (R11)(R10*1)
+	MOVQ 24(DX), R11
+	VMOVDQU Y1, (R11)(R10*1)
+	MOVQ 48(DX), R11
+	VMOVDQU Y2, (R11)(R10*1)
+	MOVQ 72(DX), R11
+	VMOVDQU Y3, (R11)(R10*1)
+	MOVQ 96(DX), R11
+	VMOVDQU Y4, (R11)(R10*1)
+	MOVQ 120(DX), R11
+	VMOVDQU Y5, (R11)(R10*1)
+	MOVQ 144(DX), R11
+	VMOVDQU Y6, (R11)(R10*1)
+	MOVQ 168(DX), R11
+	VMOVDQU Y7, (R11)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ mulAvxTwo_6x8_loop
+	VZEROUPPER
+
+mulAvxTwo_6x8_end:
+	RET
+
+// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 18 YMM used
+	// Multiplies 7 input slices by a 7x1 GF(2^8) coefficient column and
+	// XOR-accumulates into a single output slice, 32 bytes per loop
+	// iteration. Each coefficient is two consecutive 32-byte PSHUFB
+	// lookup tables (low nibble, high nibble) in `matrix`; offsets
+	// advance by 64 per input, last pair at 64*(7-1) = 384.
+	// AX = n/32 = number of 32-byte blocks to process.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x1_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), BX
+	// Broadcast the 0x0f mask into Y1; it splits each byte into its
+	// low/high 4-bit halves for the nibble table lookups.
+	MOVQ $0x0000000f, R11
+	MOVQ R11, X1
+	VPBROADCASTB X1, Y1
+	MOVQ start+72(FP), R11
+
+mulAvxTwo_7x1_loop:
+	// R11 is the shared byte offset into every input/output slice.
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BP)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (BX)(R11*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ mulAvxTwo_7x1_loop
+	VZEROUPPER
+
+mulAvxTwo_7x1_end:
+	RET
+
+// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 35 YMM used
+	// Computes 2 output slices as GF(2^8) linear combinations of 7 input
+	// slices, 32 bytes per loop iteration. Each input byte is split into
+	// low/high nibbles which index a pair of 32-byte VPSHUFB lookup tables
+	// in matrix; the table pair for input i / output j starts at offset
+	// (i*2+j)*64 from CX. Partial products are XOR-accumulated into Y0/Y1.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n / 32: number of 32-byte blocks to process
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x2_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), DX
+	MOVQ in_base+24(FP), BP
+	MOVQ (BP), SI
+	MOVQ 24(BP), DI
+	MOVQ 48(BP), R8
+	MOVQ 72(BP), R9
+	MOVQ 96(BP), R10
+	MOVQ 120(BP), R11
+	MOVQ 144(BP), BP
+	// Y2 = 0x0f broadcast to every byte: nibble mask
+	MOVQ $0x0000000f, R12
+	MOVQ R12, X2
+	VPBROADCASTB X2, Y2
+	// R12 = current byte offset into every input/output slice
+	MOVQ start+72(FP), R12
+
+mulAvxTwo_7x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (R10)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 5 to 2 outputs
+	VMOVDQU (R11)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 640(CX), Y3
+	VMOVDQU 672(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 704(CX), Y3
+	VMOVDQU 736(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 6 to 2 outputs
+	VMOVDQU (BP)(R12*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 768(CX), Y3
+	VMOVDQU 800(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 832(CX), Y3
+	VMOVDQU 864(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R12*1)
+	VMOVDQU Y1, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ mulAvxTwo_7x2_loop
+	VZEROUPPER
+
+mulAvxTwo_7x2_end:
+	RET
+
+
+// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 50 YMM used
+	// Computes 3 output slices as GF(2^8) linear combinations of 7 input
+	// slices, 32 bytes per loop iteration. Each input byte is split into
+	// low/high nibbles which index a pair of 32-byte VPSHUFB lookup tables
+	// in matrix; the table pair for input i / output j starts at offset
+	// (i*3+j)*64 from CX. Partial products are XOR-accumulated into Y0-Y2.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n / 32: number of 32-byte blocks to process
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x3_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), DX
+	MOVQ in_base+24(FP), SI
+	MOVQ (SI), DI
+	MOVQ 24(SI), R8
+	MOVQ 48(SI), R9
+	MOVQ 72(SI), R10
+	MOVQ 96(SI), R11
+	MOVQ 120(SI), R12
+	MOVQ 144(SI), SI
+	// Y3 = 0x0f broadcast to every byte: nibble mask
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X3
+	VPBROADCASTB X3, Y3
+	// R13 = current byte offset into every input/output slice
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_7x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R10)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (R11)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 5 to 3 outputs
+	VMOVDQU (R12)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 960(CX), Y4
+	VMOVDQU 992(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1024(CX), Y4
+	VMOVDQU 1056(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1088(CX), Y4
+	VMOVDQU 1120(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 6 to 3 outputs
+	VMOVDQU (SI)(R13*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1152(CX), Y4
+	VMOVDQU 1184(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1216(CX), Y4
+	VMOVDQU 1248(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1280(CX), Y4
+	VMOVDQU 1312(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (BP)(R13*1)
+	VMOVDQU Y2, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_7x3_loop
+	VZEROUPPER
+
+mulAvxTwo_7x3_end:
+	RET
+
+
+// func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 65 YMM used
+	// Computes 4 output slices as GF(2^8) linear combinations of 7 input
+	// slices, 32 bytes per loop iteration. Each input byte is split into
+	// low/high nibbles which index a pair of 32-byte VPSHUFB lookup tables
+	// in matrix; the table pair for input i / output j starts at offset
+	// (i*4+j)*64 from CX. Partial products are XOR-accumulated into Y0-Y3.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n / 32: number of 32-byte blocks to process
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x4_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DX
+	MOVQ in_base+24(FP), DI
+	MOVQ (DI), R8
+	MOVQ 24(DI), R9
+	MOVQ 48(DI), R10
+	MOVQ 72(DI), R11
+	MOVQ 96(DI), R12
+	MOVQ 120(DI), R13
+	MOVQ 144(DI), DI
+	// Y4 = 0x0f broadcast to every byte: nibble mask
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X4
+	VPBROADCASTB X4, Y4
+	// R14 = current byte offset into every input/output slice
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_7x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (R8)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (R9)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (R10)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R11)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (R12)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 5 to 4 outputs
+	VMOVDQU (R13)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1280(CX), Y5
+	VMOVDQU 1312(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1344(CX), Y5
+	VMOVDQU 1376(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1408(CX), Y5
+	VMOVDQU 1440(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1472(CX), Y5
+	VMOVDQU 1504(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 6 to 4 outputs
+	VMOVDQU (DI)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1536(CX), Y5
+	VMOVDQU 1568(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1600(CX), Y5
+	VMOVDQU 1632(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1664(CX), Y5
+	VMOVDQU 1696(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1728(CX), Y5
+	VMOVDQU 1760(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Store 4 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (SI)(R14*1)
+	VMOVDQU Y3, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_7x4_loop
+	VZEROUPPER
+
+mulAvxTwo_7x4_end:
+	RET
+
+
+// func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 80 YMM used
+	// Computes 5 output slices as GF(2^8) linear combinations of 7 input
+	// slices, 32 bytes per loop iteration. Each input byte is split into
+	// low/high nibbles which index a pair of 32-byte VPSHUFB lookup tables
+	// in matrix; the table pair for input i / output j starts at offset
+	// (i*5+j)*64 from CX. Partial products are XOR-accumulated into Y0-Y4.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n / 32: number of 32-byte blocks to process
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x5_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DI
+	MOVQ 96(DX), DX
+	MOVQ in_base+24(FP), R8
+	MOVQ (R8), R9
+	MOVQ 24(R8), R10
+	MOVQ 48(R8), R11
+	MOVQ 72(R8), R12
+	MOVQ 96(R8), R13
+	MOVQ 120(R8), R14
+	MOVQ 144(R8), R8
+	// Y5 = 0x0f broadcast to every byte: nibble mask
+	MOVQ $0x0000000f, R15
+	MOVQ R15, X5
+	VPBROADCASTB X5, Y5
+	// R15 = current byte offset into every input/output slice
+	MOVQ start+72(FP), R15
+
+mulAvxTwo_7x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R11)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R12)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R13)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R14)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 6 to 5 outputs
+	VMOVDQU (R8)(R15*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1920(CX), Y6
+	VMOVDQU 1952(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1984(CX), Y6
+	VMOVDQU 2016(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2048(CX), Y6
+	VMOVDQU 2080(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2112(CX), Y6
+	VMOVDQU 2144(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2176(CX), Y6
+	VMOVDQU 2208(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (SI)(R15*1)
+	VMOVDQU Y3, (DI)(R15*1)
+	VMOVDQU Y4, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ mulAvxTwo_7x5_loop
+	VZEROUPPER
+
+mulAvxTwo_7x5_end:
+	RET
+
+
+// func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 95 YMM used
+	// Computes 6 output slices as GF(2^8) linear combinations of 7 input
+	// slices, 32 bytes per loop iteration. Each input byte is split into
+	// low/high nibbles which index a pair of 32-byte VPSHUFB lookup tables
+	// in matrix; the table pair for input i / output j starts at offset
+	// (i*6+j)*64 from CX. Partial products are XOR-accumulated into Y0-Y5.
+	// Unlike the smaller kernels, there are not enough GP registers to pin
+	// all 6 output pointers, so out_base stays in DX and each output
+	// pointer is reloaded in the store section of every iteration.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n / 32: number of 32-byte blocks to process
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x6_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), BX
+	// Y6 = 0x0f broadcast to every byte: nibble mask
+	MOVQ $0x0000000f, R11
+	MOVQ R11, X6
+	VPBROADCASTB X6, Y6
+	// R11 = current byte offset into every input/output slice
+	MOVQ start+72(FP), R11
+
+mulAvxTwo_7x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (BP)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (SI)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (DI)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R8)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 4 to 6 outputs
+	VMOVDQU (R9)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1536(CX), Y7
+	VMOVDQU 1568(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1600(CX), Y7
+	VMOVDQU 1632(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1664(CX), Y7
+	VMOVDQU 1696(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1728(CX), Y7
+	VMOVDQU 1760(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1792(CX), Y7
+	VMOVDQU 1824(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1856(CX), Y7
+	VMOVDQU 1888(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 5 to 6 outputs
+	VMOVDQU (R10)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1920(CX), Y7
+	VMOVDQU 1952(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1984(CX), Y7
+	VMOVDQU 2016(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 2048(CX), Y7
+	VMOVDQU 2080(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 2112(CX), Y7
+	VMOVDQU 2144(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 2176(CX), Y7
+	VMOVDQU 2208(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 2240(CX), Y7
+	VMOVDQU 2272(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 6 to 6 outputs
+	VMOVDQU (BX)(R11*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 2304(CX), Y7
+	VMOVDQU 2336(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 2368(CX), Y7
+	VMOVDQU 2400(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 2432(CX), Y7
+	VMOVDQU 2464(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 2496(CX), Y7
+	VMOVDQU 2528(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 2560(CX), Y7
+	VMOVDQU 2592(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 2624(CX), Y7
+	VMOVDQU 2656(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Store 6 outputs
+	// Output pointers are reloaded from out_base (DX) each iteration;
+	// R12 is the only GP scratch register left.
+	MOVQ (DX), R12
+	VMOVDQU Y0, (R12)(R11*1)
+	MOVQ 24(DX), R12
+	VMOVDQU Y1, (R12)(R11*1)
+	MOVQ 48(DX), R12
+	VMOVDQU Y2, (R12)(R11*1)
+	MOVQ 72(DX), R12
+	VMOVDQU Y3, (R12)(R11*1)
+	MOVQ 96(DX), R12
+	VMOVDQU Y4, (R12)(R11*1)
+	MOVQ 120(DX), R12
+	VMOVDQU Y5, (R12)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ mulAvxTwo_7x6_loop
+	VZEROUPPER
+
+mulAvxTwo_7x6_end:
+	RET
+
+
+// func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), BX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_7x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BP)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (BX)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ MOVQ (DX), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(DX), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(DX), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(DX), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(DX), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(DX), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(DX), R12
+ VMOVDQU Y6, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_7x7_loop
+ VZEROUPPER
+
+mulAvxTwo_7x7_end:
+ RET
+
+// func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_7x8 folds 7 input slices into 8 output slices, 32 bytes per
+// loop iteration. Each input byte is split into its low and high 4-bit
+// halves (mask Y8 = 0x0f replicated to all lanes); each half indexes a
+// 32-byte lookup table from `matrix` via VPSHUFB, and the two partial
+// results are XORed together and into the per-output accumulators
+// Y0..Y7. Tables are consumed in order from CX: 2 tables x 8 outputs x
+// 32 bytes = 512 bytes of matrix per input.
+// NOTE(review): presumably this is GF(2^8) multiply-and-accumulate for
+// Reed-Solomon encoding (split-nibble SHUFB technique) — confirm
+// against the pure-Go reference in this package.
+TEXT ·mulAvxTwo_7x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 125 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks (n / 32); nothing to do if zero.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_7x8_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	// Hoist the 7 input base pointers out of the loop (24 = sizeof(slice header)).
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), BX
+	// Y8 = 0x0f in every byte lane: low-nibble extraction mask.
+	MOVQ $0x0000000f, R11
+	MOVQ R11, X8
+	VPBROADCASTB X8, Y8
+	// R11 = current byte offset into every input and output slice.
+	MOVQ start+72(FP), R11
+
+mulAvxTwo_7x8_loop:
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (BP)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (SI)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (DI)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 3 to 8 outputs
+	VMOVDQU (R8)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 1536(CX), Y9
+	VMOVDQU 1568(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 1600(CX), Y9
+	VMOVDQU 1632(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 1664(CX), Y9
+	VMOVDQU 1696(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 1728(CX), Y9
+	VMOVDQU 1760(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 1792(CX), Y9
+	VMOVDQU 1824(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 1856(CX), Y9
+	VMOVDQU 1888(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 1920(CX), Y9
+	VMOVDQU 1952(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 1984(CX), Y9
+	VMOVDQU 2016(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 4 to 8 outputs
+	VMOVDQU (R9)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 2048(CX), Y9
+	VMOVDQU 2080(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 2112(CX), Y9
+	VMOVDQU 2144(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 2176(CX), Y9
+	VMOVDQU 2208(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 2240(CX), Y9
+	VMOVDQU 2272(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 2304(CX), Y9
+	VMOVDQU 2336(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 2368(CX), Y9
+	VMOVDQU 2400(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 2432(CX), Y9
+	VMOVDQU 2464(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 2496(CX), Y9
+	VMOVDQU 2528(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 5 to 8 outputs
+	VMOVDQU (R10)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 2560(CX), Y9
+	VMOVDQU 2592(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 2624(CX), Y9
+	VMOVDQU 2656(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 2688(CX), Y9
+	VMOVDQU 2720(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 2752(CX), Y9
+	VMOVDQU 2784(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 2816(CX), Y9
+	VMOVDQU 2848(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 2880(CX), Y9
+	VMOVDQU 2912(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 2944(CX), Y9
+	VMOVDQU 2976(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 3008(CX), Y9
+	VMOVDQU 3040(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 6 to 8 outputs
+	VMOVDQU (BX)(R11*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 3072(CX), Y9
+	VMOVDQU 3104(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 3136(CX), Y9
+	VMOVDQU 3168(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 3200(CX), Y9
+	VMOVDQU 3232(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 3264(CX), Y9
+	VMOVDQU 3296(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 3328(CX), Y9
+	VMOVDQU 3360(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 3392(CX), Y9
+	VMOVDQU 3424(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 3456(CX), Y9
+	VMOVDQU 3488(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 3520(CX), Y9
+	VMOVDQU 3552(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Store 8 outputs
+	// No free GP registers to cache 8 output pointers, so each is
+	// re-read from the out slice-header array every pass (stride 24
+	// = sizeof(slice header) on amd64).
+	MOVQ (DX), R12
+	VMOVDQU Y0, (R12)(R11*1)
+	MOVQ 24(DX), R12
+	VMOVDQU Y1, (R12)(R11*1)
+	MOVQ 48(DX), R12
+	VMOVDQU Y2, (R12)(R11*1)
+	MOVQ 72(DX), R12
+	VMOVDQU Y3, (R12)(R11*1)
+	MOVQ 96(DX), R12
+	VMOVDQU Y4, (R12)(R11*1)
+	MOVQ 120(DX), R12
+	VMOVDQU Y5, (R12)(R11*1)
+	MOVQ 144(DX), R12
+	VMOVDQU Y6, (R12)(R11*1)
+	MOVQ 168(DX), R12
+	VMOVDQU Y7, (R12)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ mulAvxTwo_7x8_loop
+	VZEROUPPER
+
+mulAvxTwo_7x8_end:
+	RET
+
+// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x1 folds 8 input slices into 1 output slice, 32 bytes per
+// loop iteration. Each input byte is split into its low and high 4-bit
+// halves (mask Y1 = 0x0f replicated to all lanes); each half indexes a
+// 32-byte lookup table from `matrix` via VPSHUFB, and the two partial
+// results are XORed together and into the accumulator Y0. Tables are
+// consumed in order from CX: 2 tables x 32 bytes = 64 bytes per input.
+// NOTE(review): presumably GF(2^8) multiply-and-accumulate for
+// Reed-Solomon encoding — confirm against the pure-Go reference.
+TEXT ·mulAvxTwo_8x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 20 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks (n / 32); nothing to do if zero.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_8x1_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), DX
+	MOVQ in_base+24(FP), BX
+	// Hoist the 8 input base pointers out of the loop (24 = sizeof(slice header)).
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), BX
+	// Y1 = 0x0f in every byte lane: low-nibble extraction mask.
+	MOVQ $0x0000000f, R12
+	MOVQ R12, X1
+	VPBROADCASTB X1, Y1
+	// R12 = current byte offset into every input and output slice.
+	MOVQ start+72(FP), R12
+
+mulAvxTwo_8x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BP)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (R11)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 7 to 1 outputs
+	VMOVDQU (BX)(R12*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 448(CX), Y2
+	VMOVDQU 480(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ mulAvxTwo_8x1_loop
+	VZEROUPPER
+
+mulAvxTwo_8x1_end:
+	RET
+
+// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x2 folds 8 input slices into 2 output slices, 32 bytes per
+// loop iteration. Each input byte is split into its low and high 4-bit
+// halves (mask Y2 = 0x0f replicated to all lanes); each half indexes a
+// 32-byte lookup table from `matrix` via VPSHUFB, and the two partial
+// results are XORed together and into the accumulators Y0/Y1. Tables
+// are consumed in order from CX: 2 tables x 2 outputs x 32 bytes = 128
+// bytes per input.
+// NOTE(review): presumably GF(2^8) multiply-and-accumulate for
+// Reed-Solomon encoding — confirm against the pure-Go reference.
+TEXT ·mulAvxTwo_8x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 39 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks (n / 32); nothing to do if zero.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_8x2_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), DX
+	MOVQ in_base+24(FP), BP
+	// Hoist the 8 input base pointers out of the loop (24 = sizeof(slice header)).
+	MOVQ (BP), SI
+	MOVQ 24(BP), DI
+	MOVQ 48(BP), R8
+	MOVQ 72(BP), R9
+	MOVQ 96(BP), R10
+	MOVQ 120(BP), R11
+	MOVQ 144(BP), R12
+	MOVQ 168(BP), BP
+	// Y2 = 0x0f in every byte lane: low-nibble extraction mask.
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X2
+	VPBROADCASTB X2, Y2
+	// R13 = current byte offset into every input and output slice.
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_8x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (R10)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 5 to 2 outputs
+	VMOVDQU (R11)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 640(CX), Y3
+	VMOVDQU 672(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 704(CX), Y3
+	VMOVDQU 736(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 6 to 2 outputs
+	VMOVDQU (R12)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 768(CX), Y3
+	VMOVDQU 800(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 832(CX), Y3
+	VMOVDQU 864(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 7 to 2 outputs
+	VMOVDQU (BP)(R13*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 896(CX), Y3
+	VMOVDQU 928(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 960(CX), Y3
+	VMOVDQU 992(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_8x2_loop
+	VZEROUPPER
+
+mulAvxTwo_8x2_end:
+	RET
+
+// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x3 folds 8 input slices into 3 output slices, 32 bytes per
+// loop iteration. Each input byte is split into its low and high 4-bit
+// halves (mask Y3 = 0x0f replicated to all lanes); each half indexes a
+// 32-byte lookup table from `matrix` via VPSHUFB, and the two partial
+// results are XORed together and into the accumulators Y0..Y2. Tables
+// are consumed in order from CX: 2 tables x 3 outputs x 32 bytes = 192
+// bytes per input.
+// NOTE(review): presumably GF(2^8) multiply-and-accumulate for
+// Reed-Solomon encoding — confirm against the pure-Go reference.
+TEXT ·mulAvxTwo_8x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 56 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks (n / 32); nothing to do if zero.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_8x3_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), DX
+	MOVQ in_base+24(FP), SI
+	// Hoist the 8 input base pointers out of the loop (24 = sizeof(slice header)).
+	MOVQ (SI), DI
+	MOVQ 24(SI), R8
+	MOVQ 48(SI), R9
+	MOVQ 72(SI), R10
+	MOVQ 96(SI), R11
+	MOVQ 120(SI), R12
+	MOVQ 144(SI), R13
+	MOVQ 168(SI), SI
+	// Y3 = 0x0f in every byte lane: low-nibble extraction mask.
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X3
+	VPBROADCASTB X3, Y3
+	// R14 = current byte offset into every input and output slice.
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_8x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R10)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (R11)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 5 to 3 outputs
+	VMOVDQU (R12)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 960(CX), Y4
+	VMOVDQU 992(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1024(CX), Y4
+	VMOVDQU 1056(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1088(CX), Y4
+	VMOVDQU 1120(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 6 to 3 outputs
+	VMOVDQU (R13)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1152(CX), Y4
+	VMOVDQU 1184(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1216(CX), Y4
+	VMOVDQU 1248(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1280(CX), Y4
+	VMOVDQU 1312(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 7 to 3 outputs
+	VMOVDQU (SI)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1344(CX), Y4
+	VMOVDQU 1376(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1408(CX), Y4
+	VMOVDQU 1440(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1472(CX), Y4
+	VMOVDQU 1504(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_8x3_loop
+	VZEROUPPER
+
+mulAvxTwo_8x3_end:
+	RET
+
+// func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x4 folds 8 input slices into 4 output slices, 32 bytes per
+// loop iteration. Each input byte is split into its low and high 4-bit
+// halves (mask Y4 = 0x0f replicated to all lanes); each half indexes a
+// 32-byte lookup table from `matrix` via VPSHUFB, and the two partial
+// results are XORed together and into the accumulators Y0..Y3. Tables
+// are consumed in order from CX: 2 tables x 4 outputs x 32 bytes = 256
+// bytes per input. Uses every GP register (BP..R15) for the 4 output
+// and 8 input pointers plus the running offset.
+// NOTE(review): presumably GF(2^8) multiply-and-accumulate for
+// Reed-Solomon encoding — confirm against the pure-Go reference.
+TEXT ·mulAvxTwo_8x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 73 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks (n / 32); nothing to do if zero.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_8x4_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DX
+	MOVQ in_base+24(FP), DI
+	// Hoist the 8 input base pointers out of the loop (24 = sizeof(slice header)).
+	MOVQ (DI), R8
+	MOVQ 24(DI), R9
+	MOVQ 48(DI), R10
+	MOVQ 72(DI), R11
+	MOVQ 96(DI), R12
+	MOVQ 120(DI), R13
+	MOVQ 144(DI), R14
+	MOVQ 168(DI), DI
+	// Y4 = 0x0f in every byte lane: low-nibble extraction mask.
+	MOVQ $0x0000000f, R15
+	MOVQ R15, X4
+	VPBROADCASTB X4, Y4
+	// R15 = current byte offset into every input and output slice.
+	MOVQ start+72(FP), R15
+
+mulAvxTwo_8x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (R8)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (R9)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (R10)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R11)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (R12)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 5 to 4 outputs
+	VMOVDQU (R13)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1280(CX), Y5
+	VMOVDQU 1312(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1344(CX), Y5
+	VMOVDQU 1376(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1408(CX), Y5
+	VMOVDQU 1440(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1472(CX), Y5
+	VMOVDQU 1504(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 6 to 4 outputs
+	VMOVDQU (R14)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1536(CX), Y5
+	VMOVDQU 1568(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1600(CX), Y5
+	VMOVDQU 1632(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1664(CX), Y5
+	VMOVDQU 1696(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1728(CX), Y5
+	VMOVDQU 1760(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 7 to 4 outputs
+	VMOVDQU (DI)(R15*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1792(CX), Y5
+	VMOVDQU 1824(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1856(CX), Y5
+	VMOVDQU 1888(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1920(CX), Y5
+	VMOVDQU 1952(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1984(CX), Y5
+	VMOVDQU 2016(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Store 4 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (SI)(R15*1)
+	VMOVDQU Y3, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ mulAvxTwo_8x4_loop
+	VZEROUPPER
+
+mulAvxTwo_8x4_end:
+	RET
+
+// func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_8x5(SB), $0-88
+ // Galois-field multiply-and-accumulate: 8 input slices -> 5 output
+ // slices, 32 bytes per loop iteration, using the split-nibble VPSHUFB
+ // table-lookup trick (one 32-byte low-nibble and one 32-byte
+ // high-nibble table per input/output pair, laid out consecutively in
+ // matrix).
+ // NOTE(review): this looks machine-generated (avo style); prefer
+ // regenerating over hand-editing.
+ // Loading no tables to registers
+ // Full registers estimated 90 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte chunks; nothing to do if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x5_end
+ MOVQ out_base+48(FP), DX
+ // Load the 8 input slice data pointers (24-byte slice headers).
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), BX
+ // Broadcast the low-nibble mask 0x0f into every byte of Y5.
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_8x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ // Split each byte into low (Y8) and high (Y9) nibbles, look up the
+ // 16-entry product tables with VPSHUFB, and XOR the two partial
+ // products into the output accumulators Y0..Y4.
+ VMOVDQU (BP)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R11)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (BX)(R12*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ // Not enough free registers to pin output pointers, so each output
+ // slice header is re-read from out_base (DX) every iteration.
+ MOVQ (DX), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(DX), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(DX), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(DX), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(DX), R13
+ VMOVDQU Y4, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_8x5_loop
+ VZEROUPPER
+
+mulAvxTwo_8x5_end:
+ RET
+
+// func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_8x6(SB), $0-88
+ // Galois-field multiply-and-accumulate: 8 input slices -> 6 output
+ // slices, 32 bytes per loop iteration. Same split-nibble VPSHUFB
+ // lookup scheme as mulAvxTwo_8x5, with 6 tables (384 bytes) of matrix
+ // data per input.
+ // NOTE(review): this looks machine-generated (avo style); prefer
+ // regenerating over hand-editing.
+ // Loading no tables to registers
+ // Full registers estimated 107 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte chunks; nothing to do if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x6_end
+ MOVQ out_base+48(FP), DX
+ // Load the 8 input slice data pointers (24-byte slice headers).
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), BX
+ // Broadcast the low-nibble mask 0x0f into every byte of Y6.
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_8x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ // Split each byte into low (Y9) and high (Y10) nibbles, look up the
+ // 16-entry product tables with VPSHUFB, and XOR the two partial
+ // products into the output accumulators Y0..Y5.
+ VMOVDQU (BP)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (BX)(R12*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ // Not enough free registers to pin output pointers, so each output
+ // slice header is re-read from out_base (DX) every iteration.
+ MOVQ (DX), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(DX), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(DX), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(DX), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(DX), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(DX), R13
+ VMOVDQU Y5, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_8x6_loop
+ VZEROUPPER
+
+mulAvxTwo_8x6_end:
+ RET
+
+// func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_8x7(SB), $0-88
+ // Galois-field multiply-and-accumulate: 8 input slices -> 7 output
+ // slices, 32 bytes per loop iteration. Same split-nibble VPSHUFB
+ // lookup scheme as mulAvxTwo_8x5, with 7 tables (448 bytes) of matrix
+ // data per input.
+ // NOTE(review): this looks machine-generated (avo style); prefer
+ // regenerating over hand-editing.
+ // Loading no tables to registers
+ // Full registers estimated 124 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte chunks; nothing to do if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x7_end
+ MOVQ out_base+48(FP), DX
+ // Load the 8 input slice data pointers (24-byte slice headers).
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), BX
+ // Broadcast the low-nibble mask 0x0f into every byte of Y7.
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_8x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ // Split each byte into low (Y10) and high (Y11) nibbles, look up the
+ // 16-entry product tables with VPSHUFB, and XOR the two partial
+ // products into the output accumulators Y0..Y6.
+ VMOVDQU (BP)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (BX)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ // Not enough free registers to pin output pointers, so each output
+ // slice header is re-read from out_base (DX) every iteration.
+ MOVQ (DX), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(DX), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(DX), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(DX), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(DX), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(DX), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(DX), R13
+ VMOVDQU Y6, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_8x7_loop
+ VZEROUPPER
+
+mulAvxTwo_8x7_end:
+ RET
+
+// func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_8x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 141 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), BX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_8x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (BX)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(DX), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(DX), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(DX), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(DX), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(DX), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(DX), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(DX), R13
+ VMOVDQU Y7, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_8x8_loop
+ VZEROUPPER
+
+mulAvxTwo_8x8_end:
+ RET
+
+// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// GF(2^8) multiply of 9 input shards into 1 output shard, 32 bytes per
+// iteration.  AX = n/32 loop count; CX = base of the flattened lookup
+// tables (two consecutive 32-byte tables per input); Y1 = broadcast 0x0f
+// nibble mask.  Each source byte is split into its low nibble (VPAND) and
+// high nibble (VPSRLQ $4 then VPAND); each nibble selects a byte from a
+// VPSHUFB table, and the two lookups are XORed together and into the Y0
+// accumulator.  Generated code — do not edit by hand.
+TEXT ·mulAvxTwo_9x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 22 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x1_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X1
+	VPBROADCASTB X1, Y1
+	MOVQ start+72(FP), R13
+	// R13 = current byte offset into every input and output shard
+
+mulAvxTwo_9x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BP)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (R11)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 7 to 1 outputs
+	VMOVDQU (R12)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 448(CX), Y2
+	VMOVDQU 480(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 8 to 1 outputs
+	VMOVDQU (BX)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 512(CX), Y2
+	VMOVDQU 544(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x1_loop
+	VZEROUPPER
+
+mulAvxTwo_9x1_end:
+	RET
+
+// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// GF(2^8) multiply of 9 input shards into 2 output shards (Y0/Y1
+// accumulators), 32 bytes per iteration.  Same nibble-split scheme as
+// mulAvxTwo_9x1: Y2 holds the broadcast 0x0f mask, each input block is
+// split into low/high nibbles, and per (input, output) pair two 32-byte
+// VPSHUFB tables are read from CX and XOR-accumulated.  Output pointers
+// are cached in BX and DX; input pointers in SI..R13 and BP.
+// Generated code — do not edit by hand.
+TEXT ·mulAvxTwo_9x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 43 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x2_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), DX
+	MOVQ in_base+24(FP), BP
+	MOVQ (BP), SI
+	MOVQ 24(BP), DI
+	MOVQ 48(BP), R8
+	MOVQ 72(BP), R9
+	MOVQ 96(BP), R10
+	MOVQ 120(BP), R11
+	MOVQ 144(BP), R12
+	MOVQ 168(BP), R13
+	MOVQ 192(BP), BP
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X2
+	VPBROADCASTB X2, Y2
+	MOVQ start+72(FP), R14
+	// R14 = current byte offset into every input and output shard
+
+mulAvxTwo_9x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (R10)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 5 to 2 outputs
+	VMOVDQU (R11)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 640(CX), Y3
+	VMOVDQU 672(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 704(CX), Y3
+	VMOVDQU 736(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 6 to 2 outputs
+	VMOVDQU (R12)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 768(CX), Y3
+	VMOVDQU 800(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 832(CX), Y3
+	VMOVDQU 864(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 7 to 2 outputs
+	VMOVDQU (R13)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 896(CX), Y3
+	VMOVDQU 928(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 960(CX), Y3
+	VMOVDQU 992(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 8 to 2 outputs
+	VMOVDQU (BP)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 1024(CX), Y3
+	VMOVDQU 1056(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 1088(CX), Y3
+	VMOVDQU 1120(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_9x2_loop
+	VZEROUPPER
+
+mulAvxTwo_9x2_end:
+	RET
+
+// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// GF(2^8) multiply of 9 input shards into 3 output shards (Y0..Y2
+// accumulators), 32 bytes per iteration.  Y3 holds the broadcast 0x0f
+// nibble mask; each input block is split into low/high nibbles and, per
+// (input, output) pair, two 32-byte VPSHUFB tables from CX are combined
+// and XOR-accumulated.  Output pointers live in BX/BP/DX; input pointers
+// in DI..R14 and SI.  This variant exhausts the general registers (R15 is
+// the offset), so wider kernels reload output pointers each iteration.
+// Generated code — do not edit by hand.
+TEXT ·mulAvxTwo_9x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 62 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x3_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), DX
+	MOVQ in_base+24(FP), SI
+	MOVQ (SI), DI
+	MOVQ 24(SI), R8
+	MOVQ 48(SI), R9
+	MOVQ 72(SI), R10
+	MOVQ 96(SI), R11
+	MOVQ 120(SI), R12
+	MOVQ 144(SI), R13
+	MOVQ 168(SI), R14
+	MOVQ 192(SI), SI
+	MOVQ $0x0000000f, R15
+	MOVQ R15, X3
+	VPBROADCASTB X3, Y3
+	MOVQ start+72(FP), R15
+	// R15 = current byte offset into every input and output shard
+
+mulAvxTwo_9x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R10)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (R11)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 5 to 3 outputs
+	VMOVDQU (R12)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 960(CX), Y4
+	VMOVDQU 992(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1024(CX), Y4
+	VMOVDQU 1056(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1088(CX), Y4
+	VMOVDQU 1120(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 6 to 3 outputs
+	VMOVDQU (R13)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1152(CX), Y4
+	VMOVDQU 1184(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1216(CX), Y4
+	VMOVDQU 1248(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1280(CX), Y4
+	VMOVDQU 1312(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 7 to 3 outputs
+	VMOVDQU (R14)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1344(CX), Y4
+	VMOVDQU 1376(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1408(CX), Y4
+	VMOVDQU 1440(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1472(CX), Y4
+	VMOVDQU 1504(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 8 to 3 outputs
+	VMOVDQU (SI)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1536(CX), Y4
+	VMOVDQU 1568(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1600(CX), Y4
+	VMOVDQU 1632(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1664(CX), Y4
+	VMOVDQU 1696(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ mulAvxTwo_9x3_loop
+	VZEROUPPER
+
+mulAvxTwo_9x3_end:
+	RET
+
+// func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// GF(2^8) multiply of 9 input shards into 4 output shards (Y0..Y3
+// accumulators), 32 bytes per iteration.  Y4 holds the broadcast 0x0f
+// nibble mask; per (input, output) pair two 32-byte VPSHUFB tables from CX
+// are combined and XOR-accumulated.  Unlike the narrower kernels there are
+// not enough general registers to cache the output pointers, so only
+// out_base is kept (DX) and each output pointer is reloaded from the slice
+// headers in the store section every iteration.
+// Generated code — do not edit by hand.
+TEXT ·mulAvxTwo_9x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 81 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x4_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X4
+	VPBROADCASTB X4, Y4
+	MOVQ start+72(FP), R13
+	// R13 = current byte offset into every input and output shard
+
+mulAvxTwo_9x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (BP)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (SI)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (DI)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R8)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (R9)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 5 to 4 outputs
+	VMOVDQU (R10)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1280(CX), Y5
+	VMOVDQU 1312(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1344(CX), Y5
+	VMOVDQU 1376(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1408(CX), Y5
+	VMOVDQU 1440(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1472(CX), Y5
+	VMOVDQU 1504(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 6 to 4 outputs
+	VMOVDQU (R11)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1536(CX), Y5
+	VMOVDQU 1568(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1600(CX), Y5
+	VMOVDQU 1632(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1664(CX), Y5
+	VMOVDQU 1696(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1728(CX), Y5
+	VMOVDQU 1760(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 7 to 4 outputs
+	VMOVDQU (R12)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1792(CX), Y5
+	VMOVDQU 1824(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1856(CX), Y5
+	VMOVDQU 1888(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1920(CX), Y5
+	VMOVDQU 1952(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1984(CX), Y5
+	VMOVDQU 2016(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 8 to 4 outputs
+	VMOVDQU (BX)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 2048(CX), Y5
+	VMOVDQU 2080(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 2112(CX), Y5
+	VMOVDQU 2144(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 2176(CX), Y5
+	VMOVDQU 2208(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 2240(CX), Y5
+	VMOVDQU 2272(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Store 4 outputs
+	// Output pointers are re-read from out_base (DX) each iteration;
+	// R14 is the only scratch register left for them.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x4_loop
+	VZEROUPPER
+
+mulAvxTwo_9x4_end:
+	RET
+
+// func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// GF(2^8) multiply of 9 input shards into 5 output shards (Y0..Y4
+// accumulators), 32 bytes per iteration.  Y5 holds the broadcast 0x0f
+// nibble mask; per (input, output) pair two 32-byte VPSHUFB tables from CX
+// are combined and XOR-accumulated.  As in mulAvxTwo_9x4, out_base stays
+// in DX and each output pointer is reloaded in the store section every
+// iteration because no general registers remain to cache them.
+// Generated code — do not edit by hand.
+TEXT ·mulAvxTwo_9x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 100 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x5_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X5
+	VPBROADCASTB X5, Y5
+	MOVQ start+72(FP), R13
+	// R13 = current byte offset into every input and output shard
+
+mulAvxTwo_9x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (BP)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (SI)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (DI)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R8)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R9)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R10)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 6 to 5 outputs
+	VMOVDQU (R11)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1920(CX), Y6
+	VMOVDQU 1952(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1984(CX), Y6
+	VMOVDQU 2016(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2048(CX), Y6
+	VMOVDQU 2080(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2112(CX), Y6
+	VMOVDQU 2144(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2176(CX), Y6
+	VMOVDQU 2208(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 7 to 5 outputs
+	VMOVDQU (R12)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2240(CX), Y6
+	VMOVDQU 2272(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2304(CX), Y6
+	VMOVDQU 2336(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2368(CX), Y6
+	VMOVDQU 2400(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2432(CX), Y6
+	VMOVDQU 2464(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2496(CX), Y6
+	VMOVDQU 2528(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 8 to 5 outputs
+	VMOVDQU (BX)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2560(CX), Y6
+	VMOVDQU 2592(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2624(CX), Y6
+	VMOVDQU 2656(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2688(CX), Y6
+	VMOVDQU 2720(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2752(CX), Y6
+	VMOVDQU 2784(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2816(CX), Y6
+	VMOVDQU 2848(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Store 5 outputs
+	// Output pointers are re-read from out_base (DX) each iteration;
+	// R14 is the only scratch register left for them.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+	MOVQ 96(DX), R14
+	VMOVDQU Y4, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x5_loop
+	VZEROUPPER
+
+mulAvxTwo_9x5_end:
+	RET
+
+// func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 119 YMM used
+	// Multiply-accumulate 9 input shards into 6 output shards, 32 bytes
+	// per iteration. Each input byte is split into low/high nibbles
+	// (VPAND / VPSRLQ+VPAND with the broadcast 0x0f mask); each nibble
+	// indexes a 32-byte VPSHUFB lookup table taken from matrix, and the
+	// two partial products are XORed into the accumulators Y0-Y5.
+	// NOTE(review): tables presumably encode GF(2^8) products per the
+	// enclosing reedsolomon package — confirm against the generator.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n/32: one loop iteration consumes 32 bytes of every input.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x6_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	// Hoist the 9 input-shard base pointers out of the loop.
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	// Broadcast the low-nibble mask 0x0f into every byte of Y6.
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X6
+	VPBROADCASTB X6, Y6
+	// R13 becomes the running byte offset into every shard.
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_9x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (BP)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (SI)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (DI)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R8)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 4 to 6 outputs
+	VMOVDQU (R9)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1536(CX), Y7
+	VMOVDQU 1568(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1600(CX), Y7
+	VMOVDQU 1632(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1664(CX), Y7
+	VMOVDQU 1696(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1728(CX), Y7
+	VMOVDQU 1760(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1792(CX), Y7
+	VMOVDQU 1824(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1856(CX), Y7
+	VMOVDQU 1888(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 5 to 6 outputs
+	VMOVDQU (R10)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1920(CX), Y7
+	VMOVDQU 1952(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1984(CX), Y7
+	VMOVDQU 2016(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 2048(CX), Y7
+	VMOVDQU 2080(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 2112(CX), Y7
+	VMOVDQU 2144(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 2176(CX), Y7
+	VMOVDQU 2208(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 2240(CX), Y7
+	VMOVDQU 2272(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 6 to 6 outputs
+	VMOVDQU (R11)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 2304(CX), Y7
+	VMOVDQU 2336(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 2368(CX), Y7
+	VMOVDQU 2400(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 2432(CX), Y7
+	VMOVDQU 2464(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 2496(CX), Y7
+	VMOVDQU 2528(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 2560(CX), Y7
+	VMOVDQU 2592(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 2624(CX), Y7
+	VMOVDQU 2656(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 7 to 6 outputs
+	VMOVDQU (R12)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 2688(CX), Y7
+	VMOVDQU 2720(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 2752(CX), Y7
+	VMOVDQU 2784(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 2816(CX), Y7
+	VMOVDQU 2848(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 2880(CX), Y7
+	VMOVDQU 2912(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 2944(CX), Y7
+	VMOVDQU 2976(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 3008(CX), Y7
+	VMOVDQU 3040(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 8 to 6 outputs
+	VMOVDQU (BX)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 3072(CX), Y7
+	VMOVDQU 3104(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 3136(CX), Y7
+	VMOVDQU 3168(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 3200(CX), Y7
+	VMOVDQU 3232(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 3264(CX), Y7
+	VMOVDQU 3296(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 3328(CX), Y7
+	VMOVDQU 3360(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 3392(CX), Y7
+	VMOVDQU 3424(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Store 6 outputs
+	// Output pointers are reloaded from the slice header each iteration
+	// because R14 is the only free scratch register here.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+	MOVQ 96(DX), R14
+	VMOVDQU Y4, (R14)(R13*1)
+	MOVQ 120(DX), R14
+	VMOVDQU Y5, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x6_loop
+	VZEROUPPER
+
+mulAvxTwo_9x6_end:
+	RET
+
+// func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 138 YMM used
+	// Same kernel shape as mulAvxTwo_9x6 but with 7 output accumulators
+	// (Y0-Y6): per 32-byte chunk, split each input byte into low/high
+	// nibbles via the broadcast 0x0f mask (Y7), look each nibble up in a
+	// 32-byte VPSHUFB table from matrix, XOR the two halves, and fold
+	// the result into the matching output register.
+	// NOTE(review): tables presumably encode GF(2^8) products per the
+	// enclosing reedsolomon package — confirm against the generator.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n/32: one loop iteration consumes 32 bytes of every input.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x7_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	// Hoist the 9 input-shard base pointers out of the loop.
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	// Broadcast the low-nibble mask 0x0f into every byte of Y7.
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X7
+	VPBROADCASTB X7, Y7
+	// R13 becomes the running byte offset into every shard.
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_9x7_loop:
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (BP)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (SI)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (DI)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 3 to 7 outputs
+	VMOVDQU (R8)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 1344(CX), Y8
+	VMOVDQU 1376(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 1408(CX), Y8
+	VMOVDQU 1440(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 1472(CX), Y8
+	VMOVDQU 1504(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 1536(CX), Y8
+	VMOVDQU 1568(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 1600(CX), Y8
+	VMOVDQU 1632(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 1664(CX), Y8
+	VMOVDQU 1696(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 1728(CX), Y8
+	VMOVDQU 1760(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 4 to 7 outputs
+	VMOVDQU (R9)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 1792(CX), Y8
+	VMOVDQU 1824(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 1856(CX), Y8
+	VMOVDQU 1888(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 1920(CX), Y8
+	VMOVDQU 1952(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 1984(CX), Y8
+	VMOVDQU 2016(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 2048(CX), Y8
+	VMOVDQU 2080(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 2112(CX), Y8
+	VMOVDQU 2144(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 2176(CX), Y8
+	VMOVDQU 2208(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 5 to 7 outputs
+	VMOVDQU (R10)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 2240(CX), Y8
+	VMOVDQU 2272(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 2304(CX), Y8
+	VMOVDQU 2336(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 2368(CX), Y8
+	VMOVDQU 2400(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 2432(CX), Y8
+	VMOVDQU 2464(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 2496(CX), Y8
+	VMOVDQU 2528(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 2560(CX), Y8
+	VMOVDQU 2592(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 2624(CX), Y8
+	VMOVDQU 2656(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 6 to 7 outputs
+	VMOVDQU (R11)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 2688(CX), Y8
+	VMOVDQU 2720(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 2752(CX), Y8
+	VMOVDQU 2784(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 2816(CX), Y8
+	VMOVDQU 2848(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 2880(CX), Y8
+	VMOVDQU 2912(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 2944(CX), Y8
+	VMOVDQU 2976(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 3008(CX), Y8
+	VMOVDQU 3040(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 3072(CX), Y8
+	VMOVDQU 3104(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 7 to 7 outputs
+	VMOVDQU (R12)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 3136(CX), Y8
+	VMOVDQU 3168(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 3200(CX), Y8
+	VMOVDQU 3232(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 3264(CX), Y8
+	VMOVDQU 3296(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 3328(CX), Y8
+	VMOVDQU 3360(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 3392(CX), Y8
+	VMOVDQU 3424(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 3456(CX), Y8
+	VMOVDQU 3488(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 3520(CX), Y8
+	VMOVDQU 3552(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 8 to 7 outputs
+	VMOVDQU (BX)(R13*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y7, Y10, Y10
+	VPAND Y7, Y11, Y11
+	VMOVDQU 3584(CX), Y8
+	VMOVDQU 3616(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y0, Y0
+	VMOVDQU 3648(CX), Y8
+	VMOVDQU 3680(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y1, Y1
+	VMOVDQU 3712(CX), Y8
+	VMOVDQU 3744(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y2, Y2
+	VMOVDQU 3776(CX), Y8
+	VMOVDQU 3808(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y3, Y3
+	VMOVDQU 3840(CX), Y8
+	VMOVDQU 3872(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y4, Y4
+	VMOVDQU 3904(CX), Y8
+	VMOVDQU 3936(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y5, Y5
+	VMOVDQU 3968(CX), Y8
+	VMOVDQU 4000(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR Y8, Y9, Y8
+	VPXOR Y8, Y6, Y6
+
+	// Store 7 outputs
+	// Output pointers are reloaded from the slice header each iteration
+	// because R14 is the only free scratch register here.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+	MOVQ 96(DX), R14
+	VMOVDQU Y4, (R14)(R13*1)
+	MOVQ 120(DX), R14
+	VMOVDQU Y5, (R14)(R13*1)
+	MOVQ 144(DX), R14
+	VMOVDQU Y6, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x7_loop
+	VZEROUPPER
+
+mulAvxTwo_9x7_end:
+	RET
+
+// func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 157 YMM used
+	// Same kernel shape as mulAvxTwo_9x6/9x7 but with 8 output
+	// accumulators (Y0-Y7): per 32-byte chunk, split each input byte
+	// into low/high nibbles via the broadcast 0x0f mask (Y8), look each
+	// nibble up in a 32-byte VPSHUFB table from matrix, XOR the two
+	// halves, and fold the result into the matching output register.
+	// NOTE(review): tables presumably encode GF(2^8) products per the
+	// enclosing reedsolomon package — confirm against the generator.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = n/32: one loop iteration consumes 32 bytes of every input.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x8_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	// Hoist the 9 input-shard base pointers out of the loop.
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	// Broadcast the low-nibble mask 0x0f into every byte of Y8.
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X8
+	VPBROADCASTB X8, Y8
+	// R13 becomes the running byte offset into every shard.
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_9x8_loop:
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (BP)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (SI)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (DI)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 3 to 8 outputs
+	VMOVDQU (R8)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 1536(CX), Y9
+	VMOVDQU 1568(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 1600(CX), Y9
+	VMOVDQU 1632(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 1664(CX), Y9
+	VMOVDQU 1696(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 1728(CX), Y9
+	VMOVDQU 1760(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 1792(CX), Y9
+	VMOVDQU 1824(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 1856(CX), Y9
+	VMOVDQU 1888(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 1920(CX), Y9
+	VMOVDQU 1952(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 1984(CX), Y9
+	VMOVDQU 2016(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 4 to 8 outputs
+	VMOVDQU (R9)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 2048(CX), Y9
+	VMOVDQU 2080(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 2112(CX), Y9
+	VMOVDQU 2144(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 2176(CX), Y9
+	VMOVDQU 2208(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 2240(CX), Y9
+	VMOVDQU 2272(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 2304(CX), Y9
+	VMOVDQU 2336(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 2368(CX), Y9
+	VMOVDQU 2400(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 2432(CX), Y9
+	VMOVDQU 2464(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 2496(CX), Y9
+	VMOVDQU 2528(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 5 to 8 outputs
+	VMOVDQU (R10)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 2560(CX), Y9
+	VMOVDQU 2592(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 2624(CX), Y9
+	VMOVDQU 2656(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 2688(CX), Y9
+	VMOVDQU 2720(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 2752(CX), Y9
+	VMOVDQU 2784(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 2816(CX), Y9
+	VMOVDQU 2848(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 2880(CX), Y9
+	VMOVDQU 2912(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 2944(CX), Y9
+	VMOVDQU 2976(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 3008(CX), Y9
+	VMOVDQU 3040(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 6 to 8 outputs
+	VMOVDQU (R11)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 3072(CX), Y9
+	VMOVDQU 3104(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 3136(CX), Y9
+	VMOVDQU 3168(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 3200(CX), Y9
+	VMOVDQU 3232(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 3264(CX), Y9
+	VMOVDQU 3296(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 3328(CX), Y9
+	VMOVDQU 3360(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 3392(CX), Y9
+	VMOVDQU 3424(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 3456(CX), Y9
+	VMOVDQU 3488(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 3520(CX), Y9
+	VMOVDQU 3552(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 7 to 8 outputs
+	VMOVDQU (R12)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 3584(CX), Y9
+	VMOVDQU 3616(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 3648(CX), Y9
+	VMOVDQU 3680(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 3712(CX), Y9
+	VMOVDQU 3744(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 3776(CX), Y9
+	VMOVDQU 3808(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 3840(CX), Y9
+	VMOVDQU 3872(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 3904(CX), Y9
+	VMOVDQU 3936(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 3968(CX), Y9
+	VMOVDQU 4000(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 4032(CX), Y9
+	VMOVDQU 4064(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 8 to 8 outputs
+	VMOVDQU (BX)(R13*1), Y11
+	VPSRLQ $0x04, Y11, Y12
+	VPAND Y8, Y11, Y11
+	VPAND Y8, Y12, Y12
+	VMOVDQU 4096(CX), Y9
+	VMOVDQU 4128(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y0, Y0
+	VMOVDQU 4160(CX), Y9
+	VMOVDQU 4192(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y1, Y1
+	VMOVDQU 4224(CX), Y9
+	VMOVDQU 4256(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y2, Y2
+	VMOVDQU 4288(CX), Y9
+	VMOVDQU 4320(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y3, Y3
+	VMOVDQU 4352(CX), Y9
+	VMOVDQU 4384(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y4, Y4
+	VMOVDQU 4416(CX), Y9
+	VMOVDQU 4448(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y5, Y5
+	VMOVDQU 4480(CX), Y9
+	VMOVDQU 4512(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y6, Y6
+	VMOVDQU 4544(CX), Y9
+	VMOVDQU 4576(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR Y9, Y10, Y9
+	VPXOR Y9, Y7, Y7
+
+	// Store 8 outputs
+	// Output pointers are reloaded from the slice header each iteration
+	// because R14 is the only free scratch register here.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+	MOVQ 96(DX), R14
+	VMOVDQU Y4, (R14)(R13*1)
+	MOVQ 120(DX), R14
+	VMOVDQU Y5, (R14)(R13*1)
+	MOVQ 144(DX), R14
+	VMOVDQU Y6, (R14)(R13*1)
+	MOVQ 168(DX), R14
+	VMOVDQU Y7, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x8_loop
+	VZEROUPPER
+
+mulAvxTwo_9x8_end:
+	RET
+
+// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 24 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 10 input slices into 1 output slice,
+	// 32 bytes per loop iteration, via the 4-bit split-table method:
+	// each input byte is split into low/high nibbles (VPAND / VPSRLQ+VPAND
+	// against the broadcast 0x0f mask in Y1); each nibble selects a product
+	// byte from a 16-entry lookup table with VPSHUFB; the two partial
+	// products are XORed together and into the accumulator Y0.
+	// Input k uses the 64-byte table pair at matrix offsets 64k / 64k+32.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks to process.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_10x1_end
+	// DX = base pointer of out[0].
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), DX
+	// BP,SI,DI,R8-R13,BX = base pointers of in[0]..in[9]
+	// (BX is reused for in[9] once the slice header is no longer needed).
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), R13
+	MOVQ 216(BX), BX
+	// Y1 = 0x0f broadcast to all 32 bytes (nibble mask).
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X1
+	VPBROADCASTB X1, Y1
+	// R14 = running byte offset into every slice, seeded from start.
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_10x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BP)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (R11)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 7 to 1 outputs
+	VMOVDQU (R12)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 448(CX), Y2
+	VMOVDQU 480(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 8 to 1 outputs
+	VMOVDQU (R13)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 512(CX), Y2
+	VMOVDQU 544(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 9 to 1 outputs
+	VMOVDQU (BX)(R14*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 576(CX), Y2
+	VMOVDQU 608(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R14*1)
+
+	// Prepare for next loop
+	// Advance the shared offset by 32 bytes; AX counts remaining blocks.
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_10x1_loop
+	VZEROUPPER
+
+mulAvxTwo_10x1_end:
+	RET
+
+// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 47 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 10 input slices into 2 output slices
+	// (accumulators Y0, Y1), 32 bytes per iteration, using the nibble
+	// split-table method (see mulAvxTwo_10x1). Input k owns 128 bytes of
+	// matrix: low/high tables at 128k / 128k+32 feed output 0 and at
+	// 128k+64 / 128k+96 feed output 1.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks to process.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_10x2_end
+	// BX, DX = base pointers of out[0], out[1].
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), DX
+	// SI,DI,R8-R14,BP = base pointers of in[0]..in[9]
+	// (BP is reused for in[9] once the slice header is consumed).
+	MOVQ in_base+24(FP), BP
+	MOVQ (BP), SI
+	MOVQ 24(BP), DI
+	MOVQ 48(BP), R8
+	MOVQ 72(BP), R9
+	MOVQ 96(BP), R10
+	MOVQ 120(BP), R11
+	MOVQ 144(BP), R12
+	MOVQ 168(BP), R13
+	MOVQ 192(BP), R14
+	MOVQ 216(BP), BP
+	// Y2 = 0x0f broadcast to all 32 bytes (nibble mask).
+	MOVQ $0x0000000f, R15
+	MOVQ R15, X2
+	VPBROADCASTB X2, Y2
+	// R15 = running byte offset into every slice, seeded from start.
+	MOVQ start+72(FP), R15
+
+mulAvxTwo_10x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (R10)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 5 to 2 outputs
+	VMOVDQU (R11)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 640(CX), Y3
+	VMOVDQU 672(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 704(CX), Y3
+	VMOVDQU 736(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 6 to 2 outputs
+	VMOVDQU (R12)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 768(CX), Y3
+	VMOVDQU 800(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 832(CX), Y3
+	VMOVDQU 864(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 7 to 2 outputs
+	VMOVDQU (R13)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 896(CX), Y3
+	VMOVDQU 928(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 960(CX), Y3
+	VMOVDQU 992(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 8 to 2 outputs
+	VMOVDQU (R14)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 1024(CX), Y3
+	VMOVDQU 1056(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 1088(CX), Y3
+	VMOVDQU 1120(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 9 to 2 outputs
+	VMOVDQU (BP)(R15*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 1152(CX), Y3
+	VMOVDQU 1184(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 1216(CX), Y3
+	VMOVDQU 1248(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (DX)(R15*1)
+
+	// Prepare for next loop
+	// Advance the shared offset by 32 bytes; AX counts remaining blocks.
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ mulAvxTwo_10x2_loop
+	VZEROUPPER
+
+mulAvxTwo_10x2_end:
+	RET
+
+// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 68 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 10 input slices into 3 output slices
+	// (accumulators Y0-Y2), 32 bytes per iteration, nibble split-table
+	// method (see mulAvxTwo_10x1). Input k owns 192 bytes of matrix:
+	// low/high table pairs at 192k+64j / 192k+64j+32 feed output j.
+	// Too few free GPRs remain to cache the output pointers, so they are
+	// reloaded from the out slice header (DX) at every store.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks to process.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_10x3_end
+	// DX = pointer to the out [][]byte header array (not a data pointer).
+	MOVQ out_base+48(FP), DX
+	// BP,SI,DI,R8-R13,BX = base pointers of in[0]..in[9].
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), R13
+	MOVQ 216(BX), BX
+	// Y3 = 0x0f broadcast to all 32 bytes (nibble mask).
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X3
+	VPBROADCASTB X3, Y3
+	// R14 = running byte offset into every slice, seeded from start.
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_10x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (BP)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (SI)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (DI)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R8)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (R9)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 5 to 3 outputs
+	VMOVDQU (R10)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 960(CX), Y4
+	VMOVDQU 992(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1024(CX), Y4
+	VMOVDQU 1056(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1088(CX), Y4
+	VMOVDQU 1120(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 6 to 3 outputs
+	VMOVDQU (R11)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1152(CX), Y4
+	VMOVDQU 1184(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1216(CX), Y4
+	VMOVDQU 1248(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1280(CX), Y4
+	VMOVDQU 1312(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 7 to 3 outputs
+	VMOVDQU (R12)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1344(CX), Y4
+	VMOVDQU 1376(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1408(CX), Y4
+	VMOVDQU 1440(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1472(CX), Y4
+	VMOVDQU 1504(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 8 to 3 outputs
+	VMOVDQU (R13)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1536(CX), Y4
+	VMOVDQU 1568(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1600(CX), Y4
+	VMOVDQU 1632(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1664(CX), Y4
+	VMOVDQU 1696(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 9 to 3 outputs
+	VMOVDQU (BX)(R14*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1728(CX), Y4
+	VMOVDQU 1760(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1792(CX), Y4
+	VMOVDQU 1824(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1856(CX), Y4
+	VMOVDQU 1888(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Store 3 outputs
+	// Output pointers are reloaded from the out slice header each pass.
+	MOVQ (DX), R15
+	VMOVDQU Y0, (R15)(R14*1)
+	MOVQ 24(DX), R15
+	VMOVDQU Y1, (R15)(R14*1)
+	MOVQ 48(DX), R15
+	VMOVDQU Y2, (R15)(R14*1)
+
+	// Prepare for next loop
+	// Advance the shared offset by 32 bytes; AX counts remaining blocks.
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_10x3_loop
+	VZEROUPPER
+
+mulAvxTwo_10x3_end:
+	RET
+
+// func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 89 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 10 input slices into 4 output slices
+	// (accumulators Y0-Y3), 32 bytes per iteration, nibble split-table
+	// method (see mulAvxTwo_10x1). Input k owns 256 bytes of matrix:
+	// low/high table pairs at 256k+64j / 256k+64j+32 feed output j.
+	// Output pointers are reloaded from the out slice header at each store.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks to process.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_10x4_end
+	// DX = pointer to the out [][]byte header array (not a data pointer).
+	MOVQ out_base+48(FP), DX
+	// BP,SI,DI,R8-R13,BX = base pointers of in[0]..in[9].
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), R13
+	MOVQ 216(BX), BX
+	// Y4 = 0x0f broadcast to all 32 bytes (nibble mask).
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X4
+	VPBROADCASTB X4, Y4
+	// R14 = running byte offset into every slice, seeded from start.
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_10x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (BP)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (SI)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (DI)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R8)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (R9)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 5 to 4 outputs
+	VMOVDQU (R10)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1280(CX), Y5
+	VMOVDQU 1312(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1344(CX), Y5
+	VMOVDQU 1376(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1408(CX), Y5
+	VMOVDQU 1440(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1472(CX), Y5
+	VMOVDQU 1504(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 6 to 4 outputs
+	VMOVDQU (R11)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1536(CX), Y5
+	VMOVDQU 1568(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1600(CX), Y5
+	VMOVDQU 1632(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1664(CX), Y5
+	VMOVDQU 1696(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1728(CX), Y5
+	VMOVDQU 1760(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 7 to 4 outputs
+	VMOVDQU (R12)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1792(CX), Y5
+	VMOVDQU 1824(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1856(CX), Y5
+	VMOVDQU 1888(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1920(CX), Y5
+	VMOVDQU 1952(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1984(CX), Y5
+	VMOVDQU 2016(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 8 to 4 outputs
+	VMOVDQU (R13)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 2048(CX), Y5
+	VMOVDQU 2080(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 2112(CX), Y5
+	VMOVDQU 2144(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 2176(CX), Y5
+	VMOVDQU 2208(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 2240(CX), Y5
+	VMOVDQU 2272(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 9 to 4 outputs
+	VMOVDQU (BX)(R14*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 2304(CX), Y5
+	VMOVDQU 2336(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 2368(CX), Y5
+	VMOVDQU 2400(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 2432(CX), Y5
+	VMOVDQU 2464(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 2496(CX), Y5
+	VMOVDQU 2528(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Store 4 outputs
+	// Output pointers are reloaded from the out slice header each pass.
+	MOVQ (DX), R15
+	VMOVDQU Y0, (R15)(R14*1)
+	MOVQ 24(DX), R15
+	VMOVDQU Y1, (R15)(R14*1)
+	MOVQ 48(DX), R15
+	VMOVDQU Y2, (R15)(R14*1)
+	MOVQ 72(DX), R15
+	VMOVDQU Y3, (R15)(R14*1)
+
+	// Prepare for next loop
+	// Advance the shared offset by 32 bytes; AX counts remaining blocks.
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_10x4_loop
+	VZEROUPPER
+
+mulAvxTwo_10x4_end:
+	RET
+
+// func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 110 YMM used
+	//
+	// GF(2^8) multiply-accumulate of 10 input slices into 5 output slices
+	// (accumulators Y0-Y4), 32 bytes per iteration, nibble split-table
+	// method (see mulAvxTwo_10x1). Input k owns 320 bytes of matrix:
+	// low/high table pairs at 320k+64j / 320k+64j+32 feed output j.
+	// Output pointers are reloaded from the out slice header at each store.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	// AX = number of whole 32-byte blocks to process.
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_10x5_end
+	// DX = pointer to the out [][]byte header array (not a data pointer).
+	MOVQ out_base+48(FP), DX
+	// BP,SI,DI,R8-R13,BX = base pointers of in[0]..in[9].
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), R13
+	MOVQ 216(BX), BX
+	// Y5 = 0x0f broadcast to all 32 bytes (nibble mask).
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X5
+	VPBROADCASTB X5, Y5
+	// R14 = running byte offset into every slice, seeded from start.
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_10x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (BP)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (SI)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (DI)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R8)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R9)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R10)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 6 to 5 outputs
+	VMOVDQU (R11)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1920(CX), Y6
+	VMOVDQU 1952(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1984(CX), Y6
+	VMOVDQU 2016(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2048(CX), Y6
+	VMOVDQU 2080(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2112(CX), Y6
+	VMOVDQU 2144(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2176(CX), Y6
+	VMOVDQU 2208(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 7 to 5 outputs
+	VMOVDQU (R12)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2240(CX), Y6
+	VMOVDQU 2272(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2304(CX), Y6
+	VMOVDQU 2336(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2368(CX), Y6
+	VMOVDQU 2400(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2432(CX), Y6
+	VMOVDQU 2464(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2496(CX), Y6
+	VMOVDQU 2528(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 8 to 5 outputs
+	VMOVDQU (R13)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2560(CX), Y6
+	VMOVDQU 2592(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2624(CX), Y6
+	VMOVDQU 2656(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2688(CX), Y6
+	VMOVDQU 2720(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2752(CX), Y6
+	VMOVDQU 2784(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2816(CX), Y6
+	VMOVDQU 2848(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 9 to 5 outputs
+	VMOVDQU (BX)(R14*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2880(CX), Y6
+	VMOVDQU 2912(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2944(CX), Y6
+	VMOVDQU 2976(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 3008(CX), Y6
+	VMOVDQU 3040(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 3072(CX), Y6
+	VMOVDQU 3104(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 3136(CX), Y6
+	VMOVDQU 3168(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Store 5 outputs
+	// Output pointers are reloaded from the out slice header each pass.
+	MOVQ (DX), R15
+	VMOVDQU Y0, (R15)(R14*1)
+	MOVQ 24(DX), R15
+	VMOVDQU Y1, (R15)(R14*1)
+	MOVQ 48(DX), R15
+	VMOVDQU Y2, (R15)(R14*1)
+	MOVQ 72(DX), R15
+	VMOVDQU Y3, (R15)(R14*1)
+	MOVQ 96(DX), R15
+	VMOVDQU Y4, (R15)(R14*1)
+
+	// Prepare for next loop
+	// Advance the shared offset by 32 bytes; AX counts remaining blocks.
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_10x5_loop
+	VZEROUPPER
+
+mulAvxTwo_10x5_end:
+	RET
+
+// func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 131 YMM used
+ // NOTE(review): generated code (do not hand-edit). Multiplies 10 input
+ // shards into 6 output shards over GF(2^8), 32 bytes per loop iteration.
+ // Each (input, output) pair owns two consecutive 32-byte nibble lookup
+ // tables inside the matrix buffer at CX (stride 384 bytes per input:
+ // 6 outputs x 2 tables x 32 bytes). VPSHUFB performs 32 parallel 4-bit
+ // table lookups; results for low/high nibbles are XOR-combined and then
+ // XOR-accumulated into the output registers Y0..Y5.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R14
+ // AX = n/32 iterations; R14 = running byte offset into every shard;
+ // Y6 = broadcast 0x0f nibble mask, live for the whole loop.
+
+mulAvxTwo_10x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BP)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (R13)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 9 to 6 outputs
+ VMOVDQU (BX)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3456(CX), Y7
+ VMOVDQU 3488(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 3520(CX), Y7
+ VMOVDQU 3552(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 3584(CX), Y7
+ VMOVDQU 3616(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 3648(CX), Y7
+ VMOVDQU 3680(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 3712(CX), Y7
+ VMOVDQU 3744(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3776(CX), Y7
+ VMOVDQU 3808(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ // Out-slice pointers are re-read from the slice-header array at DX
+ // (24-byte stride) every iteration: all other GPRs are occupied by
+ // the ten input pointers, so there is no spare register to cache them.
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(DX), R15
+ VMOVDQU Y5, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x6_loop
+ VZEROUPPER
+
+mulAvxTwo_10x6_end:
+ RET
+
+// func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 152 YMM used
+ // NOTE(review): generated code (do not hand-edit). Same scheme as the
+ // other mulAvxTwo_* kernels, here for 10 inputs x 7 outputs: per input
+ // the matrix buffer at CX holds 7 outputs x 2 nibble tables x 32 bytes
+ // = 448 bytes. Outputs accumulate in Y0..Y6; Y7 is the 0x0f mask; Y8/Y9
+ // hold the current table pair, Y10/Y11 the low/high input nibbles.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R14
+ // AX = n/32 iterations; R14 = running byte offset into every shard.
+
+mulAvxTwo_10x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BP)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (R13)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 9 to 7 outputs
+ VMOVDQU (BX)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 4032(CX), Y8
+ VMOVDQU 4064(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 4096(CX), Y8
+ VMOVDQU 4128(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 4160(CX), Y8
+ VMOVDQU 4192(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 4224(CX), Y8
+ VMOVDQU 4256(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 4288(CX), Y8
+ VMOVDQU 4320(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 4352(CX), Y8
+ VMOVDQU 4384(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 4416(CX), Y8
+ VMOVDQU 4448(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ // Out-slice pointers are re-read from the slice-header array at DX
+ // (24-byte stride) every iteration: all other GPRs are occupied by
+ // the ten input pointers, so there is no spare register to cache them.
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(DX), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(DX), R15
+ VMOVDQU Y6, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x7_loop
+ VZEROUPPER
+
+mulAvxTwo_10x7_end:
+ RET
+
+// func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 173 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (R13)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 9 to 8 outputs
+ VMOVDQU (BX)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4608(CX), Y9
+ VMOVDQU 4640(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 4672(CX), Y9
+ VMOVDQU 4704(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 4736(CX), Y9
+ VMOVDQU 4768(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 4800(CX), Y9
+ VMOVDQU 4832(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 4864(CX), Y9
+ VMOVDQU 4896(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 4928(CX), Y9
+ VMOVDQU 4960(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 4992(CX), Y9
+ VMOVDQU 5024(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 5056(CX), Y9
+ VMOVDQU 5088(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(DX), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(DX), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(DX), R15
+ VMOVDQU Y7, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x8_loop
+ VZEROUPPER
+
+mulAvxTwo_10x8_end:
+ RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
new file mode 100644
index 0000000..b4917bc
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
@@ -0,0 +1,11 @@
+//+build !amd64 noasm appengine gccgo nogen
+
+package reedsolomon
+
+const maxAvx2Inputs = 0
+const maxAvx2Outputs = 0
+const avx2CodeGen = false
+
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+ panic("avx2 codegen not available")
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
new file mode 100644
index 0000000..0b49a1e
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
@@ -0,0 +1,293 @@
+// Code generated by command: go generate gen.go. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build gc
+// +build !nogen
+
+package reedsolomon
+
+import "fmt"
+
+const avx2CodeGen = true
+const maxAvx2Inputs = 10
+const maxAvx2Outputs = 8
+
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+ n := stop - start
+ n = (n >> 5) << 5
+
+ switch len(in) {
+ case 1:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_1x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_1x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_1x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_1x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_1x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_1x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_1x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_1x8(matrix, in, out, start, n)
+ return n
+ }
+ case 2:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_2x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_2x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_2x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_2x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_2x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_2x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_2x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_2x8(matrix, in, out, start, n)
+ return n
+ }
+ case 3:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_3x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_3x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_3x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_3x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_3x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_3x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_3x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_3x8(matrix, in, out, start, n)
+ return n
+ }
+ case 4:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_4x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_4x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_4x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_4x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_4x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_4x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_4x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_4x8(matrix, in, out, start, n)
+ return n
+ }
+ case 5:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_5x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_5x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_5x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_5x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_5x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_5x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_5x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_5x8(matrix, in, out, start, n)
+ return n
+ }
+ case 6:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_6x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_6x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_6x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_6x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_6x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_6x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_6x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_6x8(matrix, in, out, start, n)
+ return n
+ }
+ case 7:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_7x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_7x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_7x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_7x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_7x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_7x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_7x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_7x8(matrix, in, out, start, n)
+ return n
+ }
+ case 8:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_8x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_8x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_8x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_8x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_8x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_8x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_8x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_8x8(matrix, in, out, start, n)
+ return n
+ }
+ case 9:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_9x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_9x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_9x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_9x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_9x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_9x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_9x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_9x8(matrix, in, out, start, n)
+ return n
+ }
+ case 10:
+ switch len(out) {
+ case 1:
+ mulAvxTwo_10x1(matrix, in, out, start, n)
+ return n
+ case 2:
+ mulAvxTwo_10x2(matrix, in, out, start, n)
+ return n
+ case 3:
+ mulAvxTwo_10x3(matrix, in, out, start, n)
+ return n
+ case 4:
+ mulAvxTwo_10x4(matrix, in, out, start, n)
+ return n
+ case 5:
+ mulAvxTwo_10x5(matrix, in, out, start, n)
+ return n
+ case 6:
+ mulAvxTwo_10x6(matrix, in, out, start, n)
+ return n
+ case 7:
+ mulAvxTwo_10x7(matrix, in, out, start, n)
+ return n
+ case 8:
+ mulAvxTwo_10x8(matrix, in, out, start, n)
+ return n
+ }
+ }
+ panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
new file mode 100644
index 0000000..1d00e06
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
@@ -0,0 +1,44 @@
+//+build !amd64 noasm appengine gccgo
+//+build !arm64 noasm appengine gccgo
+//+build !ppc64le noasm appengine gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+ out = out[:len(in)]
+ if c == 1 {
+ copy(out, in)
+ return
+ }
+ mt := mulTable[c][:256]
+ for n, input := range in {
+ out[n] = mt[input]
+ }
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+ out = out[:len(in)]
+ if c == 1 {
+ for n, input := range in {
+ out[n] ^= input
+ }
+ return
+ }
+ mt := mulTable[c][:256]
+ for n, input := range in {
+ out[n] ^= mt[input]
+ }
+}
+
+// slice galois add
+func sliceXor(in, out []byte, o *options) {
+ for n, input := range in {
+ out[n] ^= input
+ }
+}
+
+func init() {
+ defaultOptions.useAVX512 = false
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
new file mode 100644
index 0000000..bd15e3a
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
@@ -0,0 +1,13 @@
+//+build !amd64 noasm appengine gccgo
+
+// Copyright 2020, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+ panic("codeSomeShardsAvx512 should not be called if built without asm")
+}
+
+func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+ panic("codeSomeShardsAvx512P should not be called if built without asm")
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
new file mode 100644
index 0000000..70f93d6
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
@@ -0,0 +1,75 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func galMulPpc(low, high, in, out []byte)
+
+//go:noescape
+func galMulPpcXor(low, high, in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulPpc(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] = low[l] ^ high[h]
+ }
+}
+func galMulPpcXor(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] ^= low[l] ^ high[h]
+ }
+}
+*/
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ copy(out, in)
+ return
+ }
+ done := (len(in) >> 4) << 4
+ if done > 0 {
+ galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+ }
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mulTable[c][:256]
+ for i := done; i < len(in); i++ {
+ out[i] = mt[in[i]]
+ }
+ }
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ sliceXor(in, out, o)
+ return
+ }
+ done := (len(in) >> 4) << 4
+ if done > 0 {
+ galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+ }
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mulTable[c][:256]
+ for i := done; i < len(in); i++ {
+ out[i] ^= mt[in[i]]
+ }
+ }
+}
+
+// slice galois add
+func sliceXor(in, out []byte, o *options) {
+ for n, input := range in {
+ out[n] ^= input
+ }
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
new file mode 100644
index 0000000..8838f0c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
@@ -0,0 +1,124 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+#include "textflag.h"
+
+#define LOW R3
+#define HIGH R4
+#define IN R5
+#define LEN R6
+#define OUT R7
+#define CONSTANTS R8
+#define OFFSET R9
+#define OFFSET1 R10
+#define OFFSET2 R11
+
+#define X6 VS34
+#define X6_ V2
+#define X7 VS35
+#define X7_ V3
+#define MSG VS36
+#define MSG_ V4
+#define MSG_HI VS37
+#define MSG_HI_ V5
+#define RESULT VS38
+#define RESULT_ V6
+#define ROTATE VS39
+#define ROTATE_ V7
+#define MASK VS40
+#define MASK_ V8
+#define FLIP VS41
+#define FLIP_ V9
+
+// func galMulPpc(low, high, in, out []byte)
+TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
+ MOVD low+0(FP), LOW
+ MOVD high+24(FP), HIGH
+ MOVD in+48(FP), IN
+ MOVD in_len+56(FP), LEN
+ MOVD out+72(FP), OUT
+
+ MOVD $16, OFFSET1
+ MOVD $32, OFFSET2
+
+ MOVD $·constants(SB), CONSTANTS
+ LXVD2X (CONSTANTS)(R0), ROTATE
+ LXVD2X (CONSTANTS)(OFFSET1), MASK
+ LXVD2X (CONSTANTS)(OFFSET2), FLIP
+
+ LXVD2X (LOW)(R0), X6
+ LXVD2X (HIGH)(R0), X7
+ VPERM X6_, V31, FLIP_, X6_
+ VPERM X7_, V31, FLIP_, X7_
+
+ MOVD $0, OFFSET
+
+loop:
+ LXVD2X (IN)(OFFSET), MSG
+
+ VSRB MSG_, ROTATE_, MSG_HI_
+ VAND MSG_, MASK_, MSG_
+ VPERM X6_, V31, MSG_, MSG_
+ VPERM X7_, V31, MSG_HI_, MSG_HI_
+
+ VXOR MSG_, MSG_HI_, MSG_
+
+ STXVD2X MSG, (OUT)(OFFSET)
+
+ ADD $16, OFFSET, OFFSET
+ CMP LEN, OFFSET
+ BGT loop
+ RET
+
+// func galMulPpcXor(low, high, in, out []byte)
+TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
+ MOVD low+0(FP), LOW
+ MOVD high+24(FP), HIGH
+ MOVD in+48(FP), IN
+ MOVD in_len+56(FP), LEN
+ MOVD out+72(FP), OUT
+
+ MOVD $16, OFFSET1
+ MOVD $32, OFFSET2
+
+ MOVD $·constants(SB), CONSTANTS
+ LXVD2X (CONSTANTS)(R0), ROTATE
+ LXVD2X (CONSTANTS)(OFFSET1), MASK
+ LXVD2X (CONSTANTS)(OFFSET2), FLIP
+
+ LXVD2X (LOW)(R0), X6
+ LXVD2X (HIGH)(R0), X7
+ VPERM X6_, V31, FLIP_, X6_
+ VPERM X7_, V31, FLIP_, X7_
+
+ MOVD $0, OFFSET
+
+loopXor:
+ LXVD2X (IN)(OFFSET), MSG
+ LXVD2X (OUT)(OFFSET), RESULT
+
+ VSRB MSG_, ROTATE_, MSG_HI_
+ VAND MSG_, MASK_, MSG_
+ VPERM X6_, V31, MSG_, MSG_
+ VPERM X7_, V31, MSG_HI_, MSG_HI_
+
+ VXOR MSG_, MSG_HI_, MSG_
+ VXOR MSG_, RESULT_, RESULT_
+
+ STXVD2X RESULT, (OUT)(OFFSET)
+
+ ADD $16, OFFSET, OFFSET
+ CMP LEN, OFFSET
+ BGT loopXor
+ RET
+
+DATA ·constants+0x0(SB)/8, $0x0404040404040404
+DATA ·constants+0x8(SB)/8, $0x0404040404040404
+DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x20(SB)/8, $0x0706050403020100
+DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+
+GLOBL ·constants(SB), 8, $48
diff --git a/vendor/github.com/klauspost/reedsolomon/gen.go b/vendor/github.com/klauspost/reedsolomon/gen.go
new file mode 100644
index 0000000..6fc545c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/gen.go
@@ -0,0 +1,249 @@
+//+build generate
+
+//go:generate go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go
+//go:generate gofmt -w galois_gen_switch_amd64.go
+
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+
+ . "github.com/mmcloughlin/avo/build"
+ "github.com/mmcloughlin/avo/buildtags"
+ . "github.com/mmcloughlin/avo/operand"
+ "github.com/mmcloughlin/avo/reg"
+)
+
+// Technically we can do slightly bigger, but we stay reasonable.
+const inputMax = 10
+const outputMax = 8
+
+var switchDefs [inputMax][outputMax]string
+var switchDefsX [inputMax][outputMax]string
+
+const perLoopBits = 5
+const perLoop = 1 << perLoopBits
+
+func main() {
+ Constraint(buildtags.Not("appengine").ToConstraint())
+ Constraint(buildtags.Not("noasm").ToConstraint())
+ Constraint(buildtags.Not("nogen").ToConstraint())
+ Constraint(buildtags.Term("gc").ToConstraint())
+
+ for i := 1; i <= inputMax; i++ {
+ for j := 1; j <= outputMax; j++ {
+ //genMulAvx2(fmt.Sprintf("mulAvxTwoXor_%dx%d", i, j), i, j, true)
+ genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false)
+ }
+ }
+ f, err := os.Create("galois_gen_switch_amd64.go")
+ if err != nil {
+ panic(err)
+ }
+ defer f.Close()
+ w := bufio.NewWriter(f)
+ defer w.Flush()
+ w.WriteString(`// Code generated by command: go generate ` + os.Getenv("GOFILE") + `. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build gc
+// +build !nogen
+
+package reedsolomon
+
+import "fmt"
+
+`)
+
+ w.WriteString("const avx2CodeGen = true\n")
+ w.WriteString(fmt.Sprintf("const maxAvx2Inputs = %d\nconst maxAvx2Outputs = %d\n", inputMax, outputMax))
+ w.WriteString(`
+
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+ n := stop-start
+`)
+
+ w.WriteString(fmt.Sprintf("n = (n>>%d)<<%d\n\n", perLoopBits, perLoopBits))
+ w.WriteString(`switch len(in) {
+`)
+ for in, defs := range switchDefs[:] {
+ w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
+ for out, def := range defs[:] {
+ w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
+ w.WriteString(def)
+ }
+ w.WriteString("}\n")
+ }
+ w.WriteString(`}
+ panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
+`)
+ Generate()
+}
+
+func genMulAvx2(name string, inputs int, outputs int, xor bool) {
+ total := inputs * outputs
+
+ doc := []string{
+ fmt.Sprintf("%s takes %d inputs and produces %d outputs.", name, inputs, outputs),
+ }
+ if !xor {
+ doc = append(doc, "The output is initialized to 0.")
+ }
+
+ // Load shuffle masks on every use.
+ var loadNone bool
+ // Use registers for destination registers.
+ var regDst = true
+
+ // lo, hi, 1 in, 1 out, 2 tmp, 1 mask
+ est := total*2 + outputs + 5
+ if outputs == 1 {
+ // We don't need to keep a copy of the input if only 1 output.
+ est -= 2
+ }
+
+ if est > 16 {
+ loadNone = true
+ // We run out of GP registers first, now.
+ if inputs+outputs > 12 {
+ regDst = false
+ }
+ }
+
+ TEXT(name, 0, fmt.Sprintf("func(matrix []byte, in [][]byte, out [][]byte, start, n int)"))
+
+ // SWITCH DEFINITION:
+ s := fmt.Sprintf(" mulAvxTwo_%dx%d(matrix, in, out, start, n)\n", inputs, outputs)
+ s += fmt.Sprintf("\t\t\t\treturn n\n")
+ switchDefs[inputs-1][outputs-1] = s
+
+ if loadNone {
+ Comment("Loading no tables to registers")
+ } else {
+ // loadNone == false
+ Comment("Loading all tables to registers")
+ }
+
+ Doc(doc...)
+ Pragma("noescape")
+ Commentf("Full registers estimated %d YMM used", est)
+
+ length := Load(Param("n"), GP64())
+ matrixBase := GP64()
+ MOVQ(Param("matrix").Base().MustAddr(), matrixBase)
+ SHRQ(U8(perLoopBits), length)
+ TESTQ(length, length)
+ JZ(LabelRef(name + "_end"))
+
+ dst := make([]reg.VecVirtual, outputs)
+ dstPtr := make([]reg.GPVirtual, outputs)
+ outBase := Param("out").Base().MustAddr()
+ outSlicePtr := GP64()
+ MOVQ(outBase, outSlicePtr)
+ for i := range dst {
+ dst[i] = YMM()
+ if !regDst {
+ continue
+ }
+ ptr := GP64()
+ MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
+ dstPtr[i] = ptr
+ }
+
+ inLo := make([]reg.VecVirtual, total)
+ inHi := make([]reg.VecVirtual, total)
+
+ for i := range inLo {
+ if loadNone {
+ break
+ }
+ tableLo := YMM()
+ tableHi := YMM()
+ VMOVDQU(Mem{Base: matrixBase, Disp: i * 64}, tableLo)
+ VMOVDQU(Mem{Base: matrixBase, Disp: i*64 + 32}, tableHi)
+ inLo[i] = tableLo
+ inHi[i] = tableHi
+ }
+
+ inPtrs := make([]reg.GPVirtual, inputs)
+ inSlicePtr := GP64()
+ MOVQ(Param("in").Base().MustAddr(), inSlicePtr)
+ for i := range inPtrs {
+ ptr := GP64()
+ MOVQ(Mem{Base: inSlicePtr, Disp: i * 24}, ptr)
+ inPtrs[i] = ptr
+ }
+
+ tmpMask := GP64()
+ MOVQ(U32(15), tmpMask)
+ lowMask := YMM()
+ MOVQ(tmpMask, lowMask.AsX())
+ VPBROADCASTB(lowMask.AsX(), lowMask)
+
+ offset := GP64()
+ MOVQ(Param("start").MustAddr(), offset)
+ Label(name + "_loop")
+ if xor {
+ Commentf("Load %d outputs", outputs)
+ } else {
+ Commentf("Clear %d outputs", outputs)
+ }
+ for i := range dst {
+ if xor {
+ if regDst {
+ VMOVDQU(Mem{Base: dstPtr[i], Index: offset, Scale: 1}, dst[i])
+ continue
+ }
+ ptr := GP64()
+ MOVQ(outBase, ptr)
+ VMOVDQU(Mem{Base: ptr, Index: offset, Scale: 1}, dst[i])
+ } else {
+ VPXOR(dst[i], dst[i], dst[i])
+ }
+ }
+
+ lookLow, lookHigh := YMM(), YMM()
+ inLow, inHigh := YMM(), YMM()
+ for i := range inPtrs {
+ Commentf("Load and process 32 bytes from input %d to %d outputs", i, outputs)
+ VMOVDQU(Mem{Base: inPtrs[i], Index: offset, Scale: 1}, inLow)
+ VPSRLQ(U8(4), inLow, inHigh)
+ VPAND(lowMask, inLow, inLow)
+ VPAND(lowMask, inHigh, inHigh)
+ for j := range dst {
+ if loadNone {
+ VMOVDQU(Mem{Base: matrixBase, Disp: 64 * (i*outputs + j)}, lookLow)
+ VMOVDQU(Mem{Base: matrixBase, Disp: 32 + 64*(i*outputs+j)}, lookHigh)
+ VPSHUFB(inLow, lookLow, lookLow)
+ VPSHUFB(inHigh, lookHigh, lookHigh)
+ } else {
+ VPSHUFB(inLow, inLo[i*outputs+j], lookLow)
+ VPSHUFB(inHigh, inHi[i*outputs+j], lookHigh)
+ }
+ VPXOR(lookLow, lookHigh, lookLow)
+ VPXOR(lookLow, dst[j], dst[j])
+ }
+ }
+ Commentf("Store %d outputs", outputs)
+ for i := range dst {
+ if regDst {
+ VMOVDQU(dst[i], Mem{Base: dstPtr[i], Index: offset, Scale: 1})
+ continue
+ }
+ ptr := GP64()
+ MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
+ VMOVDQU(dst[i], Mem{Base: ptr, Index: offset, Scale: 1})
+ }
+ Comment("Prepare for next loop")
+ ADDQ(U8(perLoop), offset)
+ DECQ(length)
+ JNZ(LabelRef(name + "_loop"))
+ VZEROUPPER()
+
+ Label(name + "_end")
+ RET()
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/go.mod b/vendor/github.com/klauspost/reedsolomon/go.mod
new file mode 100644
index 0000000..a059d86
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/go.mod
@@ -0,0 +1,7 @@
+module github.com/klauspost/reedsolomon
+
+go 1.14
+
+require (
+ github.com/klauspost/cpuid v1.2.4
+)
diff --git a/vendor/github.com/klauspost/reedsolomon/go.sum b/vendor/github.com/klauspost/reedsolomon/go.sum
new file mode 100644
index 0000000..5a44d81
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/go.sum
@@ -0,0 +1,2 @@
+github.com/klauspost/cpuid v1.2.4 h1:EBfaK0SWSwk+fgk6efYFWdzl8MwRWoOO1gkmiaTXPW4=
+github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
diff --git a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
new file mode 100644
index 0000000..c9d8ab2
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
@@ -0,0 +1,160 @@
+/**
+ * A thread-safe tree which caches inverted matrices.
+ *
+ * Copyright 2016, Peter Collins
+ */
+
+package reedsolomon
+
+import (
+ "errors"
+ "sync"
+)
+
+// The tree uses a Reader-Writer mutex to make it thread-safe
+// when accessing cached matrices and inserting new ones.
+type inversionTree struct {
+ mutex *sync.RWMutex
+ root inversionNode
+}
+
+type inversionNode struct {
+ matrix matrix
+ children []*inversionNode
+}
+
+// newInversionTree initializes a tree for storing inverted matrices.
+// Note that the root node is the identity matrix as it implies
+// there were no errors with the original data.
+func newInversionTree(dataShards, parityShards int) inversionTree {
+ identity, _ := identityMatrix(dataShards)
+ root := inversionNode{
+ matrix: identity,
+ children: make([]*inversionNode, dataShards+parityShards),
+ }
+ return inversionTree{
+ mutex: &sync.RWMutex{},
+ root: root,
+ }
+}
+
+// GetInvertedMatrix returns the cached inverted matrix or nil if it
+// is not found in the tree keyed on the indices of invalid rows.
+func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
+ // Lock the tree for reading before accessing the tree.
+ t.mutex.RLock()
+ defer t.mutex.RUnlock()
+
+ // If no invalid indices were give we should return the root
+ // identity matrix.
+ if len(invalidIndices) == 0 {
+ return t.root.matrix
+ }
+
+ // Recursively search for the inverted matrix in the tree, passing in
+ // 0 as the parent index as we start at the root of the tree.
+ return t.root.getInvertedMatrix(invalidIndices, 0)
+}
+
+// errAlreadySet is returned if the root node matrix is overwritten
+var errAlreadySet = errors.New("the root node identity matrix is already set")
+
+// InsertInvertedMatrix inserts a new inverted matrix into the tree
+// keyed by the indices of invalid rows. The total number of shards
+// is required for creating the proper length lists of child nodes for
+// each node.
+func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
+ // If no invalid indices were given then we are done because the
+ // root node is already set with the identity matrix.
+ if len(invalidIndices) == 0 {
+ return errAlreadySet
+ }
+
+ if !matrix.IsSquare() {
+ return errNotSquare
+ }
+
+ // Lock the tree for writing and reading before accessing the tree.
+ t.mutex.Lock()
+ defer t.mutex.Unlock()
+
+ // Recursively create nodes for the inverted matrix in the tree until
+ // we reach the node to insert the matrix to. We start by passing in
+ // 0 as the parent index as we start at the root of the tree.
+ t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
+
+ return nil
+}
+
+func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
+ // Get the child node to search next from the list of children. The
+ // list of children starts relative to the parent index passed in
+ // because the indices of invalid rows is sorted (by default). As we
+ // search recursively, the first invalid index gets popped off the list,
+ // so when searching through the list of children, use that first invalid
+ // index to find the child node.
+ firstIndex := invalidIndices[0]
+ node := n.children[firstIndex-parent]
+
+ // If the child node doesn't exist in the list yet, fail fast by
+ // returning, so we can construct and insert the proper inverted matrix.
+ if node == nil {
+ return nil
+ }
+
+ // If there's more than one invalid index left in the list we should
+ // keep searching recursively.
+ if len(invalidIndices) > 1 {
+ // Search recursively on the child node by passing in the invalid indices
+ // with the first index popped off the front. Also the parent index to
+ // pass down is the first index plus one.
+ return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
+ }
+ // If there aren't any more invalid indices to search, we've found our
+ // node. Return it, however keep in mind that the matrix could still be
+ // nil because intermediary nodes in the tree are created sometimes with
+ // their inversion matrices uninitialized.
+ return node.matrix
+}
+
+func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
+ // As above, get the child node to search next from the list of children.
+ // The list of children starts relative to the parent index passed in
+ // because the indices of invalid rows is sorted (by default). As we
+ // search recursively, the first invalid index gets popped off the list,
+ // so when searching through the list of children, use that first invalid
+ // index to find the child node.
+ firstIndex := invalidIndices[0]
+ node := n.children[firstIndex-parent]
+
+ // If the child node doesn't exist in the list yet, create a new
+ // node because we have the writer lock and add it to the list
+ // of children.
+ if node == nil {
+ // Make the length of the list of children equal to the number
+ // of shards minus the first invalid index because the list of
+ // invalid indices is sorted, so only this length of errors
+ // are possible in the tree.
+ node = &inversionNode{
+ children: make([]*inversionNode, shards-firstIndex),
+ }
+ // Insert the new node into the tree at the first index relative
+ // to the parent index that was given in this recursive call.
+ n.children[firstIndex-parent] = node
+ }
+
+ // If there's more than one invalid index left in the list we should
+ // keep searching recursively in order to find the node to add our
+ // matrix.
+ if len(invalidIndices) > 1 {
+ // As above, search recursively on the child node by passing in
+ // the invalid indices with the first index popped off the front.
+ // Also the total number of shards and parent index are passed down
+ // which is equal to the first index plus one.
+ node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
+ } else {
+ // If there aren't any more invalid indices to search, we've found our
+ // node. Cache the inverted matrix in this node.
+ node.matrix = matrix
+ }
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/matrix.go b/vendor/github.com/klauspost/reedsolomon/matrix.go
new file mode 100644
index 0000000..a6b9730
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/matrix.go
@@ -0,0 +1,279 @@
+/**
+ * Matrix Algebra over an 8-bit Galois Field
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// matrix is a row-major 2D byte matrix over an 8-bit Galois field,
+// indexed as m[row][col].
+type matrix [][]byte
+
+// newMatrix returns a matrix of zeros with the given dimensions.
+// It returns errInvalidRowSize or errInvalidColSize when rows or cols
+// is zero or negative.
+func newMatrix(rows, cols int) (matrix, error) {
+	if rows <= 0 {
+		return nil, errInvalidRowSize
+	}
+	if cols <= 0 {
+		return nil, errInvalidColSize
+	}
+
+	m := matrix(make([][]byte, rows))
+	for i := range m {
+		m[i] = make([]byte, cols)
+	}
+	return m, nil
+}
+
+// newMatrixData initializes a matrix with the given row-major data and
+// validates it with Check. Note that data is not copied from input.
+func newMatrixData(data [][]byte) (matrix, error) {
+	m := matrix(data)
+	err := m.Check()
+	if err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// identityMatrix returns an identity matrix of the given size.
+func identityMatrix(size int) (matrix, error) {
+	m, err := newMatrix(size, size)
+	if err != nil {
+		return nil, err
+	}
+	for i := range m {
+		m[i][i] = 1
+	}
+	return m, nil
+}
+
+// errInvalidRowSize will be returned if attempting to create a matrix with
+// a negative or zero row count.
+var errInvalidRowSize = errors.New("invalid row size")
+
+// errInvalidColSize will be returned if attempting to create a matrix with
+// a negative or zero column count.
+var errInvalidColSize = errors.New("invalid column size")
+
+// errColSizeMismatch is returned if the rows of a matrix do not all have
+// the same number of columns.
+var errColSizeMismatch = errors.New("column size is not the same for all rows")
+
+// Check verifies that the matrix is well formed: at least one row, at
+// least one column, and every row of identical length. It returns
+// errInvalidRowSize, errInvalidColSize or errColSizeMismatch on failure.
+func (m matrix) Check() error {
+	rows := len(m)
+	if rows <= 0 {
+		return errInvalidRowSize
+	}
+	cols := len(m[0])
+	if cols <= 0 {
+		return errInvalidColSize
+	}
+
+	// Every row must match the length of the first row.
+	for _, col := range m {
+		if len(col) != cols {
+			return errColSizeMismatch
+		}
+	}
+	return nil
+}
+
+// String returns a human-readable string of the matrix contents.
+//
+// Example: [[1, 2], [3, 4]]
+func (m matrix) String() string {
+	rowOut := make([]string, 0, len(m))
+	for _, row := range m {
+		colOut := make([]string, 0, len(row))
+		for _, col := range row {
+			colOut = append(colOut, strconv.Itoa(int(col)))
+		}
+		rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
+	}
+	return "[" + strings.Join(rowOut, ", ") + "]"
+}
+
+// Multiply multiplies this matrix (the one on the left) by another
+// matrix (the one on the right) and returns a new matrix with the result.
+// Addition and multiplication are performed in GF(2^8) (XOR and
+// galMultiply), not ordinary integer arithmetic.
+func (m matrix) Multiply(right matrix) (matrix, error) {
+	if len(m[0]) != len(right) {
+		return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
+	}
+	// Dimensions were validated above, so newMatrix cannot fail here.
+	result, _ := newMatrix(len(m), len(right[0]))
+	for r, row := range result {
+		for c := range row {
+			var value byte
+			for i := range m[0] {
+				value ^= galMultiply(m[r][i], right[i][c])
+			}
+			result[r][c] = value
+		}
+	}
+	return result, nil
+}
+
+// Augment returns the concatenation of this matrix and the matrix on the
+// right, i.e. [m | right]. Both matrices must have the same number of rows,
+// otherwise errMatrixSize is returned. Data is copied into a new matrix.
+func (m matrix) Augment(right matrix) (matrix, error) {
+	if len(m) != len(right) {
+		return nil, errMatrixSize
+	}
+
+	// Row count was validated above, so newMatrix cannot fail here.
+	result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
+	for r, row := range m {
+		for c := range row {
+			result[r][c] = m[r][c]
+		}
+		cols := len(m[0])
+		for c := range right[0] {
+			result[r][cols+c] = right[r][c]
+		}
+	}
+	return result, nil
+}
+
+// errMatrixSize is returned if matrix dimensions don't match.
+var errMatrixSize = errors.New("matrix sizes do not match")
+
+// SameSize returns nil if n has exactly the same dimensions as m
+// (same row count and each row of equal length), and errMatrixSize
+// otherwise.
+func (m matrix) SameSize(n matrix) error {
+	if len(m) != len(n) {
+		return errMatrixSize
+	}
+	for i := range m {
+		if len(m[i]) != len(n[i]) {
+			return errMatrixSize
+		}
+	}
+	return nil
+}
+
+// SubMatrix returns a part of this matrix covering rows [rmin, rmax) and
+// columns [cmin, cmax). Data is copied.
+func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
+	result, err := newMatrix(rmax-rmin, cmax-cmin)
+	if err != nil {
+		return nil, err
+	}
+	// OPTME: If used heavily, use copy function to copy slice
+	for r := rmin; r < rmax; r++ {
+		for c := cmin; c < cmax; c++ {
+			result[r-rmin][c-cmin] = m[r][c]
+		}
+	}
+	return result, nil
+}
+
+// SwapRows exchanges two rows in the matrix in place.
+// Returns errInvalidRowSize if either index is out of range.
+func (m matrix) SwapRows(r1, r2 int) error {
+	if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
+		return errInvalidRowSize
+	}
+	m[r2], m[r1] = m[r1], m[r2]
+	return nil
+}
+
+// IsSquare will return true if the matrix is square, that is, it has the
+// same number of rows and columns.
+func (m matrix) IsSquare() bool {
+	return len(m) == len(m[0])
+}
+
+// errSingular is returned if the matrix is singular and cannot be inverted.
+var errSingular = errors.New("matrix is singular")
+
+// errNotSquare is returned if attempting to invert a non-square matrix.
+var errNotSquare = errors.New("only square matrices can be inverted")
+
+// Invert returns the inverse of this matrix.
+// Returns errSingular when the matrix is singular and doesn't have an inverse.
+// The matrix must be square, otherwise errNotSquare is returned.
+func (m matrix) Invert() (matrix, error) {
+	if !m.IsSquare() {
+		return nil, errNotSquare
+	}
+
+	// Augment with the identity, reduce [m | I] to [I | m^-1], then take
+	// the right half. Sizes are known-good here, so errors are ignored.
+	size := len(m)
+	work, _ := identityMatrix(size)
+	work, _ = m.Augment(work)
+
+	err := work.gaussianElimination()
+	if err != nil {
+		return nil, err
+	}
+
+	return work.SubMatrix(0, size, size, size*2)
+}
+
+// gaussianElimination reduces the matrix in place so that the left square
+// portion becomes the identity, using GF(2^8) arithmetic.
+// Returns errSingular if no pivot can be found for some column.
+func (m matrix) gaussianElimination() error {
+	rows := len(m)
+	columns := len(m[0])
+	// Clear out the part below the main diagonal and scale the main
+	// diagonal to be 1.
+	for r := 0; r < rows; r++ {
+		// If the element on the diagonal is 0, find a row below
+		// that has a non-zero and swap them.
+		if m[r][r] == 0 {
+			for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+				if m[rowBelow][r] != 0 {
+					m.SwapRows(r, rowBelow)
+					break
+				}
+			}
+		}
+		// If we couldn't find one, the matrix is singular.
+		if m[r][r] == 0 {
+			return errSingular
+		}
+		// Scale to 1.
+		if m[r][r] != 1 {
+			scale := galDivide(1, m[r][r])
+			for c := 0; c < columns; c++ {
+				m[r][c] = galMultiply(m[r][c], scale)
+			}
+		}
+		// Make everything below the 1 be a 0 by subtracting
+		// a multiple of it. (Subtraction and addition are
+		// both exclusive or in the Galois field.)
+		for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+			if m[rowBelow][r] != 0 {
+				scale := m[rowBelow][r]
+				for c := 0; c < columns; c++ {
+					m[rowBelow][c] ^= galMultiply(scale, m[r][c])
+				}
+			}
+		}
+	}
+
+	// Now clear the part above the main diagonal.
+	for d := 0; d < rows; d++ {
+		for rowAbove := 0; rowAbove < d; rowAbove++ {
+			if m[rowAbove][d] != 0 {
+				scale := m[rowAbove][d]
+				for c := 0; c < columns; c++ {
+					m[rowAbove][c] ^= galMultiply(scale, m[d][c])
+				}
+
+			}
+		}
+	}
+	return nil
+}
+
+// vandermonde creates a Vandermonde matrix, which is guaranteed to have
+// the property that any subset of rows that forms a square matrix
+// is invertible. Element (r, c) is galExp(r, c) in GF(2^8).
+func vandermonde(rows, cols int) (matrix, error) {
+	result, err := newMatrix(rows, cols)
+	if err != nil {
+		return nil, err
+	}
+	for r, row := range result {
+		for c := range row {
+			result[r][c] = galExp(byte(r), c)
+		}
+	}
+	return result, nil
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/options.go b/vendor/github.com/klauspost/reedsolomon/options.go
new file mode 100644
index 0000000..b4adc2a
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/options.go
@@ -0,0 +1,175 @@
+package reedsolomon
+
+import (
+ "runtime"
+
+ "github.com/klauspost/cpuid"
+)
+
+// Option allows to override processing parameters.
+type Option func(*options)
+
+// options holds the tunable encoding/decoding parameters.
+// Instances start from defaultOptions and are then modified by
+// the Option functions passed to New.
+type options struct {
+	maxGoroutines int
+	minSplitSize  int
+	shardSize     int
+	perRound      int
+
+	useAVX512, useAVX2, useSSSE3, useSSE2 bool
+	usePAR1Matrix                         bool
+	useCauchy                             bool
+	fastOneParity                         bool
+
+	// stream options
+	concReads  bool
+	concWrites bool
+	streamBS   int
+}
+
+// defaultOptions is the baseline configuration used by New before any
+// Option overrides are applied. SIMD flags come from runtime CPU detection.
+var defaultOptions = options{
+	maxGoroutines: 384,
+	minSplitSize:  -1,
+	fastOneParity: false,
+
+	// Detect CPU capabilities.
+	useSSSE3:  cpuid.CPU.SSSE3(),
+	useSSE2:   cpuid.CPU.SSE2(),
+	useAVX2:   cpuid.CPU.AVX2(),
+	useAVX512: cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW(),
+}
+
+// init disables parallelism by default on single-CPU configurations.
+func init() {
+	if runtime.GOMAXPROCS(0) <= 1 {
+		defaultOptions.maxGoroutines = 1
+	}
+}
+
+// WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding.
+// Jobs will be split into this many parts, unless each goroutine would have to process
+// less than minSplitSize bytes (set with WithMinSplitSize).
+// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
+// scheduling.
+// If n <= 0, it is ignored.
+func WithMaxGoroutines(n int) Option {
+	return func(o *options) {
+		if n > 0 {
+			o.maxGoroutines = n
+		}
+	}
+}
+
+// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a
+// specific shard size.
+// Send in the shard size you expect to send. Other shard sizes will work, but may not
+// run at the optimal speed.
+// Overwrites WithMaxGoroutines.
+// If shardSize <= 0, it is ignored.
+func WithAutoGoroutines(shardSize int) Option {
+	return func(o *options) {
+		// The actual goroutine tuning from this value happens in New.
+		o.shardSize = shardSize
+	}
+}
+
+// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
+// By default this parameter is determined by CPU cache characteristics.
+// See WithMaxGoroutines on how jobs are split.
+// If n <= 0, it is ignored.
+func WithMinSplitSize(n int) Option {
+	return func(o *options) {
+		if n > 0 {
+			o.minSplitSize = n
+		}
+	}
+}
+
+// WithConcurrentStreams will enable concurrent reads and writes on the streams.
+// Default: Disabled, meaning only one stream will be read/written at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreams(enabled bool) Option {
+	return func(o *options) {
+		// Sets both the read and write concurrency flags at once.
+		o.concReads, o.concWrites = enabled, enabled
+	}
+}
+
+// WithConcurrentStreamReads will enable concurrent reads from the input streams.
+// Default: Disabled, meaning only one stream will be read at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreamReads(enabled bool) Option {
+	return func(o *options) {
+		o.concReads = enabled
+	}
+}
+
+// WithConcurrentStreamWrites will enable concurrent writes to the output streams.
+// Default: Disabled, meaning only one stream will be written at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreamWrites(enabled bool) Option {
+	return func(o *options) {
+		o.concWrites = enabled
+	}
+}
+
+// WithStreamBlockSize allows to set a custom block size per round of reads/writes.
+// If not set, any shard size set with WithAutoGoroutines will be used.
+// If WithAutoGoroutines is also unset, 4MB will be used.
+// Ignored if not used on stream.
+func WithStreamBlockSize(n int) Option {
+	return func(o *options) {
+		o.streamBS = n
+	}
+}
+
+// withSSSE3 overrides the detected SSSE3 support flag.
+// Unexported: only available within the package.
+func withSSSE3(enabled bool) Option {
+	return func(o *options) {
+		o.useSSSE3 = enabled
+	}
+}
+
+// withAVX2 overrides the detected AVX2 support flag.
+func withAVX2(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX2 = enabled
+	}
+}
+
+// withSSE2 overrides the detected SSE2 support flag.
+func withSSE2(enabled bool) Option {
+	return func(o *options) {
+		o.useSSE2 = enabled
+	}
+}
+
+// withAVX512 overrides the detected AVX512 support flag.
+func withAVX512(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX512 = enabled
+	}
+}
+
+// WithPAR1Matrix causes the encoder to build the matrix how PARv1
+// does. Note that the method they use is buggy, and may lead to cases
+// where recovery is impossible, even if there are enough parity
+// shards.
+func WithPAR1Matrix() Option {
+	return func(o *options) {
+		// PAR1 and Cauchy are mutually exclusive matrix constructions.
+		o.usePAR1Matrix = true
+		o.useCauchy = false
+	}
+}
+
+// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
+// The output of this is not compatible with the standard output.
+// A Cauchy matrix is faster to generate. This does not affect data throughput,
+// but will result in slightly faster start-up time.
+func WithCauchyMatrix() Option {
+	return func(o *options) {
+		// Cauchy and PAR1 are mutually exclusive matrix constructions.
+		o.useCauchy = true
+		o.usePAR1Matrix = false
+	}
+}
+
+// WithFastOneParityMatrix will switch the matrix to a simple xor
+// if there is only one parity shard.
+// The PAR1 matrix already has this property so it has little effect there.
+func WithFastOneParityMatrix() Option {
+	return func(o *options) {
+		o.fastOneParity = true
+	}
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
new file mode 100644
index 0000000..13a35d2
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
@@ -0,0 +1,1011 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+// Package reedsolomon enables Erasure Coding in Go
+//
+// For usage and examples, see https://github.com/klauspost/reedsolomon
+//
+package reedsolomon
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "runtime"
+ "sync"
+
+ "github.com/klauspost/cpuid"
+)
+
+// Encoder is an interface to encode Reed-Solomon parity sets for your data.
+type Encoder interface {
+	// Encode parity for a set of data shards.
+	// Input is 'shards' containing data shards followed by parity shards.
+	// The number of shards must match the number given to New().
+	// Each shard is a byte array, and they must all be the same size.
+	// The parity shards will always be overwritten and the data shards
+	// will remain the same, so it is safe for you to read from the
+	// data shards while this is running.
+	Encode(shards [][]byte) error
+
+	// Verify returns true if the parity shards contain correct data.
+	// The data is the same format as Encode. No data is modified, so
+	// you are allowed to read from data while this is running.
+	Verify(shards [][]byte) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of shards, some of which contain data, fills in the
+	// ones that don't have data.
+	//
+	// The length of the array must be equal to the total number of shards.
+	// You indicate that a shard is missing by setting it to nil or zero-length.
+	// If a shard is zero-length but has sufficient capacity, that memory will
+	// be used, otherwise a new []byte will be allocated.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(shards [][]byte) error
+
+	// ReconstructData will recreate any missing data shards, if possible.
+	//
+	// Given a list of shards, some of which contain data, fills in the
+	// data shards that don't have data.
+	//
+	// The length of the array must be equal to Shards.
+	// You indicate that a shard is missing by setting it to nil or zero-length.
+	// If a shard is zero-length but has sufficient capacity, that memory will
+	// be used, otherwise a new []byte will be allocated.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// As the reconstructed shard set may contain missing parity shards,
+	// calling the Verify function is likely to fail.
+	ReconstructData(shards [][]byte) error
+
+	// Update is used when a few data shards have changed, to recompute the
+	// parity without re-encoding everything.
+	// Input 'newDatashards' contains the changed data shards (unchanged
+	// shards may be nil).
+	// Input 'shards' contains the old data shards (nil where the data shard
+	// did not change) followed by the old parity shards.
+	// The new parity shards will be placed in shards[DataShards:].
+	// Update is very useful if DataShards is much larger than ParityShards
+	// and only a few data shards changed: it is faster than Encode and does
+	// not need to read all data shards to encode.
+	Update(shards [][]byte, newDatashards [][]byte) error
+
+	// Split a data slice into the number of shards given to the encoder,
+	// and create empty parity shards.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't dividable by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// There must be at least 1 byte otherwise ErrShortData will be
+	// returned.
+	//
+	// The data will not be copied, except for the last shard, so you
+	// should not modify the data of the input slice afterwards.
+	Split(data []byte) ([][]byte, error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	// You must supply the exact output size you want.
+	// If there are too few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards [][]byte, outSize int) error
+}
+
+// reedSolomon contains a matrix for a specific
+// distribution of datashards and parity shards.
+// Construct using New().
+type reedSolomon struct {
+	DataShards   int // Number of data shards, should not be modified.
+	ParityShards int // Number of parity shards, should not be modified.
+	Shards       int // Total number of shards. Calculated, and should not be modified.
+	m            matrix        // Encoding matrix (identity on top, parity rows below).
+	tree         inversionTree // Cache of inverted matrices keyed by invalid-row indices.
+	parity       [][]byte      // The parity rows of m, sliced out once for convenience.
+	o            options       // Effective options after applying Option overrides.
+	mPool        sync.Pool     // Scratch buffers for the AVX2 code-gen path.
+}
+
+// ErrInvShardNum will be returned by New, if you attempt to create
+// an Encoder where either data or parity shards is zero or less.
+var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards")
+
+// ErrMaxShardNum will be returned by New, if you attempt to create an
+// Encoder where data and parity shards are bigger than the order of
+// GF(2^8).
+var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
+
+// buildMatrix creates the matrix to use for encoding, given the
+// number of data shards and the number of total shards.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrix(dataShards, totalShards int) (matrix, error) {
+	// Start with a Vandermonde matrix. This matrix would work,
+	// in theory, but doesn't have the property that the data
+	// shards are unchanged after encoding.
+	vm, err := vandermonde(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	// Multiply by the inverse of the top square of the matrix.
+	// This will make the top square be the identity matrix, but
+	// preserve the property that any square subset of rows is
+	// invertible.
+	top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	topInv, err := top.Invert()
+	if err != nil {
+		return nil, err
+	}
+
+	return vm.Multiply(topInv)
+}
+
+// buildMatrixPAR1 creates the matrix to use for encoding according to
+// the PARv1 spec, given the number of data shards and the number of
+// total shards. Note that the method they use is buggy, and may lead
+// to cases where recovery is impossible, even if there are enough
+// parity shards.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	for r, row := range result {
+		// The top portion of the matrix is the identity
+		// matrix, and the bottom is a transposed Vandermonde
+		// matrix starting at 1 instead of 0.
+		if r < dataShards {
+			result[r][r] = 1
+		} else {
+			for c := range row {
+				result[r][c] = galExp(byte(c+1), r-dataShards)
+			}
+		}
+	}
+	return result, nil
+}
+
+// buildMatrixCauchy creates an encoding matrix whose top square is the
+// identity and whose bottom rows form a transposed Cauchy matrix
+// (entries 1/(r^c) in GF(2^8), via the precomputed inverse table).
+func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) {
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	for r, row := range result {
+		// The top portion of the matrix is the identity
+		// matrix, and the bottom is a transposed Cauchy matrix.
+		if r < dataShards {
+			result[r][r] = 1
+		} else {
+			for c := range row {
+				result[r][c] = invTable[(byte(r ^ c))]
+			}
+		}
+	}
+	return result, nil
+}
+
+// buildXorMatrix can be used to build a matrix with pure XOR
+// operations if there is only one parity shard.
+// Requires totalShards == dataShards+1; anything else is an internal error.
+func buildXorMatrix(dataShards, totalShards int) (matrix, error) {
+	if dataShards+1 != totalShards {
+		return nil, errors.New("internal error")
+	}
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	for r, row := range result {
+		// The top portion of the matrix is the identity
+		// matrix.
+		if r < dataShards {
+			result[r][r] = 1
+		} else {
+			// Set all values to 1 (XOR)
+			for c := range row {
+				result[r][c] = 1
+			}
+		}
+	}
+	return result, nil
+}
+
+// New creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of total shards is 256.
+// If no options are supplied, default options are used.
+func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
+	r := reedSolomon{
+		DataShards:   dataShards,
+		ParityShards: parityShards,
+		Shards:       dataShards + parityShards,
+		o:            defaultOptions,
+	}
+
+	// Apply user overrides on top of the defaults.
+	for _, opt := range opts {
+		opt(&r.o)
+	}
+	if dataShards <= 0 || parityShards <= 0 {
+		return nil, ErrInvShardNum
+	}
+
+	if dataShards+parityShards > 256 {
+		return nil, ErrMaxShardNum
+	}
+
+	// Choose the matrix construction; fastOneParity takes priority,
+	// then Cauchy, then PAR1, then the default Vandermonde-based matrix.
+	var err error
+	switch {
+	case r.o.fastOneParity && parityShards == 1:
+		r.m, err = buildXorMatrix(dataShards, r.Shards)
+	case r.o.useCauchy:
+		r.m, err = buildMatrixCauchy(dataShards, r.Shards)
+	case r.o.usePAR1Matrix:
+		r.m, err = buildMatrixPAR1(dataShards, r.Shards)
+	default:
+		r.m, err = buildMatrix(dataShards, r.Shards)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	// Calculate what we want per round
+	r.o.perRound = cpuid.CPU.Cache.L2
+	if r.o.perRound <= 0 {
+		// Set to 128K if undetectable.
+		r.o.perRound = 128 << 10
+	}
+
+	if cpuid.CPU.ThreadsPerCore > 1 && r.o.maxGoroutines > cpuid.CPU.PhysicalCores {
+		// If multiple threads per core, make sure they don't contend for cache.
+		r.o.perRound /= cpuid.CPU.ThreadsPerCore
+	}
+	// 1 input + parity must fit in cache, and we add one more to be safer.
+	r.o.perRound = r.o.perRound / (1 + parityShards)
+	// Align to 64 bytes.
+	r.o.perRound = ((r.o.perRound + 63) / 64) * 64
+
+	if r.o.minSplitSize <= 0 {
+		// Set minsplit as high as we can, but still have parity in L1.
+		cacheSize := cpuid.CPU.Cache.L1D
+		if cacheSize <= 0 {
+			cacheSize = 32 << 10
+		}
+
+		r.o.minSplitSize = cacheSize / (parityShards + 1)
+		// Min 1K
+		if r.o.minSplitSize < 1024 {
+			r.o.minSplitSize = 1024
+		}
+	}
+
+	if r.o.perRound < r.o.minSplitSize {
+		r.o.perRound = r.o.minSplitSize
+	}
+
+	// WithAutoGoroutines: derive the goroutine count from the expected
+	// shard size instead of using the fixed maxGoroutines value.
+	if r.o.shardSize > 0 {
+		p := runtime.GOMAXPROCS(0)
+		if p == 1 || r.o.shardSize <= r.o.minSplitSize*2 {
+			// Not worth it.
+			r.o.maxGoroutines = 1
+		} else {
+			g := r.o.shardSize / r.o.perRound
+
+			// Overprovision by a factor of 2.
+			if g < p*2 && r.o.perRound > r.o.minSplitSize*2 {
+				g = p * 2
+				r.o.perRound /= 2
+			}
+
+			// Have g be multiple of p
+			g += p - 1
+			g -= g % p
+
+			r.o.maxGoroutines = g
+		}
+	}
+
+	// Inverted matrices are cached in a tree keyed by the indices
+	// of the invalid rows of the data to reconstruct.
+	// The inversion root node will have the identity matrix as
+	// its inversion matrix because it implies there are no errors
+	// with the original data.
+	r.tree = newInversionTree(dataShards, parityShards)
+
+	r.parity = make([][]byte, parityShards)
+	for i := range r.parity {
+		r.parity[i] = r.m[dataShards+i]
+	}
+
+	// Pre-size scratch buffers for the AVX2 code-generation path.
+	if avx2CodeGen && r.o.useAVX2 {
+		r.mPool.New = func() interface{} {
+			return make([]byte, r.Shards*2*32)
+		}
+	}
+	return &r, err
+}
+
+// ErrTooFewShards is returned if too few shards where given to
+// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
+// if there were too few shards to reconstruct the missing data.
+var ErrTooFewShards = errors.New("too few shards given")
+
+// Encode parity for a set of data shards.
+// An array 'shards' containing data shards followed by parity shards.
+// The number of shards must match the number given to New.
+// Each shard is a byte array, and they must all be the same size.
+// The parity shards will always be overwritten and the data shards
+// will remain the same.
+func (r *reedSolomon) Encode(shards [][]byte) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+
+	err := checkShards(shards, false)
+	if err != nil {
+		return err
+	}
+
+	// Get the slice of output buffers.
+	output := shards[r.DataShards:]
+
+	// Do the coding.
+	r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0]))
+	return nil
+}
+
+// ErrInvalidInput is returned if an invalid input parameter was given to Update.
+var ErrInvalidInput = errors.New("invalid input")
+
+// Update recomputes the parity shards after a subset of data shards changed.
+// See the Encoder interface for the full contract. Changed data shards must
+// have their old contents present in 'shards'; all parity shards must be
+// non-nil. New parity is written into shards[DataShards:].
+func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+
+	if len(newDatashards) != r.DataShards {
+		return ErrTooFewShards
+	}
+
+	err := checkShards(shards, true)
+	if err != nil {
+		return err
+	}
+
+	err = checkShards(newDatashards, true)
+	if err != nil {
+		return err
+	}
+
+	// A changed shard must come with its old contents.
+	for i := range newDatashards {
+		if newDatashards[i] != nil && shards[i] == nil {
+			return ErrInvalidInput
+		}
+	}
+	// All old parity shards are required.
+	for _, p := range shards[r.DataShards:] {
+		if p == nil {
+			return ErrInvalidInput
+		}
+	}
+
+	shardSize := shardSize(shards)
+
+	// Get the slice of output buffers.
+	output := shards[r.DataShards:]
+
+	// Do the coding.
+	r.updateParityShards(r.parity, shards[0:r.DataShards], newDatashards[0:r.DataShards], output, r.ParityShards, shardSize)
+	return nil
+}
+
+// updateParityShards applies the parity delta for each changed data shard:
+// XORs the old and new data in place (into oldinputs), then folds the delta
+// into every parity output. Dispatches to the parallel variant when the
+// workload is large enough.
+func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
+		return
+	}
+
+	for c := 0; c < r.DataShards; c++ {
+		in := newinputs[c]
+		if in == nil {
+			continue
+		}
+		oldin := oldinputs[c]
+		// NOTE: the oldinputs data will be changed (it becomes old XOR new).
+		sliceXor(in, oldin, &r.o)
+		for iRow := 0; iRow < outputCount; iRow++ {
+			galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
+		}
+	}
+}
+
+// updateParityShardsP is the parallel version of updateParityShards:
+// the byte range is split into chunks of at least minSplitSize bytes and
+// each chunk is processed by its own goroutine.
+func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, stop int) {
+			for c := 0; c < r.DataShards; c++ {
+				in := newinputs[c]
+				if in == nil {
+					continue
+				}
+				oldin := oldinputs[c]
+				// NOTE: the oldinputs data will be changed (it becomes old XOR new).
+				sliceXor(in[start:stop], oldin[start:stop], &r.o)
+				for iRow := 0; iRow < outputCount; iRow++ {
+					galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
+
+// Verify returns true if the parity shards contain the right data.
+// The data is the same format as Encode. No data is modified.
+func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
+	if len(shards) != r.Shards {
+		return false, ErrTooFewShards
+	}
+	err := checkShards(shards, false)
+	if err != nil {
+		return false, err
+	}
+
+	// Slice of buffers being checked.
+	toCheck := shards[r.DataShards:]
+
+	// Do the checking.
+	return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil
+}
+
+// codeSomeShards multiplies a subset of rows from a coding matrix by a full
+// set of input shards to produce some output shards.
+// 'matrixRows' is the rows from the matrix to use.
+// 'inputs' is an array of byte arrays, each of which is one input shard.
+// The number of inputs used is determined by the length of each matrix row.
+// 'outputs' is the byte arrays where the computed shards are stored.
+// The number of outputs computed, and the number of matrix rows used,
+// is determined by outputCount, which is the number of outputs to compute.
+// Dispatches to AVX512/parallel variants when the options and sizes allow.
+func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	if len(outputs) == 0 {
+		return
+	}
+	switch {
+	case r.o.useAVX512 && r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize && len(inputs) >= 4 && len(outputs) >= 2:
+		r.codeSomeShardsAvx512P(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	case r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2:
+		r.codeSomeShardsAvx512(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	case r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize:
+		r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	}
+
+	// Process using no goroutines
+	start, end := 0, r.o.perRound
+	if end > len(inputs[0]) {
+		end = len(inputs[0])
+	}
+	// Fast path: generated AVX2 kernels handle the bulk; the scalar loop
+	// below then finishes any remainder.
+	if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
+		m := genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
+		start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
+		r.mPool.Put(m)
+		end = len(inputs[0])
+	}
+
+	// Process the shards in chunks of perRound bytes; the first input per
+	// output overwrites (galMulSlice), the rest accumulate (galMulSliceXor).
+	for start < len(inputs[0]) {
+		for c := 0; c < r.DataShards; c++ {
+			in := inputs[c][start:end]
+			for iRow := 0; iRow < outputCount; iRow++ {
+				if c == 0 {
+					galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
+				} else {
+					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
+				}
+			}
+		}
+		start = end
+		end += r.o.perRound
+		if end > len(inputs[0]) {
+			end = len(inputs[0])
+		}
+	}
+}
+
+// codeSomeShardsP performs the same work as codeSomeShards, but splits the
+// workload into several goroutines, each handling a contiguous byte range.
+func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 64
+	do = (do + 63) & (^63)
+	start := 0
+	var avx2Matrix []byte
+	// Pre-generate the AVX2 matrix once; it is shared read-only by all goroutines.
+	if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
+		avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
+		defer r.mPool.Put(avx2Matrix)
+	}
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+
+		wg.Add(1)
+		go func(start, stop int) {
+			// Let the AVX2 kernel consume the leading part of this range,
+			// then finish the remainder with the scalar per-round loop.
+			if avx2CodeGen && r.o.useAVX2 && stop-start >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
+				start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
+			}
+
+			lstart, lstop := start, start+r.o.perRound
+			if lstop > stop {
+				lstop = stop
+			}
+			for lstart < stop {
+				for c := 0; c < r.DataShards; c++ {
+					in := inputs[c][lstart:lstop]
+					for iRow := 0; iRow < outputCount; iRow++ {
+						if c == 0 {
+							galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+						} else {
+							galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+						}
+					}
+				}
+				lstart = lstop
+				lstop += r.o.perRound
+				if lstop > stop {
+					lstop = stop
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
+
+// checkSomeShards is mostly the same as codeSomeShards,
+// except this will check values and return
+// as soon as a difference is found.
+// It recomputes the parity into scratch buffers and compares against toCheck.
+func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
+	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
+	}
+	outputs := make([][]byte, len(toCheck))
+	for i := range outputs {
+		outputs[i] = make([]byte, byteCount)
+	}
+	for c := 0; c < r.DataShards; c++ {
+		in := inputs[c]
+		for iRow := 0; iRow < outputCount; iRow++ {
+			galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+		}
+	}
+
+	for i, calc := range outputs {
+		if !bytes.Equal(calc, toCheck[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// checkSomeShardsP is the parallel version of checkSomeShards. Each
+// goroutine verifies its own byte range; the shared 'same' flag lets
+// goroutines bail out early once any range mismatches.
+func (r *reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
+	same := true
+	var mu sync.RWMutex // For above
+
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 64
+	do = (do + 63) & (^63)
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, do int) {
+			defer wg.Done()
+			outputs := make([][]byte, len(toCheck))
+			for i := range outputs {
+				outputs[i] = make([]byte, do)
+			}
+			for c := 0; c < r.DataShards; c++ {
+				// Abort early if another goroutine already found a mismatch.
+				mu.RLock()
+				if !same {
+					mu.RUnlock()
+					return
+				}
+				mu.RUnlock()
+				in := inputs[c][start : start+do]
+				for iRow := 0; iRow < outputCount; iRow++ {
+					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+				}
+			}
+
+			for i, calc := range outputs {
+				if !bytes.Equal(calc, toCheck[i][start:start+do]) {
+					mu.Lock()
+					same = false
+					mu.Unlock()
+					return
+				}
+			}
+		}(start, do)
+		start += do
+	}
+	wg.Wait()
+	return same
+}
+
+// ErrShardNoData will be returned if there are no shards,
+// or if the length of all shards is zero.
+var ErrShardNoData = errors.New("no shard data")
+
+// ErrShardSize is returned if shard length isn't the same for all
+// shards.
+var ErrShardSize = errors.New("shard sizes do not match")
+
+// checkShards will check if shards are the same size
+// or 0, if allowed. An error is returned if this fails.
+// An error is also returned if all shards are size 0.
+func checkShards(shards [][]byte, nilok bool) error {
+	size := shardSize(shards)
+	if size == 0 {
+		return ErrShardNoData
+	}
+	for _, shard := range shards {
+		if len(shard) != size {
+			// Zero-length shards are acceptable only when nilok is set.
+			if len(shard) != 0 || !nilok {
+				return ErrShardSize
+			}
+		}
+	}
+	return nil
+}
+
+// shardSize returns the size of a single shard.
+// The first non-zero size is returned,
+// or 0 if all shards are size 0.
+func shardSize(shards [][]byte) int {
+	for _, shard := range shards {
+		if len(shard) != 0 {
+			return len(shard)
+		}
+	}
+	// All shards are empty (or there are none).
+	return 0
+}
+
+// Reconstruct will recreate the missing shards, if possible.
+//
+// Given a list of shards, some of which contain data, fills in the
+// ones that don't have data.
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is complete, but integrity is not verified.
+// Use the Verify function to check if data set is ok.
+func (r *reedSolomon) Reconstruct(shards [][]byte) error {
+	// dataOnly=false: missing parity shards are rebuilt as well as data shards.
+	return r.reconstruct(shards, false)
+}
+
+// ReconstructData will recreate any missing data shards, if possible.
+//
+// Given a list of shards, some of which contain data, fills in the
+// data shards that don't have data.
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// As the reconstructed shard set may contain missing parity shards,
+// calling the Verify function is likely to fail.
+func (r *reedSolomon) ReconstructData(shards [][]byte) error {
+	// dataOnly=true: only the missing data shards are rebuilt.
+	return r.reconstruct(shards, true)
+}
+
+// reconstruct will recreate the missing data shards, and unless
+// dataOnly is true, also the missing parity shards
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+	// Check arguments.
+	err := checkShards(shards, true)
+	if err != nil {
+		return err
+	}
+
+	shardSize := shardSize(shards)
+
+	// Quick check: are all of the shards present? If so, there's
+	// nothing to do.
+	numberPresent := 0
+	dataPresent := 0
+	for i := 0; i < r.Shards; i++ {
+		if len(shards[i]) != 0 {
+			numberPresent++
+			if i < r.DataShards {
+				dataPresent++
+			}
+		}
+	}
+	if numberPresent == r.Shards || dataOnly && dataPresent == r.DataShards {
+		// Cool. All of the shards have data. We don't
+		// need to do anything.
+		return nil
+	}
+
+	// More complete sanity check
+	if numberPresent < r.DataShards {
+		return ErrTooFewShards
+	}
+
+	// Pull out an array holding just the shards that
+	// correspond to the rows of the submatrix. These shards
+	// will be the input to the decoding process that re-creates
+	// the missing data shards.
+	//
+	// Also, create an array of indices of the valid rows we do have
+	// and the invalid rows we don't have up until we have enough valid rows.
+	subShards := make([][]byte, r.DataShards)
+	validIndices := make([]int, r.DataShards)
+	invalidIndices := make([]int, 0)
+	subMatrixRow := 0
+	for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
+		if len(shards[matrixRow]) != 0 {
+			subShards[subMatrixRow] = shards[matrixRow]
+			validIndices[subMatrixRow] = matrixRow
+			subMatrixRow++
+		} else {
+			invalidIndices = append(invalidIndices, matrixRow)
+		}
+	}
+
+	// Attempt to get the cached inverted matrix out of the tree
+	// based on the indices of the invalid rows.
+	dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)
+
+	// If the inverted matrix isn't cached in the tree yet we must
+	// construct it ourselves and insert it into the tree for the
+	// future. In this way the inversion tree is lazily loaded.
+	if dataDecodeMatrix == nil {
+		// Pull out the rows of the matrix that correspond to the
+		// shards that we have and build a square matrix. This
+		// matrix could be used to generate the shards that we have
+		// from the original data.
+		subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
+		for subMatrixRow, validIndex := range validIndices {
+			for c := 0; c < r.DataShards; c++ {
+				subMatrix[subMatrixRow][c] = r.m[validIndex][c]
+			}
+		}
+		// Invert the matrix, so we can go from the encoded shards
+		// back to the original data. Then pull out the row that
+		// generates the shard that we want to decode. Note that
+		// since this matrix maps back to the original data, it can
+		// be used to create a data shard, but not a parity shard.
+		dataDecodeMatrix, err = subMatrix.Invert()
+		if err != nil {
+			return err
+		}
+
+		// Cache the inverted matrix in the tree for future use keyed on the
+		// indices of the invalid rows.
+		err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Re-create any data shards that were missing.
+	//
+	// The input to the coding is all of the shards we actually
+	// have, and the output is the missing data shards. The computation
+	// is done using the special decode matrix we just built.
+	outputs := make([][]byte, r.ParityShards)
+	matrixRows := make([][]byte, r.ParityShards)
+	outputCount := 0
+
+	for iShard := 0; iShard < r.DataShards; iShard++ {
+		if len(shards[iShard]) == 0 {
+			// Reuse the existing capacity where possible.
+			if cap(shards[iShard]) >= shardSize {
+				shards[iShard] = shards[iShard][0:shardSize]
+			} else {
+				shards[iShard] = make([]byte, shardSize)
+			}
+			outputs[outputCount] = shards[iShard]
+			matrixRows[outputCount] = dataDecodeMatrix[iShard]
+			outputCount++
+		}
+	}
+	r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)
+
+	if dataOnly {
+		// Exit out early if we are only interested in the data shards
+		return nil
+	}
+
+	// Now that we have all of the data shards intact, we can
+	// compute any of the parity that is missing.
+	//
+	// The input to the coding is ALL of the data shards, including
+	// any that we just calculated. The output is whichever of the
+	// parity shards were missing.
+	outputCount = 0
+	for iShard := r.DataShards; iShard < r.Shards; iShard++ {
+		if len(shards[iShard]) == 0 {
+			if cap(shards[iShard]) >= shardSize {
+				shards[iShard] = shards[iShard][0:shardSize]
+			} else {
+				shards[iShard] = make([]byte, shardSize)
+			}
+			outputs[outputCount] = shards[iShard]
+			matrixRows[outputCount] = r.parity[iShard-r.DataShards]
+			outputCount++
+		}
+	}
+	r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
+	return nil
+}
+
+// ErrShortData will be returned by Split(), if there isn't enough data
+// to fill the number of shards.
+var ErrShortData = errors.New("not enough data to fill the number of requested shards")
+
+// Split a data slice into the number of shards given to the encoder,
+// and create empty parity shards if necessary.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards,
+// the last shard will contain extra zeros.
+//
+// There must be at least 1 byte otherwise ErrShortData will be
+// returned.
+//
+// The data will not be copied, except for the last shard, so you
+// should not modify the data of the input slice afterwards.
+func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
+	if len(data) == 0 {
+		return nil, ErrShortData
+	}
+	// Calculate number of bytes per data shard.
+	perShard := (len(data) + r.DataShards - 1) / r.DataShards
+
+	// Extend into any spare capacity so it can be sliced up below.
+	if cap(data) > len(data) {
+		data = data[:cap(data)]
+	}
+
+	// Only allocate memory if necessary
+	var padding []byte
+	if len(data) < (r.Shards * perShard) {
+		// calculate maximum number of full shards in `data` slice
+		fullShards := len(data) / perShard
+		padding = make([]byte, r.Shards*perShard-perShard*fullShards)
+		copy(padding, data[perShard*fullShards:])
+		data = data[0 : perShard*fullShards]
+	}
+
+	// Split into equal-length shards.
+	dst := make([][]byte, r.Shards)
+	i := 0
+	for ; i < len(dst) && len(data) >= perShard; i++ {
+		dst[i] = data[:perShard:perShard]
+		data = data[perShard:]
+	}
+
+	// Any remaining shards come out of the zero-padded buffer.
+	for j := 0; i+j < len(dst); j++ {
+		dst[i+j] = padding[:perShard:perShard]
+		padding = padding[perShard:]
+	}
+
+	return dst, nil
+}
+
+// ErrReconstructRequired is returned if too few data shards are intact and a
+// reconstruction is required before you can successfully join the shards.
+var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+// You must supply the exact output size you want.
+//
+// If there are too few shards given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+// If one or more required data shards are nil, ErrReconstructRequired will be returned.
+func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
+	// Do we have enough shards?
+	if len(shards) < r.DataShards {
+		return ErrTooFewShards
+	}
+	shards = shards[:r.DataShards]
+
+	// Do we have enough data?
+	size := 0
+	for _, shard := range shards {
+		if shard == nil {
+			return ErrReconstructRequired
+		}
+		size += len(shard)
+
+		// Do we have enough data already?
+		if size >= outSize {
+			break
+		}
+	}
+	if size < outSize {
+		return ErrShortData
+	}
+
+	// Copy data to dst
+	write := outSize
+	for _, shard := range shards {
+		// Final (partial) shard: write only what is still needed, then stop.
+		if write < len(shard) {
+			_, err := dst.Write(shard[:write])
+			return err
+		}
+		n, err := dst.Write(shard)
+		if err != nil {
+			return err
+		}
+		write -= n
+	}
+	return nil
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/streaming.go b/vendor/github.com/klauspost/reedsolomon/streaming.go
new file mode 100644
index 0000000..d048ba0
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
@@ -0,0 +1,603 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "sync"
+)
+
+// StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
+// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
+//
+// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
+// since the streaming interface has a start up overhead.
+//
+// For all operations, readers and writers should not assume any order/size of
+// individual reads/writes.
+//
+// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
+// folder.
+type StreamEncoder interface {
+	// Encode parity shards for a set of data shards.
+	//
+	// Input is 'shards' containing readers for data shards followed by parity shards
+	// io.Writer.
+	//
+	// The number of shards must match the number given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	//
+	// The parity shards will be written to the writer.
+	// The number of bytes written will match the input size.
+	//
+	// If a data stream returns an error, a StreamReadError type error
+	// will be returned. If a parity writer returns an error, a
+	// StreamWriteError will be returned.
+	Encode(data []io.Reader, parity []io.Writer) error
+
+	// Verify returns true if the parity shards contain correct data.
+	//
+	// The number of shards must match the number total data+parity shards
+	// given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	// If a shard stream returns an error, a StreamReadError type error
+	// will be returned.
+	Verify(shards []io.Reader) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of valid shards (to read) and invalid shards (to write)
+	//
+	// You indicate that a shard is missing by setting it to nil in the 'valid'
+	// slice and at the same time setting a non-nil writer in "fill".
+	// An index cannot contain both non-nil 'valid' and 'fill' entry.
+	// If both are provided 'ErrReconstructMismatch' is returned.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(valid []io.Reader, fill []io.Writer) error
+
+	// Split an input stream into the number of shards given to the encoder.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't divisible by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// You must supply the total size of your input.
+	// 'ErrShortData' will be returned if it is unable to retrieve the
+	// number of bytes indicated.
+	Split(data io.Reader, dst []io.Writer, size int64) (err error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	//
+	// You must supply the exact output size you want.
+	// If there are too few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards []io.Reader, outSize int64) error
+}
+
+// StreamReadError is returned when a read error is encountered
+// that relates to a supplied stream.
+// This will allow you to find out which reader has failed.
+type StreamReadError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error returns the error as a string
+func (s StreamReadError) Error() string {
+	return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string; it implements fmt.Stringer.
+func (s StreamReadError) String() string {
+	return s.Error()
+}
+
+// StreamWriteError is returned when a write error is encountered
+// that relates to a supplied stream. This will allow you to
+// find out which writer has failed.
+type StreamWriteError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error returns the error as a string
+func (s StreamWriteError) Error() string {
+	return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string; it implements fmt.Stringer.
+func (s StreamWriteError) String() string {
+	return s.Error()
+}
+
+// rsStream contains a matrix for a specific
+// distribution of datashards and parity shards.
+// Construct it using NewStream().
+type rsStream struct {
+	r *reedSolomon
+	o options
+
+	// Shard reader
+	readShards func(dst [][]byte, in []io.Reader) error
+	// Shard writer
+	writeShards func(out []io.Writer, in [][]byte) error
+
+	// blockPool recycles the [][]byte scratch blocks used per call.
+	blockPool sync.Pool
+}
+
+// NewStream creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of data shards is 256.
+func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
+	r := rsStream{o: defaultOptions}
+	for _, opt := range o {
+		opt(&r.o)
+	}
+	// Override block size if shard size is set.
+	if r.o.streamBS == 0 && r.o.shardSize > 0 {
+		r.o.streamBS = r.o.shardSize
+	}
+	// Default stream block size: 4 MiB.
+	if r.o.streamBS <= 0 {
+		r.o.streamBS = 4 << 20
+	}
+	// If the user didn't pick a goroutine count, derive one from the block size.
+	if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
+		o = append(o, WithAutoGoroutines(r.o.streamBS))
+	}
+
+	enc, err := New(dataShards, parityShards, o...)
+	if err != nil {
+		return nil, err
+	}
+	r.r = enc.(*reedSolomon)
+
+	// Pool allocator: one block slice per data+parity shard.
+	r.blockPool.New = func() interface{} {
+		out := make([][]byte, dataShards+parityShards)
+		for i := range out {
+			out[i] = make([]byte, r.o.streamBS)
+		}
+		return out
+	}
+	// Swap in the concurrent reader/writer implementations when requested.
+	r.readShards = readShards
+	r.writeShards = writeShards
+	if r.o.concReads {
+		r.readShards = cReadShards
+	}
+	if r.o.concWrites {
+		r.writeShards = cWriteShards
+	}
+
+	return &r, err
+}
+
+// NewStreamC creates a new encoder and initializes it to
+// the number of data shards and parity shards given.
+//
+// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
+func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
+	// Append the concurrency options and delegate to NewStream.
+	return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
+}
+
+// createSlice fetches a shard block from the pool and resets
+// every shard to the full stream block size.
+func (r *rsStream) createSlice() [][]byte {
+	out := r.blockPool.Get().([][]byte)
+	for i := range out {
+		out[i] = out[i][:r.o.streamBS]
+	}
+	return out
+}
+
+// Encodes parity shards for a set of data shards.
+//
+// Input is 'shards' containing readers for data shards followed by parity shards
+// io.Writer.
+//
+// The number of shards must match the number given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+//
+// The parity shards will be written to the writer.
+// The number of bytes written will match the input size.
+//
+// If a data stream returns an error, a StreamReadError type error
+// will be returned. If a parity writer returns an error, a
+// StreamWriteError will be returned.
+func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
+	if len(data) != r.r.DataShards {
+		return ErrTooFewShards
+	}
+
+	if len(parity) != r.r.ParityShards {
+		return ErrTooFewShards
+	}
+
+	// Scratch block: data shards first, then parity shards.
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	in := all[:r.r.DataShards]
+	out := all[r.r.DataShards:]
+	read := 0
+
+	for {
+		err := r.readShards(in, data)
+		switch err {
+		case nil:
+		case io.EOF:
+			// EOF before any data was read means there was nothing to encode.
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		default:
+			return err
+		}
+		// Size the parity buffers to match what was actually read.
+		out = trimShards(out, shardSize(in))
+		read += shardSize(in)
+		err = r.r.Encode(all)
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(parity, out)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Trim the shards so they are all the same size
+func trimShards(in [][]byte, size int) [][]byte {
+	for i := range in {
+		// Shrink non-empty shards to the requested size.
+		if len(in[i]) != 0 {
+			in[i] = in[i][0:size]
+		}
+		// Shards that cannot supply 'size' bytes are marked empty.
+		if len(in[i]) < size {
+			in[i] = in[i][:0]
+		}
+	}
+	return in
+}
+
+// readShards fills dst by reading one block from each reader in 'in',
+// sequentially. A nil reader marks its shard as empty. All streams that
+// reach EOF must have delivered the same number of bytes, otherwise
+// ErrShardSize is returned. io.EOF is returned once the streams are
+// exhausted (a zero-byte read at EOF).
+func readShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	size := -1
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = dst[i][:0]
+			continue
+		}
+		n, err := io.ReadFull(in[i], dst[i])
+		// The error is EOF only if no bytes were read.
+		// If an EOF happens after reading some but not all the bytes,
+		// ReadFull returns ErrUnexpectedEOF.
+		switch err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = n
+			} else if n != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[i] = dst[i][0:n]
+		case nil:
+			continue
+		default:
+			return StreamReadError{Err: err, Stream: i}
+		}
+	}
+	// A zero-byte read at EOF means the streams are done.
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+// writeShards writes each shard in 'in' to the corresponding writer
+// in 'out', sequentially. Nil writers are skipped.
+func writeShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	for i := range in {
+		if out[i] == nil {
+			continue
+		}
+		n, err := out[i].Write(in[i])
+		if err != nil {
+			return StreamWriteError{Err: err, Stream: i}
+		}
+		// Ensure the entire shard was written.
+		if n != len(in[i]) {
+			return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+		}
+	}
+	return nil
+}
+
+// readResult is the outcome of a single concurrent shard read.
+type readResult struct {
+	n    int   // Index of the stream the result belongs to
+	size int   // Number of bytes read
+	err  error // Read error, if any
+}
+
+// cReadShards reads shards concurrently
+func cReadShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	var wg sync.WaitGroup
+	wg.Add(len(in))
+	res := make(chan readResult, len(in))
+	for i := range in {
+		// Nil readers mark their shard empty; no goroutine needed.
+		if in[i] == nil {
+			dst[i] = dst[i][:0]
+			wg.Done()
+			continue
+		}
+		go func(i int) {
+			defer wg.Done()
+			n, err := io.ReadFull(in[i], dst[i])
+			// The error is EOF only if no bytes were read.
+			// If an EOF happens after reading some but not all the bytes,
+			// ReadFull returns ErrUnexpectedEOF.
+			res <- readResult{size: n, err: err, n: i}
+
+		}(i)
+	}
+	wg.Wait()
+	close(res)
+	// Collect results; every stream that reached EOF must have read the same size.
+	size := -1
+	for r := range res {
+		switch r.err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = r.size
+			} else if r.size != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[r.n] = dst[r.n][0:r.size]
+		case nil:
+		default:
+			return StreamReadError{Err: r.err, Stream: r.n}
+		}
+	}
+	// A zero-byte read at EOF means the streams are done.
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+// cWriteShards writes shards concurrently
+func cWriteShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	var errs = make(chan error, len(out))
+	var wg sync.WaitGroup
+	wg.Add(len(out))
+	for i := range in {
+		go func(i int) {
+			defer wg.Done()
+			// Nil writers are skipped.
+			if out[i] == nil {
+				errs <- nil
+				return
+			}
+			n, err := out[i].Write(in[i])
+			if err != nil {
+				errs <- StreamWriteError{Err: err, Stream: i}
+				return
+			}
+			// Ensure the entire shard was written.
+			if n != len(in[i]) {
+				errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+			}
+		}(i)
+	}
+	wg.Wait()
+	close(errs)
+	// Return the first error encountered, if any.
+	for err := range errs {
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Verify returns true if the parity shards contain correct data.
+//
+// The number of shards must match the number total data+parity shards
+// given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+// If a shard stream returns an error, a StreamReadError type error
+// will be returned.
+func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
+	if len(shards) != r.r.Shards {
+		return false, ErrTooFewShards
+	}
+
+	read := 0
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	for {
+		err := r.readShards(all, shards)
+		if err == io.EOF {
+			// EOF before any data was read means there was nothing to verify.
+			if read == 0 {
+				return false, ErrShardNoData
+			}
+			return true, nil
+		}
+		if err != nil {
+			return false, err
+		}
+		read += shardSize(all)
+		// Stop at the first block that fails verification.
+		ok, err := r.r.Verify(all)
+		if !ok || err != nil {
+			return ok, err
+		}
+	}
+}
+
+// ErrReconstructMismatch is returned by the StreamEncoder, if you supply
+// "valid" and "fill" streams on the same index.
+// Therefore it is impossible to see if you consider the shard valid
+// or would like to have it reconstructed.
+var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
+
+// Reconstruct will recreate the missing shards if possible.
+//
+// Given a list of valid shards (to read) and invalid shards (to write)
+//
+// You indicate that a shard is missing by setting it to nil in the 'valid'
+// slice and at the same time setting a non-nil writer in "fill".
+// An index cannot contain both non-nil 'valid' and 'fill' entry.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is complete when explicitly asked for all missing shards.
+// However its integrity is not automatically verified.
+// Use the Verify function to check in case the data set is complete.
+func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
+	if len(valid) != r.r.Shards {
+		return ErrTooFewShards
+	}
+	if len(fill) != r.r.Shards {
+		return ErrTooFewShards
+	}
+
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	// Detect index conflicts and whether any parity shard needs rebuilding.
+	reconDataOnly := true
+	for i := range valid {
+		if valid[i] != nil && fill[i] != nil {
+			return ErrReconstructMismatch
+		}
+		if i >= r.r.DataShards && fill[i] != nil {
+			reconDataOnly = false
+		}
+	}
+
+	read := 0
+	for {
+		err := r.readShards(all, valid)
+		if err == io.EOF {
+			// EOF before any data was read means there was nothing to reconstruct.
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		read += shardSize(all)
+		all = trimShards(all, shardSize(all))
+
+		if reconDataOnly {
+			err = r.r.ReconstructData(all) // just reconstruct missing data shards
+		} else {
+			err = r.r.Reconstruct(all) // reconstruct all missing shards
+		}
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(fill, all)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+//
+// You must supply the exact output size you want.
+// If there are too few shards given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
+	// Do we have enough shards?
+	if len(shards) < r.r.DataShards {
+		return ErrTooFewShards
+	}
+
+	// Trim off parity shards if any
+	shards = shards[:r.r.DataShards]
+	for i := range shards {
+		if shards[i] == nil {
+			return StreamReadError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+	// Join all shards
+	src := io.MultiReader(shards...)
+
+	// Copy data to dst
+	n, err := io.CopyN(dst, src, outSize)
+	if err == io.EOF {
+		return ErrShortData
+	}
+	if err != nil {
+		return err
+	}
+	// CopyN returning fewer bytes than requested means the shards were short.
+	if n != outSize {
+		return ErrShortData
+	}
+	return nil
+}
+
+// Split an input stream into the number of shards given to the encoder.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards,
+// the last shard will contain extra zeros.
+//
+// You must supply the total size of your input.
+// 'ErrShortData' will be returned if it is unable to retrieve the
+// number of bytes indicated.
+func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
+	if size == 0 {
+		return ErrShortData
+	}
+	if len(dst) != r.r.DataShards {
+		return ErrInvShardNum
+	}
+
+	// Every destination writer must be supplied.
+	for i := range dst {
+		if dst[i] == nil {
+			return StreamWriteError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+
+	// Calculate number of bytes per shard.
+	perShard := (size + int64(r.r.DataShards) - 1) / int64(r.r.DataShards)
+
+	// Pad data to r.Shards*perShard.
+	padding := make([]byte, (int64(r.r.Shards)*perShard)-size)
+	data = io.MultiReader(data, bytes.NewBuffer(padding))
+
+	// Split into equal-length shards and copy.
+	for i := range dst {
+		n, err := io.CopyN(dst[i], data, perShard)
+		if err != io.EOF && err != nil {
+			return err
+		}
+		// A short copy means the source couldn't supply the promised size.
+		if n != perShard {
+			return ErrShortData
+		}
+	}
+
+	return nil
+}