diff options
author | kali kaneko (leap communications) <kali@leap.se> | 2021-11-29 01:46:27 +0100 |
---|---|---|
committer | kali kaneko (leap communications) <kali@leap.se> | 2021-11-29 18:14:16 +0100 |
commit | 18f52af5be3a9a0c73811706108f790d65ee9c67 (patch) | |
tree | e13cbacb47d56919caa9c44a2b45dec1497a7860 /vendor/github.com/klauspost | |
parent | ebcef0d57b6ecb5a40c6579f6be07182dd3033ba (diff) |
[pkg] update vendor
Diffstat (limited to 'vendor/github.com/klauspost')
41 files changed, 27087 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/cpuid/.gitignore b/vendor/github.com/klauspost/cpuid/.gitignore new file mode 100644 index 0000000..daf913b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/.gitignore @@ -0,0 +1,24 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test +*.prof diff --git a/vendor/github.com/klauspost/cpuid/.travis.yml b/vendor/github.com/klauspost/cpuid/.travis.yml new file mode 100644 index 0000000..77d975f --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/.travis.yml @@ -0,0 +1,46 @@ +language: go + +os: + - linux + - osx + - windows + +arch: + - amd64 + - arm64 + +go: + - 1.12.x + - 1.13.x + - 1.14.x + - master + +script: + - go vet ./... + - go test -race ./... + - go test -tags=noasm ./... + +stages: + - gofmt + - test + +matrix: + allow_failures: + - go: 'master' + fast_finish: true + include: + - stage: gofmt + go: 1.14.x + os: linux + arch: amd64 + script: + - diff <(gofmt -d .) <(printf "") + - diff <(gofmt -d ./private) <(printf "") + - go install github.com/klauspost/asmfmt/cmd/asmfmt + - diff <(asmfmt -d .) <(printf "") + - stage: i386 + go: 1.14.x + os: linux + arch: amd64 + script: + - GOOS=linux GOARCH=386 go test . diff --git a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt new file mode 100644 index 0000000..2ef4714 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt @@ -0,0 +1,35 @@ +Developer Certificate of Origin
+Version 1.1
+
+Copyright (C) 2015- Klaus Post & Contributors.
+Email: klauspost@gmail.com
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+
+Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
diff --git a/vendor/github.com/klauspost/cpuid/LICENSE b/vendor/github.com/klauspost/cpuid/LICENSE new file mode 100644 index 0000000..5cec7ee --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/vendor/github.com/klauspost/cpuid/README.md b/vendor/github.com/klauspost/cpuid/README.md new file mode 100644 index 0000000..38d4a8b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/README.md @@ -0,0 +1,191 @@ +# cpuid +Package cpuid provides information about the CPU running the current program. + +CPU features are detected on startup, and kept for fast access through the life of the application. +Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use. + +You can access the CPU information by accessing the shared CPU variable of the cpuid library. 
+ +Package home: https://github.com/klauspost/cpuid + +[![GoDoc][1]][2] [![Build Status][3]][4] + +[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg +[2]: https://godoc.org/github.com/klauspost/cpuid +[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master +[4]: https://travis-ci.org/klauspost/cpuid + +# features + +## x86 CPU Instructions +* **CMOV** (i686 CMOV) +* **NX** (NX (No-Execute) bit) +* **AMD3DNOW** (AMD 3DNOW) +* **AMD3DNOWEXT** (AMD 3DNowExt) +* **MMX** (standard MMX) +* **MMXEXT** (SSE integer functions or AMD MMX ext) +* **SSE** (SSE functions) +* **SSE2** (P4 SSE functions) +* **SSE3** (Prescott SSE3 functions) +* **SSSE3** (Conroe SSSE3 functions) +* **SSE4** (Penryn SSE4.1 functions) +* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions) +* **SSE42** (Nehalem SSE4.2 functions) +* **AVX** (AVX functions) +* **AVX2** (AVX2 functions) +* **FMA3** (Intel FMA 3) +* **FMA4** (Bulldozer FMA4 functions) +* **XOP** (Bulldozer XOP functions) +* **F16C** (Half-precision floating-point conversion) +* **BMI1** (Bit Manipulation Instruction Set 1) +* **BMI2** (Bit Manipulation Instruction Set 2) +* **TBM** (AMD Trailing Bit Manipulation) +* **LZCNT** (LZCNT instruction) +* **POPCNT** (POPCNT instruction) +* **AESNI** (Advanced Encryption Standard New Instructions) +* **CLMUL** (Carry-less Multiplication) +* **HTT** (Hyperthreading (enabled)) +* **HLE** (Hardware Lock Elision) +* **RTM** (Restricted Transactional Memory) +* **RDRAND** (RDRAND instruction is available) +* **RDSEED** (RDSEED instruction is available) +* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions)) +* **SHA** (Intel SHA Extensions) +* **AVX512F** (AVX-512 Foundation) +* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions) +* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions) +* **AVX512PF** (AVX-512 Prefetch Instructions) +* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions) +* **AVX512CD** (AVX-512 Conflict 
Detection Instructions) +* **AVX512BW** (AVX-512 Byte and Word Instructions) +* **AVX512VL** (AVX-512 Vector Length Extensions) +* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions) +* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2) +* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions) +* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword) +* **GFNI** (Galois Field New Instructions) +* **VAES** (Vector AES) +* **AVX512BITALG** (AVX-512 Bit Algorithms) +* **VPCLMULQDQ** (Carry-Less Multiplication Quadword) +* **AVX512BF16** (AVX-512 BFLOAT16 Instructions) +* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q) +* **MPX** (Intel MPX (Memory Protection Extensions)) +* **ERMS** (Enhanced REP MOVSB/STOSB) +* **RDTSCP** (RDTSCP Instruction) +* **CX16** (CMPXCHG16B Instruction) +* **SGX** (Software Guard Extensions, with activation details) +* **VMX** (Virtual Machine Extensions) + +## Performance +* **RDTSCP()** Returns current cycle count. Can be used for benchmarking. +* **SSE2SLOW** (SSE2 is supported, but usually not faster) +* **SSE3SLOW** (SSE3 is supported, but usually not faster) +* **ATOM** (Atom processor, some SSSE3 instructions are slower) +* **Cache line** (Probable size of a cache line). +* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs. + +## ARM CPU features + +# ARM FEATURE DETECTION DISABLED! + +See [#52](https://github.com/klauspost/cpuid/issues/52). + +Currently only `arm64` platforms are implemented. 
+ +* **FP** Single-precision and double-precision floating point +* **ASIMD** Advanced SIMD +* **EVTSTRM** Generic timer +* **AES** AES instructions +* **PMULL** Polynomial Multiply instructions (PMULL/PMULL2) +* **SHA1** SHA-1 instructions (SHA1C, etc) +* **SHA2** SHA-2 instructions (SHA256H, etc) +* **CRC32** CRC32/CRC32C instructions +* **ATOMICS** Large System Extensions (LSE) +* **FPHP** Half-precision floating point +* **ASIMDHP** Advanced SIMD half-precision floating point +* **ARMCPUID** Some CPU ID registers readable at user-level +* **ASIMDRDM** Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) +* **JSCVT** Javascript-style double->int convert (FJCVTZS) +* **FCMA** Floating point complex number addition and multiplication +* **LRCPC** Weaker release consistency (LDAPR, etc) +* **DCPOP** Data cache clean to Point of Persistence (DC CVAP) +* **SHA3** SHA-3 instructions (EOR3, RAXI, XAR, BCAX) +* **SM3** SM3 instructions +* **SM4** SM4 instructions +* **ASIMDDP** SIMD Dot Product +* **SHA512** SHA512 instructions +* **SVE** Scalable Vector Extension +* **GPA** Generic Pointer Authentication + +## Cpu Vendor/VM +* **Intel** +* **AMD** +* **VIA** +* **Transmeta** +* **NSC** +* **KVM** (Kernel-based Virtual Machine) +* **MSVM** (Microsoft Hyper-V or Windows Virtual PC) +* **VMware** +* **XenHVM** +* **Bhyve** +* **Hygon** + +# installing + +```go get github.com/klauspost/cpuid``` + +# example + +```Go +package main + +import ( + "fmt" + "github.com/klauspost/cpuid" +) + +func main() { + // Print basic CPU information: + fmt.Println("Name:", cpuid.CPU.BrandName) + fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores) + fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore) + fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores) + fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model) + fmt.Println("Features:", cpuid.CPU.Features) + fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine) + fmt.Println("L1 Data Cache:", 
cpuid.CPU.Cache.L1D, "bytes") + fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1I, "bytes") + fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes") + fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes") + + // Test if we have a specific feature: + if cpuid.CPU.SSE() { + fmt.Println("We have Streaming SIMD Extensions") + } +} +``` + +Sample output: +``` +>go run main.go +Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz +PhysicalCores: 2 +ThreadsPerCore: 2 +LogicalCores: 4 +Family 6 Model: 42 +Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL +Cacheline bytes: 64 +We have Streaming SIMD Extensions +``` + +# private package + +In the "private" folder you can find an autogenerated version of the library you can include in your own packages. + +For this purpose all exports are removed, and functions and constants are lowercased. + +This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages. + +# license + +This code is published under an MIT license. See LICENSE file for more information. diff --git a/vendor/github.com/klauspost/cpuid/cpuid.go b/vendor/github.com/klauspost/cpuid/cpuid.go new file mode 100644 index 0000000..208b3e7 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/cpuid.go @@ -0,0 +1,1504 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// Package cpuid provides information about the CPU running the current program. +// +// CPU features are detected on startup, and kept for fast access through the life of the application. +// Currently x86 / x64 (AMD64) as well as arm64 is supported. +// +// You can access the CPU information by accessing the shared CPU variable of the cpuid library. 
+// +// Package home: https://github.com/klauspost/cpuid +package cpuid + +import ( + "math" + "strings" +) + +// AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf +// and Processor Programming Reference (PPR) + +// Vendor is a representation of a CPU vendor. +type Vendor int + +const ( + Other Vendor = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM + Bhyve + Hygon + SiS + RDC +) + +const ( + CMOV = 1 << iota // i686 CMOV + NX // NX (No-Execute) bit + AMD3DNOW // AMD 3DNOW + AMD3DNOWEXT // AMD 3DNowExt + MMX // standard MMX + MMXEXT // SSE integer functions or AMD MMX ext + SSE // SSE functions + SSE2 // P4 SSE functions + SSE3 // Prescott SSE3 functions + SSSE3 // Conroe SSSE3 functions + SSE4 // Penryn SSE4.1 functions + SSE4A // AMD Barcelona microarchitecture SSE4a instructions + SSE42 // Nehalem SSE4.2 functions + AVX // AVX functions + AVX2 // AVX2 functions + FMA3 // Intel FMA 3 + FMA4 // Bulldozer FMA4 functions + XOP // Bulldozer XOP functions + F16C // Half-precision floating-point conversion + BMI1 // Bit Manipulation Instruction Set 1 + BMI2 // Bit Manipulation Instruction Set 2 + TBM // AMD Trailing Bit Manipulation + LZCNT // LZCNT instruction + POPCNT // POPCNT instruction + AESNI // Advanced Encryption Standard New Instructions + CLMUL // Carry-less Multiplication + HTT // Hyperthreading (enabled) + HLE // Hardware Lock Elision + RTM // Restricted Transactional Memory + RDRAND // RDRAND instruction is available + RDSEED // RDSEED instruction is available + ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + SHA // Intel SHA Extensions + AVX512F // AVX-512 Foundation + AVX512DQ // AVX-512 Doubleword and Quadword Instructions + AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF // AVX-512 Prefetch Instructions + AVX512ER // AVX-512 Exponential and Reciprocal Instructions + AVX512CD // AVX-512 
Conflict Detection Instructions + AVX512BW // AVX-512 Byte and Word Instructions + AVX512VL // AVX-512 Vector Length Extensions + AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VNNI // AVX-512 Vector Neural Network Instructions + AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword + GFNI // Galois Field New Instructions + VAES // Vector AES + AVX512BITALG // AVX-512 Bit Algorithms + VPCLMULQDQ // Carry-Less Multiplication Quadword + AVX512BF16 // AVX-512 BFLOAT16 Instructions + AVX512VP2INTERSECT // AVX-512 Intersect for D/Q + MPX // Intel MPX (Memory Protection Extensions) + ERMS // Enhanced REP MOVSB/STOSB + RDTSCP // RDTSCP Instruction + CX16 // CMPXCHG16B Instruction + SGX // Software Guard Extensions + SGXLC // Software Guard Extensions Launch Control + IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) + STIBP // Single Thread Indirect Branch Predictors + VMX // Virtual Machine Extensions + + // Performance indicators + SSE2SLOW // SSE2 is supported, but usually not faster + SSE3SLOW // SSE3 is supported, but usually not faster + ATOM // Atom processor, some SSSE3 instructions are slower +) + +var flagNames = map[Flags]string{ + CMOV: "CMOV", // i686 CMOV + NX: "NX", // NX (No-Execute) bit + AMD3DNOW: "AMD3DNOW", // AMD 3DNOW + AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt + MMX: "MMX", // Standard MMX + MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext + SSE: "SSE", // SSE functions + SSE2: "SSE2", // P4 SSE2 functions + SSE3: "SSE3", // Prescott SSE3 functions + SSSE3: "SSSE3", // Conroe SSSE3 functions + SSE4: "SSE4.1", // Penryn SSE4.1 functions + SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions + SSE42: "SSE4.2", // Nehalem SSE4.2 functions + AVX: "AVX", // AVX functions + AVX2: "AVX2", // AVX functions + FMA3: "FMA3", // Intel FMA 3 + FMA4: "FMA4", // Bulldozer FMA4 
functions + XOP: "XOP", // Bulldozer XOP functions + F16C: "F16C", // Half-precision floating-point conversion + BMI1: "BMI1", // Bit Manipulation Instruction Set 1 + BMI2: "BMI2", // Bit Manipulation Instruction Set 2 + TBM: "TBM", // AMD Trailing Bit Manipulation + LZCNT: "LZCNT", // LZCNT instruction + POPCNT: "POPCNT", // POPCNT instruction + AESNI: "AESNI", // Advanced Encryption Standard New Instructions + CLMUL: "CLMUL", // Carry-less Multiplication + HTT: "HTT", // Hyperthreading (enabled) + HLE: "HLE", // Hardware Lock Elision + RTM: "RTM", // Restricted Transactional Memory + RDRAND: "RDRAND", // RDRAND instruction is available + RDSEED: "RDSEED", // RDSEED instruction is available + ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions) + SHA: "SHA", // Intel SHA Extensions + AVX512F: "AVX512F", // AVX-512 Foundation + AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions + AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions + AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions + AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions + AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions + AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions + AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions + AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions + AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2 + AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions + AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword + GFNI: "GFNI", // Galois Field New Instructions + VAES: "VAES", // Vector AES + AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms + VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword + AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction + AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect 
for D/Q + MPX: "MPX", // Intel MPX (Memory Protection Extensions) + ERMS: "ERMS", // Enhanced REP MOVSB/STOSB + RDTSCP: "RDTSCP", // RDTSCP Instruction + CX16: "CX16", // CMPXCHG16B Instruction + SGX: "SGX", // Software Guard Extensions + SGXLC: "SGXLC", // Software Guard Extensions Launch Control + IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier + STIBP: "STIBP", // Single Thread Indirect Branch Predictors + VMX: "VMX", // Virtual Machine Extensions + + // Performance indicators + SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster + SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster + ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower + +} + +/* all special features for arm64 should be defined here */ +const ( + /* extension instructions */ + FP ArmFlags = 1 << iota + ASIMD + EVTSTRM + AES + PMULL + SHA1 + SHA2 + CRC32 + ATOMICS + FPHP + ASIMDHP + ARMCPUID + ASIMDRDM + JSCVT + FCMA + LRCPC + DCPOP + SHA3 + SM3 + SM4 + ASIMDDP + SHA512 + SVE + GPA +) + +var flagNamesArm = map[ArmFlags]string{ + FP: "FP", // Single-precision and double-precision floating point + ASIMD: "ASIMD", // Advanced SIMD + EVTSTRM: "EVTSTRM", // Generic timer + AES: "AES", // AES instructions + PMULL: "PMULL", // Polynomial Multiply instructions (PMULL/PMULL2) + SHA1: "SHA1", // SHA-1 instructions (SHA1C, etc) + SHA2: "SHA2", // SHA-2 instructions (SHA256H, etc) + CRC32: "CRC32", // CRC32/CRC32C instructions + ATOMICS: "ATOMICS", // Large System Extensions (LSE) + FPHP: "FPHP", // Half-precision floating point + ASIMDHP: "ASIMDHP", // Advanced SIMD half-precision floating point + ARMCPUID: "CPUID", // Some CPU ID registers readable at user-level + ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) + JSCVT: "JSCVT", // Javascript-style double->int convert (FJCVTZS) + FCMA: "FCMA", // Floating point complex number addition and multiplication + LRCPC: "LRCPC", // Weaker 
release consistency (LDAPR, etc) + DCPOP: "DCPOP", // Data cache clean to Point of Persistence (DC CVAP) + SHA3: "SHA3", // SHA-3 instructions (EOR3, RAXI, XAR, BCAX) + SM3: "SM3", // SM3 instructions + SM4: "SM4", // SM4 instructions + ASIMDDP: "ASIMDDP", // SIMD Dot Product + SHA512: "SHA512", // SHA512 instructions + SVE: "SVE", // Scalable Vector Extension + GPA: "GPA", // Generic Pointer Authentication +} + +// CPUInfo contains information about the detected system CPU. +type CPUInfo struct { + BrandName string // Brand name reported by the CPU + VendorID Vendor // Comparable CPU vendor ID + VendorString string // Raw vendor string. + Features Flags // Features of the CPU (x64) + Arm ArmFlags // Features of the CPU (arm) + PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable. + ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable. + LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable. + Family int // CPU family number + Model int // CPU model number + CacheLine int // Cache line size in bytes. Will be 0 if undetectable. + Hz int64 // Clock speed, if known + Cache struct { + L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected + L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected + L2 int // L2 Cache (per core or shared). Will be -1 if undetected + L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected + } + SGX SGXSupport + maxFunc uint32 + maxExFunc uint32 +} + +var cpuid func(op uint32) (eax, ebx, ecx, edx uint32) +var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32) +var xgetbv func(index uint32) (eax, edx uint32) +var rdtscpAsm func() (eax, ebx, ecx, edx uint32) + +// CPU contains information about the CPU as detected on startup, +// or when Detect last was called. 
+// +// Use this as the primary entry point to your data. +var CPU CPUInfo + +func init() { + initCPU() + Detect() +} + +// Detect will re-detect current CPU info. +// This will replace the content of the exported CPU variable. +// +// Unless you expect the CPU to change while you are running your program +// you should not need to call this function. +// If you call this, you must ensure that no other goroutine is accessing the +// exported CPU variable. +func Detect() { + // Set defaults + CPU.ThreadsPerCore = 1 + CPU.Cache.L1I = -1 + CPU.Cache.L1D = -1 + CPU.Cache.L2 = -1 + CPU.Cache.L3 = -1 + addInfo(&CPU) +} + +// Generated here: http://play.golang.org/p/BxFH2Gdc0G + +// Cmov indicates support of CMOV instructions +func (c CPUInfo) Cmov() bool { + return c.Features&CMOV != 0 +} + +// Amd3dnow indicates support of AMD 3DNOW! instructions +func (c CPUInfo) Amd3dnow() bool { + return c.Features&AMD3DNOW != 0 +} + +// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions +func (c CPUInfo) Amd3dnowExt() bool { + return c.Features&AMD3DNOWEXT != 0 +} + +// VMX indicates support of VMX +func (c CPUInfo) VMX() bool { + return c.Features&VMX != 0 +} + +// MMX indicates support of MMX instructions +func (c CPUInfo) MMX() bool { + return c.Features&MMX != 0 +} + +// MMXExt indicates support of MMXEXT instructions +// (SSE integer functions or AMD MMX ext) +func (c CPUInfo) MMXExt() bool { + return c.Features&MMXEXT != 0 +} + +// SSE indicates support of SSE instructions +func (c CPUInfo) SSE() bool { + return c.Features&SSE != 0 +} + +// SSE2 indicates support of SSE 2 instructions +func (c CPUInfo) SSE2() bool { + return c.Features&SSE2 != 0 +} + +// SSE3 indicates support of SSE 3 instructions +func (c CPUInfo) SSE3() bool { + return c.Features&SSE3 != 0 +} + +// SSSE3 indicates support of SSSE 3 instructions +func (c CPUInfo) SSSE3() bool { + return c.Features&SSSE3 != 0 +} + +// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions +func (c 
CPUInfo) SSE4() bool { + return c.Features&SSE4 != 0 +} + +// SSE42 indicates support of SSE4.2 instructions +func (c CPUInfo) SSE42() bool { + return c.Features&SSE42 != 0 +} + +// AVX indicates support of AVX instructions +// and operating system support of AVX instructions +func (c CPUInfo) AVX() bool { + return c.Features&AVX != 0 +} + +// AVX2 indicates support of AVX2 instructions +func (c CPUInfo) AVX2() bool { + return c.Features&AVX2 != 0 +} + +// FMA3 indicates support of FMA3 instructions +func (c CPUInfo) FMA3() bool { + return c.Features&FMA3 != 0 +} + +// FMA4 indicates support of FMA4 instructions +func (c CPUInfo) FMA4() bool { + return c.Features&FMA4 != 0 +} + +// XOP indicates support of XOP instructions +func (c CPUInfo) XOP() bool { + return c.Features&XOP != 0 +} + +// F16C indicates support of F16C instructions +func (c CPUInfo) F16C() bool { + return c.Features&F16C != 0 +} + +// BMI1 indicates support of BMI1 instructions +func (c CPUInfo) BMI1() bool { + return c.Features&BMI1 != 0 +} + +// BMI2 indicates support of BMI2 instructions +func (c CPUInfo) BMI2() bool { + return c.Features&BMI2 != 0 +} + +// TBM indicates support of TBM instructions +// (AMD Trailing Bit Manipulation) +func (c CPUInfo) TBM() bool { + return c.Features&TBM != 0 +} + +// Lzcnt indicates support of LZCNT instruction +func (c CPUInfo) Lzcnt() bool { + return c.Features&LZCNT != 0 +} + +// Popcnt indicates support of POPCNT instruction +func (c CPUInfo) Popcnt() bool { + return c.Features&POPCNT != 0 +} + +// HTT indicates the processor has Hyperthreading enabled +func (c CPUInfo) HTT() bool { + return c.Features&HTT != 0 +} + +// SSE2Slow indicates that SSE2 may be slow on this processor +func (c CPUInfo) SSE2Slow() bool { + return c.Features&SSE2SLOW != 0 +} + +// SSE3Slow indicates that SSE3 may be slow on this processor +func (c CPUInfo) SSE3Slow() bool { + return c.Features&SSE3SLOW != 0 +} + +// AesNi indicates support of AES-NI instructions +// (Advanced 
Encryption Standard New Instructions) +func (c CPUInfo) AesNi() bool { + return c.Features&AESNI != 0 +} + +// Clmul indicates support of CLMUL instructions +// (Carry-less Multiplication) +func (c CPUInfo) Clmul() bool { + return c.Features&CLMUL != 0 +} + +// NX indicates support of NX (No-Execute) bit +func (c CPUInfo) NX() bool { + return c.Features&NX != 0 +} + +// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions +func (c CPUInfo) SSE4A() bool { + return c.Features&SSE4A != 0 +} + +// HLE indicates support of Hardware Lock Elision +func (c CPUInfo) HLE() bool { + return c.Features&HLE != 0 +} + +// RTM indicates support of Restricted Transactional Memory +func (c CPUInfo) RTM() bool { + return c.Features&RTM != 0 +} + +// Rdrand indicates support of RDRAND instruction is available +func (c CPUInfo) Rdrand() bool { + return c.Features&RDRAND != 0 +} + +// Rdseed indicates support of RDSEED instruction is available +func (c CPUInfo) Rdseed() bool { + return c.Features&RDSEED != 0 +} + +// ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions) +func (c CPUInfo) ADX() bool { + return c.Features&ADX != 0 +} + +// SHA indicates support of Intel SHA Extensions +func (c CPUInfo) SHA() bool { + return c.Features&SHA != 0 +} + +// AVX512F indicates support of AVX-512 Foundation +func (c CPUInfo) AVX512F() bool { + return c.Features&AVX512F != 0 +} + +// AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions +func (c CPUInfo) AVX512DQ() bool { + return c.Features&AVX512DQ != 0 +} + +// AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions +func (c CPUInfo) AVX512IFMA() bool { + return c.Features&AVX512IFMA != 0 +} + +// AVX512PF indicates support of AVX-512 Prefetch Instructions +func (c CPUInfo) AVX512PF() bool { + return c.Features&AVX512PF != 0 +} + +// AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions +func (c CPUInfo) AVX512ER() bool 
{ + return c.Features&AVX512ER != 0 +} + +// AVX512CD indicates support of AVX-512 Conflict Detection Instructions +func (c CPUInfo) AVX512CD() bool { + return c.Features&AVX512CD != 0 +} + +// AVX512BW indicates support of AVX-512 Byte and Word Instructions +func (c CPUInfo) AVX512BW() bool { + return c.Features&AVX512BW != 0 +} + +// AVX512VL indicates support of AVX-512 Vector Length Extensions +func (c CPUInfo) AVX512VL() bool { + return c.Features&AVX512VL != 0 +} + +// AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions +func (c CPUInfo) AVX512VBMI() bool { + return c.Features&AVX512VBMI != 0 +} + +// AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2 +func (c CPUInfo) AVX512VBMI2() bool { + return c.Features&AVX512VBMI2 != 0 +} + +// AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions +func (c CPUInfo) AVX512VNNI() bool { + return c.Features&AVX512VNNI != 0 +} + +// AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword +func (c CPUInfo) AVX512VPOPCNTDQ() bool { + return c.Features&AVX512VPOPCNTDQ != 0 +} + +// GFNI indicates support of Galois Field New Instructions +func (c CPUInfo) GFNI() bool { + return c.Features&GFNI != 0 +} + +// VAES indicates support of Vector AES +func (c CPUInfo) VAES() bool { + return c.Features&VAES != 0 +} + +// AVX512BITALG indicates support of AVX-512 Bit Algorithms +func (c CPUInfo) AVX512BITALG() bool { + return c.Features&AVX512BITALG != 0 +} + +// VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword +func (c CPUInfo) VPCLMULQDQ() bool { + return c.Features&VPCLMULQDQ != 0 +} + +// AVX512BF16 indicates support of +func (c CPUInfo) AVX512BF16() bool { + return c.Features&AVX512BF16 != 0 +} + +// AVX512VP2INTERSECT indicates support of +func (c CPUInfo) AVX512VP2INTERSECT() bool { + return c.Features&AVX512VP2INTERSECT != 0 +} + +// MPX indicates support of Intel MPX (Memory Protection 
Extensions) +func (c CPUInfo) MPX() bool { + return c.Features&MPX != 0 +} + +// ERMS indicates support of Enhanced REP MOVSB/STOSB +func (c CPUInfo) ERMS() bool { + return c.Features&ERMS != 0 +} + +// RDTSCP Instruction is available. +func (c CPUInfo) RDTSCP() bool { + return c.Features&RDTSCP != 0 +} + +// CX16 indicates if CMPXCHG16B instruction is available. +func (c CPUInfo) CX16() bool { + return c.Features&CX16 != 0 +} + +// TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection. +// So TSX simply checks that. +func (c CPUInfo) TSX() bool { + return c.Features&(HLE|RTM) == HLE|RTM +} + +// Atom indicates an Atom processor +func (c CPUInfo) Atom() bool { + return c.Features&ATOM != 0 +} + +// Intel returns true if vendor is recognized as Intel +func (c CPUInfo) Intel() bool { + return c.VendorID == Intel +} + +// AMD returns true if vendor is recognized as AMD +func (c CPUInfo) AMD() bool { + return c.VendorID == AMD +} + +// Hygon returns true if vendor is recognized as Hygon +func (c CPUInfo) Hygon() bool { + return c.VendorID == Hygon +} + +// Transmeta returns true if vendor is recognized as Transmeta +func (c CPUInfo) Transmeta() bool { + return c.VendorID == Transmeta +} + +// NSC returns true if vendor is recognized as National Semiconductor +func (c CPUInfo) NSC() bool { + return c.VendorID == NSC +} + +// VIA returns true if vendor is recognized as VIA +func (c CPUInfo) VIA() bool { + return c.VendorID == VIA +} + +// RTCounter returns the 64-bit time-stamp counter +// Uses the RDTSCP instruction. The value 0 is returned +// if the CPU does not support the instruction. +func (c CPUInfo) RTCounter() uint64 { + if !c.RDTSCP() { + return 0 + } + a, _, _, d := rdtscpAsm() + return uint64(a) | (uint64(d) << 32) +} + +// Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP. +// This variable is OS dependent, but on Linux contains information +// about the current cpu/core the code is running on. 
+// If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned. +func (c CPUInfo) Ia32TscAux() uint32 { + if !c.RDTSCP() { + return 0 + } + _, _, ecx, _ := rdtscpAsm() + return ecx +} + +// LogicalCPU will return the Logical CPU the code is currently executing on. +// This is likely to change when the OS re-schedules the running thread +// to another CPU. +// If the current core cannot be detected, -1 will be returned. +func (c CPUInfo) LogicalCPU() int { + if c.maxFunc < 1 { + return -1 + } + _, ebx, _, _ := cpuid(1) + return int(ebx >> 24) +} + +// hertz tries to compute the clock speed of the CPU. If leaf 15 is +// supported, use it, otherwise parse the brand string. Yes, really. +func hertz(model string) int64 { + mfi := maxFunctionID() + if mfi >= 0x15 { + eax, ebx, ecx, _ := cpuid(0x15) + if eax != 0 && ebx != 0 && ecx != 0 { + return int64((int64(ecx) * int64(ebx)) / int64(eax)) + } + } + // computeHz determines the official rated speed of a CPU from its brand + // string. This insanity is *actually the official documented way to do + // this according to Intel*, prior to leaf 0x15 existing. The official + // documentation only shows this working for exactly `x.xx` or `xxxx` + // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other + // sizes. + hz := strings.LastIndex(model, "Hz") + if hz < 3 { + return -1 + } + var multiplier int64 + switch model[hz-1] { + case 'M': + multiplier = 1000 * 1000 + case 'G': + multiplier = 1000 * 1000 * 1000 + case 'T': + multiplier = 1000 * 1000 * 1000 * 1000 + } + if multiplier == 0 { + return -1 + } + freq := int64(0) + divisor := int64(0) + decimalShift := int64(1) + var i int + for i = hz - 2; i >= 0 && model[i] != ' '; i-- { + if model[i] >= '0' && model[i] <= '9' { + freq += int64(model[i]-'0') * decimalShift + decimalShift *= 10 + } else if model[i] == '.' 
{ + if divisor != 0 { + return -1 + } + divisor = decimalShift + } else { + return -1 + } + } + // we didn't find a space + if i < 0 { + return -1 + } + if divisor != 0 { + return (freq * multiplier) / divisor + } + return freq * multiplier +} + +// VM Will return true if the cpu id indicates we are in +// a virtual machine. This is only a hint, and will very likely +// have many false negatives. +func (c CPUInfo) VM() bool { + switch c.VendorID { + case MSVM, KVM, VMware, XenHVM, Bhyve: + return true + } + return false +} + +// Flags contains detected cpu features and characteristics +type Flags uint64 + +// ArmFlags contains detected ARM cpu features and characteristics +type ArmFlags uint64 + +// String returns a string representation of the detected +// CPU features. +func (f Flags) String() string { + return strings.Join(f.Strings(), ",") +} + +// Strings returns an array of the detected features. +func (f Flags) Strings() []string { + r := make([]string, 0, 20) + for i := uint(0); i < 64; i++ { + key := Flags(1 << i) + val := flagNames[key] + if f&key != 0 { + r = append(r, val) + } + } + return r +} + +// String returns a string representation of the detected +// CPU features. +func (f ArmFlags) String() string { + return strings.Join(f.Strings(), ",") +} + +// Strings returns an array of the detected features. 
+func (f ArmFlags) Strings() []string { + r := make([]string, 0, 20) + for i := uint(0); i < 64; i++ { + key := ArmFlags(1 << i) + val := flagNamesArm[key] + if f&key != 0 { + r = append(r, val) + } + } + return r +} +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000) + return eax +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0) + return a +} + +func brandName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002 + i) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +func threadsPerCore() int { + mfi := maxFunctionID() + vend, _ := vendorID() + + if mfi < 0x4 || (vend != Intel && vend != AMD) { + return 1 + } + + if mfi < 0xb { + if vend != Intel { + return 1 + } + _, b, _, d := cpuid(1) + if (d & (1 << 28)) != 0 { + // v will contain logical core count + v := (b >> 16) & 255 + if v > 1 { + a4, _, _, _ := cpuid(4) + // physical cores + v2 := (a4 >> 26) + 1 + if v2 > 0 { + return int(v) / int(v2) + } + } + } + return 1 + } + _, b, _, _ := cpuidex(0xb, 0) + if b&0xffff == 0 { + return 1 + } + return int(b & 0xffff) +} + +func logicalCores() int { + mfi := maxFunctionID() + v, _ := vendorID() + switch v { + case Intel: + // Use this on old Intel processors + if mfi < 0xb { + if mfi < 1 { + return 0 + } + // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID) + // that can be assigned to logical processors in a physical package. + // The value may not be the same as the number of logical processors that are present in the hardware of a physical package. 
+ _, ebx, _, _ := cpuid(1) + logical := (ebx >> 16) & 0xff + return int(logical) + } + _, b, _, _ := cpuidex(0xb, 1) + return int(b & 0xffff) + case AMD, Hygon: + _, b, _, _ := cpuid(1) + return int((b >> 16) & 0xff) + default: + return 0 + } +} + +func familyModel() (int, int) { + if maxFunctionID() < 0x1 { + return 0, 0 + } + eax, _, _, _ := cpuid(1) + family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff) + model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0) + return int(family), int(model) +} + +func physicalCores() int { + v, _ := vendorID() + switch v { + case Intel: + return logicalCores() / threadsPerCore() + case AMD, Hygon: + lc := logicalCores() + tpc := threadsPerCore() + if lc > 0 && tpc > 0 { + return lc / tpc + } + // The following is inaccurate on AMD EPYC 7742 64-Core Processor + + if maxExtendedFunction() >= 0x80000008 { + _, _, c, _ := cpuid(0x80000008) + return int(c&0xff) + 1 + } + } + return 0 +} + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]Vendor{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + "KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, + "bhyve bhyve ": Bhyve, + "HygonGenuine": Hygon, + "Vortex86 SoC": SiS, + "SiS SiS SiS ": SiS, + "RiseRiseRise": SiS, + "Genuine RDC": RDC, +} + +func vendorID() (Vendor, string) { + _, b, c, d := cpuid(0) + v := string(valAsString(b, d, c)) + vend, ok := vendorMapping[v] + if !ok { + return Other, v + } + return vend, v +} + +func cacheLine() int { + if maxFunctionID() < 0x1 { + return 0 + } + + _, ebx, _, _ := cpuid(1) + cache := (ebx & 0xff00) >> 5 // cflush size + if cache == 0 && maxExtendedFunction() >= 0x80000006 { + _, _, ecx, _ := cpuid(0x80000006) + cache = ecx & 0xff // cacheline size + } + // TODO: Read from Cache and TLB Information + 
return int(cache) +} + +func (c *CPUInfo) cacheSize() { + c.Cache.L1D = -1 + c.Cache.L1I = -1 + c.Cache.L2 = -1 + c.Cache.L3 = -1 + vendor, _ := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return + } + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuidex(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.Cache.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.Cache.L1I = size + } else { + if c.Cache.L1D < 0 { + c.Cache.L1I = size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + case AMD, Hygon: + // Untested. + if maxExtendedFunction() < 0x80000005 { + return + } + _, _, ecx, edx := cpuid(0x80000005) + c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return + } + _, _, ecx, _ = cpuid(0x80000006) + c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + + // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties + if maxExtendedFunction() < 0x8000001D { + return + } + for i := uint32(0); i < math.MaxUint32; i++ { + eax, ebx, ecx, _ := cpuidex(0x8000001D, i) + + level := (eax >> 5) & 7 + cacheNumSets := ecx + 1 + cacheLineSize := 1 + (ebx & 2047) + cachePhysPartitions := 1 + ((ebx >> 12) & 511) + cacheNumWays := 1 + ((ebx >> 22) & 511) + + typ := eax & 15 + size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays) + if typ == 0 { + return + } + + switch level { + case 1: + switch typ { + case 1: + // Data cache + c.Cache.L1D = size + case 2: + // Inst cache + c.Cache.L1I = size + default: + if c.Cache.L1D < 0 { + c.Cache.L1I = 
size + } + if c.Cache.L1I < 0 { + c.Cache.L1I = size + } + } + case 2: + c.Cache.L2 = size + case 3: + c.Cache.L3 = size + } + } + } + + return +} + +type SGXEPCSection struct { + BaseAddress uint64 + EPCSize uint64 +} + +type SGXSupport struct { + Available bool + LaunchControl bool + SGX1Supported bool + SGX2Supported bool + MaxEnclaveSizeNot64 int64 + MaxEnclaveSize64 int64 + EPCSections []SGXEPCSection +} + +func hasSGX(available, lc bool) (rval SGXSupport) { + rval.Available = available + + if !available { + return + } + + rval.LaunchControl = lc + + a, _, _, d := cpuidex(0x12, 0) + rval.SGX1Supported = a&0x01 != 0 + rval.SGX2Supported = a&0x02 != 0 + rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2 + rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2 + rval.EPCSections = make([]SGXEPCSection, 0) + + for subleaf := uint32(2); subleaf < 2+8; subleaf++ { + eax, ebx, ecx, edx := cpuidex(0x12, subleaf) + leafType := eax & 0xf + + if leafType == 0 { + // Invalid subleaf, stop iterating + break + } else if leafType == 1 { + // EPC Section subleaf + baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32) + size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32) + + section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size} + rval.EPCSections = append(rval.EPCSections, section) + } + } + + return +} + +func support() Flags { + mfi := maxFunctionID() + vend, _ := vendorID() + if mfi < 0x1 { + return 0 + } + rval := uint64(0) + _, _, c, d := cpuid(1) + if (d & (1 << 15)) != 0 { + rval |= CMOV + } + if (d & (1 << 23)) != 0 { + rval |= MMX + } + if (d & (1 << 25)) != 0 { + rval |= MMXEXT + } + if (d & (1 << 25)) != 0 { + rval |= SSE + } + if (d & (1 << 26)) != 0 { + rval |= SSE2 + } + if (c & 1) != 0 { + rval |= SSE3 + } + if (c & (1 << 5)) != 0 { + rval |= VMX + } + if (c & 0x00000200) != 0 { + rval |= SSSE3 + } + if (c & 0x00080000) != 0 { + rval |= SSE4 + } + if (c & 0x00100000) != 0 { + rval |= SSE42 + } + if (c & (1 << 25)) != 0 
{ + rval |= AESNI + } + if (c & (1 << 1)) != 0 { + rval |= CLMUL + } + if c&(1<<23) != 0 { + rval |= POPCNT + } + if c&(1<<30) != 0 { + rval |= RDRAND + } + if c&(1<<29) != 0 { + rval |= F16C + } + if c&(1<<13) != 0 { + rval |= CX16 + } + if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 { + if threadsPerCore() > 1 { + rval |= HTT + } + } + if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 { + if threadsPerCore() > 1 { + rval |= HTT + } + } + // Check XGETBV, OXSAVE and AVX bits + if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { + // Check for OS support + eax, _ := xgetbv(0) + if (eax & 0x6) == 0x6 { + rval |= AVX + if (c & 0x00001000) != 0 { + rval |= FMA3 + } + } + } + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 { + _, ebx, ecx, edx := cpuidex(7, 0) + eax1, _, _, _ := cpuidex(7, 1) + if (rval&AVX) != 0 && (ebx&0x00000020) != 0 { + rval |= AVX2 + } + if (ebx & 0x00000008) != 0 { + rval |= BMI1 + if (ebx & 0x00000100) != 0 { + rval |= BMI2 + } + } + if ebx&(1<<2) != 0 { + rval |= SGX + } + if ebx&(1<<4) != 0 { + rval |= HLE + } + if ebx&(1<<9) != 0 { + rval |= ERMS + } + if ebx&(1<<11) != 0 { + rval |= RTM + } + if ebx&(1<<14) != 0 { + rval |= MPX + } + if ebx&(1<<18) != 0 { + rval |= RDSEED + } + if ebx&(1<<19) != 0 { + rval |= ADX + } + if ebx&(1<<29) != 0 { + rval |= SHA + } + if edx&(1<<26) != 0 { + rval |= IBPB + } + if ecx&(1<<30) != 0 { + rval |= SGXLC + } + if edx&(1<<27) != 0 { + rval |= STIBP + } + + // Only detect AVX-512 features if XGETBV is supported + if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) { + // Check for OS support + eax, _ := xgetbv(0) + + // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and + // ZMM16-ZMM31 state are enabled by OS) + /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS). 
+ if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 { + if ebx&(1<<16) != 0 { + rval |= AVX512F + } + if ebx&(1<<17) != 0 { + rval |= AVX512DQ + } + if ebx&(1<<21) != 0 { + rval |= AVX512IFMA + } + if ebx&(1<<26) != 0 { + rval |= AVX512PF + } + if ebx&(1<<27) != 0 { + rval |= AVX512ER + } + if ebx&(1<<28) != 0 { + rval |= AVX512CD + } + if ebx&(1<<30) != 0 { + rval |= AVX512BW + } + if ebx&(1<<31) != 0 { + rval |= AVX512VL + } + // ecx + if ecx&(1<<1) != 0 { + rval |= AVX512VBMI + } + if ecx&(1<<6) != 0 { + rval |= AVX512VBMI2 + } + if ecx&(1<<8) != 0 { + rval |= GFNI + } + if ecx&(1<<9) != 0 { + rval |= VAES + } + if ecx&(1<<10) != 0 { + rval |= VPCLMULQDQ + } + if ecx&(1<<11) != 0 { + rval |= AVX512VNNI + } + if ecx&(1<<12) != 0 { + rval |= AVX512BITALG + } + if ecx&(1<<14) != 0 { + rval |= AVX512VPOPCNTDQ + } + // edx + if edx&(1<<8) != 0 { + rval |= AVX512VP2INTERSECT + } + // cpuid eax 07h,ecx=1 + if eax1&(1<<5) != 0 { + rval |= AVX512BF16 + } + } + } + } + + if maxExtendedFunction() >= 0x80000001 { + _, _, c, d := cpuid(0x80000001) + if (c & (1 << 5)) != 0 { + rval |= LZCNT + rval |= POPCNT + } + if (d & (1 << 31)) != 0 { + rval |= AMD3DNOW + } + if (d & (1 << 30)) != 0 { + rval |= AMD3DNOWEXT + } + if (d & (1 << 23)) != 0 { + rval |= MMX + } + if (d & (1 << 22)) != 0 { + rval |= MMXEXT + } + if (c & (1 << 6)) != 0 { + rval |= SSE4A + } + if d&(1<<20) != 0 { + rval |= NX + } + if d&(1<<27) != 0 { + rval |= RDTSCP + } + + /* Allow for selectively disabling SSE2 functions on AMD processors + with SSE2 support but not SSE4a. This includes Athlon64, some + Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster + than SSE2 often enough to utilize this special-case flag. + AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case + so that SSE2 is used unless explicitly disabled by checking + AV_CPU_FLAG_SSE2SLOW. 
*/ + if vend != Intel && + rval&SSE2 != 0 && (c&0x00000040) == 0 { + rval |= SSE2SLOW + } + + /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be + * used unless the OS has AVX support. */ + if (rval & AVX) != 0 { + if (c & 0x00000800) != 0 { + rval |= XOP + } + if (c & 0x00010000) != 0 { + rval |= FMA4 + } + } + + if vend == Intel { + family, model := familyModel() + if family == 6 && (model == 9 || model == 13 || model == 14) { + /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and + * 6/14 (core1 "yonah") theoretically support sse2, but it's + * usually slower than mmx. */ + if (rval & SSE2) != 0 { + rval |= SSE2SLOW + } + if (rval & SSE3) != 0 { + rval |= SSE3SLOW + } + } + /* The Atom processor has SSSE3 support, which is useful in many cases, + * but sometimes the SSSE3 version is slower than the SSE2 equivalent + * on the Atom, but is generally faster on other processors supporting + * SSSE3. This flag allows for selectively disabling certain SSSE3 + * functions on the Atom. 
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and concatenates them in argument order. Unpacking stops at
// the first zero byte; the bytes collected up to that point are returned.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				// NUL terminates the string.
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
// ArmSHA3 reports whether the SHA3 flag is set in c.Arm
// (SHA-3 instructions: EOR3, RAX1, XAR, BCAX).
func (c CPUInfo) ArmSHA3() bool {
	return c.Arm&SHA3 != 0
}
// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw opcode bytes) and
// stores AX and DX into the two results.
TEXT ·asmXgetbv(SB), 7, $0
	MOVL index+0(FP), CX
	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
	MOVL AX, eax+4(FP)
	MOVL DX, edx+8(FP)
	RET
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX = op and CX cleared (asmCpuidex is the variant that
// loads a caller-supplied value into CX), then stores AX, BX, CX and DX into
// the four results.
TEXT ·asmCpuid(SB), 7, $0
	XORQ CX, CX          // CX = 0
	MOVL op+0(FP), AX
	CPUID
	MOVL AX, eax+8(FP)   // results start at offset 8 on amd64
	MOVL BX, ebx+12(FP)
	MOVL CX, ecx+16(FP)
	MOVL DX, edx+20(FP)
	RET
+ +//+build arm64,!gccgo + +// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt + +// func getMidr +TEXT ·getMidr(SB), 7, $0 + WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */ + MOVD R0, midr+0(FP) + RET + +// func getProcFeatures +TEXT ·getProcFeatures(SB), 7, $0 + WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */ + MOVD R0, procFeatures+0(FP) + RET + +// func getInstAttributes +TEXT ·getInstAttributes(SB), 7, $0 + WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */ + WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */ + MOVD R0, instAttrReg0+0(FP) + MOVD R1, instAttrReg1+8(FP) + RET + diff --git a/vendor/github.com/klauspost/cpuid/detect_arm64.go b/vendor/github.com/klauspost/cpuid/detect_arm64.go new file mode 100644 index 0000000..923a826 --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_arm64.go @@ -0,0 +1,219 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build arm64,!gccgo,!noasm,!appengine + +package cpuid + +func getMidr() (midr uint64) +func getProcFeatures() (procFeatures uint64) +func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) + +func initCPU() { + cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 } + xgetbv = func(uint32) (a, b uint32) { return 0, 0 } + rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 } +} + +func addInfo(c *CPUInfo) { + // ARM64 disabled for now. 
+ if true { + return + } + // midr := getMidr() + + // MIDR_EL1 - Main ID Register + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | Implementer | [31-24] | y | + // |--------------------------------------------------| + // | Variant | [23-20] | y | + // |--------------------------------------------------| + // | Architecture | [19-16] | y | + // |--------------------------------------------------| + // | PartNum | [15-4] | y | + // |--------------------------------------------------| + // | Revision | [3-0] | y | + // x--------------------------------------------------x + + // fmt.Printf(" implementer: 0x%02x\n", (midr>>24)&0xff) + // fmt.Printf(" variant: 0x%01x\n", (midr>>20)&0xf) + // fmt.Printf("architecture: 0x%01x\n", (midr>>16)&0xf) + // fmt.Printf(" part num: 0x%03x\n", (midr>>4)&0xfff) + // fmt.Printf(" revision: 0x%01x\n", (midr>>0)&0xf) + + procFeatures := getProcFeatures() + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | DIT | [51-48] | y | + // |--------------------------------------------------| + // | SVE | [35-32] | y | + // |--------------------------------------------------| + // | GIC | [27-24] | n | + // |--------------------------------------------------| + // | AdvSIMD | [23-20] | y | + // |--------------------------------------------------| + // | FP | [19-16] | y | + // |--------------------------------------------------| + // | EL3 | [15-12] | n | + // |--------------------------------------------------| + // | EL2 | [11-8] | n | + // |--------------------------------------------------| + // | EL1 | [7-4] | n | + // |--------------------------------------------------| + // | EL0 | [3-0] | n | + // x--------------------------------------------------x + + var f ArmFlags + // if 
procFeatures&(0xf<<48) != 0 { + // fmt.Println("DIT") + // } + if procFeatures&(0xf<<32) != 0 { + f |= SVE + } + if procFeatures&(0xf<<20) != 15<<20 { + f |= ASIMD + if procFeatures&(0xf<<20) == 1<<20 { + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1 + // 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic. + f |= FPHP + f |= ASIMDHP + } + } + if procFeatures&(0xf<<16) != 0 { + f |= FP + } + + instAttrReg0, instAttrReg1 := getInstAttributes() + + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | TS | [55-52] | y | + // |--------------------------------------------------| + // | FHM | [51-48] | y | + // |--------------------------------------------------| + // | DP | [47-44] | y | + // |--------------------------------------------------| + // | SM4 | [43-40] | y | + // |--------------------------------------------------| + // | SM3 | [39-36] | y | + // |--------------------------------------------------| + // | SHA3 | [35-32] | y | + // |--------------------------------------------------| + // | RDM | [31-28] | y | + // |--------------------------------------------------| + // | ATOMICS | [23-20] | y | + // |--------------------------------------------------| + // | CRC32 | [19-16] | y | + // |--------------------------------------------------| + // | SHA2 | [15-12] | y | + // |--------------------------------------------------| + // | SHA1 | [11-8] | y | + // |--------------------------------------------------| + // | AES | [7-4] | y | + // x--------------------------------------------------x + + // if instAttrReg0&(0xf<<52) != 0 { + // fmt.Println("TS") + // } + // if instAttrReg0&(0xf<<48) != 0 { + // fmt.Println("FHM") + // } + 
if instAttrReg0&(0xf<<44) != 0 { + f |= ASIMDDP + } + if instAttrReg0&(0xf<<40) != 0 { + f |= SM4 + } + if instAttrReg0&(0xf<<36) != 0 { + f |= SM3 + } + if instAttrReg0&(0xf<<32) != 0 { + f |= SHA3 + } + if instAttrReg0&(0xf<<28) != 0 { + f |= ASIMDRDM + } + if instAttrReg0&(0xf<<20) != 0 { + f |= ATOMICS + } + if instAttrReg0&(0xf<<16) != 0 { + f |= CRC32 + } + if instAttrReg0&(0xf<<12) != 0 { + f |= SHA2 + } + if instAttrReg0&(0xf<<12) == 2<<12 { + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented. + f |= SHA512 + } + if instAttrReg0&(0xf<<8) != 0 { + f |= SHA1 + } + if instAttrReg0&(0xf<<4) != 0 { + f |= AES + } + if instAttrReg0&(0xf<<4) == 2<<4 { + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1 + // 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities. + f |= PMULL + } + + // https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1 + // + // ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + // x--------------------------------------------------x + // | Name | bits | visible | + // |--------------------------------------------------| + // | GPI | [31-28] | y | + // |--------------------------------------------------| + // | GPA | [27-24] | y | + // |--------------------------------------------------| + // | LRCPC | [23-20] | y | + // |--------------------------------------------------| + // | FCMA | [19-16] | y | + // |--------------------------------------------------| + // | JSCVT | [15-12] | y | + // |--------------------------------------------------| + // | API | [11-8] | y | + // |--------------------------------------------------| + // | APA | [7-4] | y | + // |--------------------------------------------------| + // | DPB | [3-0] | y | + // x--------------------------------------------------x + + // if 
instAttrReg1&(0xf<<28) != 0 { + // fmt.Println("GPI") + // } + if instAttrReg1&(0xf<<28) != 24 { + f |= GPA + } + if instAttrReg1&(0xf<<20) != 0 { + f |= LRCPC + } + if instAttrReg1&(0xf<<16) != 0 { + f |= FCMA + } + if instAttrReg1&(0xf<<12) != 0 { + f |= JSCVT + } + // if instAttrReg1&(0xf<<8) != 0 { + // fmt.Println("API") + // } + // if instAttrReg1&(0xf<<4) != 0 { + // fmt.Println("APA") + // } + if instAttrReg1&(0xf<<0) != 0 { + f |= DCPOP + } + c.Arm = f +} diff --git a/vendor/github.com/klauspost/cpuid/detect_intel.go b/vendor/github.com/klauspost/cpuid/detect_intel.go new file mode 100644 index 0000000..363951b --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_intel.go @@ -0,0 +1,33 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine + +package cpuid + +func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32) +func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func asmXgetbv(index uint32) (eax, edx uint32) +func asmRdtscpAsm() (eax, ebx, ecx, edx uint32) + +func initCPU() { + cpuid = asmCpuid + cpuidex = asmCpuidex + xgetbv = asmXgetbv + rdtscpAsm = asmRdtscpAsm +} + +func addInfo(c *CPUInfo) { + c.maxFunc = maxFunctionID() + c.maxExFunc = maxExtendedFunction() + c.BrandName = brandName() + c.CacheLine = cacheLine() + c.Family, c.Model = familyModel() + c.Features = support() + c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0) + c.ThreadsPerCore = threadsPerCore() + c.LogicalCores = logicalCores() + c.PhysicalCores = physicalCores() + c.VendorID, c.VendorString = vendorID() + c.Hz = hertz(c.BrandName) + c.cacheSize() +} diff --git a/vendor/github.com/klauspost/cpuid/detect_ref.go b/vendor/github.com/klauspost/cpuid/detect_ref.go new file mode 100644 index 0000000..970ff3d --- /dev/null +++ b/vendor/github.com/klauspost/cpuid/detect_ref.go @@ -0,0 +1,14 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. 
// initCPU installs placeholder implementations for cpuid, cpuidex, xgetbv
// and rdtscpAsm that ignore their arguments and always return zeros. This
// variant is selected by the build constraint
// `!amd64,!386,!arm64 gccgo noasm appengine`.
func initCPU() {
	cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
	cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
	xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
\ No newline at end of file diff --git a/vendor/github.com/klauspost/reedsolomon/.travis.yml b/vendor/github.com/klauspost/reedsolomon/.travis.yml new file mode 100644 index 0000000..f77b85c --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/.travis.yml @@ -0,0 +1,77 @@ +language: go + +os: + - linux + - osx + - windows + +arch: + - amd64 + - arm64 + - ppc64le + - s390x + +go: + - 1.12.x + - 1.13.x + - 1.14.x + - master + +install: + - go get ./... + +script: + - go vet ./... + - go test -cpu=1,2 . + - go test -tags=noasm -cpu=1,2 . + - go build examples/simple-decoder.go + - go build examples/simple-encoder.go + - go build examples/stream-decoder.go + - go build examples/stream-encoder.go + +stages: + - gofmt + - test + - deploy + +jobs: + allow_failures: + - go: 'master' + - arch: s390x + fast_finish: true + include: + - stage: gofmt + go: 1.14.x + os: linux + arch: amd64 + script: + - diff <(gofmt -d .) <(printf "") + - diff <(gofmt -d ./examples) <(printf "") + - go install github.com/klauspost/asmfmt/cmd/asmfmt + - diff <(asmfmt -d .) <(printf "") + - stage: race + go: 1.14.x + os: linux + arch: amd64 + script: + - go test -cpu=1 -short -race . + - go test -cpu=2 -short -race . + - go test -tags=noasm -cpu=1 -short -race . + - go test -tags=noasm -cpu=4 -short -race . + - go test -no-avx512 -short -race . + - go test -no-avx512 -no-avx2 -short -race . + - go test -no-avx512 -no-avx2 -no-ssse3 -short -race . + - stage: amd64-noasm + go: 1.14.x + os: linux + arch: amd64 + script: + - go test -no-avx512 + - go test -no-avx512 -no-avx2 + - go test -no-avx512 -no-avx2 -no-ssse3 + - stage: i386 + go: 1.14.x + os: linux + arch: amd64 + script: + - GOOS=linux GOARCH=386 go test -short . 
diff --git a/vendor/github.com/klauspost/reedsolomon/LICENSE b/vendor/github.com/klauspost/reedsolomon/LICENSE new file mode 100644 index 0000000..a947e16 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/LICENSE @@ -0,0 +1,23 @@ +The MIT License (MIT) + +Copyright (c) 2015 Klaus Post +Copyright (c) 2015 Backblaze + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md new file mode 100644 index 0000000..f9824cb --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/README.md @@ -0,0 +1,395 @@ +# Reed-Solomon +[![GoDoc][1]][2] [![Build Status][3]][4] + +[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg +[2]: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc +[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master +[4]: https://travis-ci.org/klauspost/reedsolomon + +Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go. + +This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by +[Backblaze](http://backblaze.com), with some additional optimizations. + +For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/). + +Package home: https://github.com/klauspost/reedsolomon + +Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc + +# Installation +To get the package use the standard: +```bash +go get -u github.com/klauspost/reedsolomon +``` + +# Changes + +## May 2020 + +* ARM64 optimizations, up to 2.5x faster. +* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard. +* Much better performance when using a limited number of goroutines. +* AVX512 is now using multiple cores. +* Stream processing overhaul, big speedups in most cases. +* AVX512 optimizations + +## March 6, 2019 + +The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations. + +## February 8, 2019 + +AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2. +See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details. 
+ +## December 18, 2018 + +Assembly code for ppc64le has been contributed; this boosts performance by about 10x on this platform. + +## November 18, 2017 + +Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt +to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU. + +## October 1, 2017 + +* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option. +Thanks to [templexxx](https://github.com/templexxx) for the basis of this. + +* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines) +has been increased for better multi-core scaling. + +* After several requests, `Reconstruct` and `ReconstructData` now allow slices of zero length but sufficient capacity to +be used instead of allocating new memory. + +## August 26, 2017 + +* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update` +function contributed by [chenzhongtao](https://github.com/chenzhongtao). + +* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly, +which gives a huge performance boost on this platform. + +## July 20, 2017 + +`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface. +This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added: + +```Go +func (e *YourEnc) ReconstructData(shards [][]byte) error { + return ReconstructData(shards) +} +``` + +You can of course also do your own implementation. +The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder) +handles this without modifying the interface. +This is a good lesson on why returning interfaces is not a good design. + +# Usage + +This section assumes you know the basics of Reed-Solomon encoding. 
+A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/). + +This package performs the calculation of the parity sets. The usage is therefore relatively simple. + +First of all, you need to choose your distribution of data and parity shards. +A 'good' distribution is very subjective, and will depend a lot on your usage scenario. +A good starting point is above 5 and below 257 data shards (the maximum supported number), +and the number of parity shards to be 2 or above, and below the number of data shards. + +To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated): +```Go + enc, err := reedsolomon.New(10, 3) +``` +This encoder will work for all parity sets with this distribution of data and parity shards. +The error will only be set if you specify 0 or negative values in any of the parameters, +or if you specify more than 256 data shards. + +If you will primarily be using it with one shard size it is recommended to use +[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines) +as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed. +It is not required that all shards are this size. + +The data you send and receive is a simple slice of byte slices; `[][]byte`. +In the example above, the top slice must have a length of 13. + +```Go + data := make([][]byte, 13) +``` +You should then fill the 10 first slices with *equally sized* data, +and create parity shards that will be populated with parity data. In this case we create the data in memory, +but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files. 
+ +```Go + // Create all shards, size them at 50000 each + for i := range data { + data[i] = make([]byte, 50000) + } + + + // Fill some data into the data shards + for i, in := range data[:10] { + for j:= range in { + in[j] = byte((i+j)&0xff) + } + } +``` + +To populate the parity shards, you simply call `Encode()` with your data. +```Go + err = enc.Encode(data) +``` +The only case where you should get an error is if the data shards aren't of equal size. +The last 3 shards now contain parity data. You can verify this by calling `Verify()`: + +```Go + ok, err = enc.Verify(data) +``` + +The final (and important) part is to be able to reconstruct missing shards. +For this to work, you need to know which parts of your data are missing. +The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, +you need to implement a hash check for each shard. + +If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set. + +To indicate missing data, you set the shard to nil before calling `Reconstruct()`: + +```Go + // Delete two data shards + data[3] = nil + data[7] = nil + + // Reconstruct the missing shards + err := enc.Reconstruct(data) +``` +The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail. + +If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`: + +```Go + // Delete two data shards + data[3] = nil + data[7] = nil + + // Reconstruct just the missing data shards + err := enc.ReconstructData(data) +``` + +So to sum up reconstruction: +* The number of data/parity shards must match the numbers used for encoding. +* The order of shards must be the same as used when encoding. +* You may only supply data you know is valid. +* Invalid shards should be set to nil. 
+ +For complete examples of an encoder and decoder see the +[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples). + +# Splitting/Joining Data + +You might have a large slice of data. +To help you split this, there are some helper functions that can split and join a single byte slice. + +```Go + bigfile, _ := ioutil.ReadFile("myfile.data") + + // Split the file + split, err := enc.Split(bigfile) +``` +This will split the file into the number of data shards set when creating the encoder and create empty parity shards. + +An important thing to note is that you have to *keep track of the exact input size*. +If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard. + +To join a data set, use the `Join()` function, which will join the shards and write them to the `io.Writer` you supply: +```Go + // Join a data set and write it to io.Discard. + err = enc.Join(io.Discard, data, len(bigfile)) +``` + +# Streaming/Merging + +It might seem like a limitation that all data should be in memory, +but an important property is that *as long as the number of data/parity shards is the same, +you can merge/split data sets*, and they will remain valid as a separate set. + +```Go + // Split the data set of 50000 elements into two of 25000 + splitA := make([][]byte, 13) + splitB := make([][]byte, 13) + + // Merge into a 100000 element set + merged := make([][]byte, 13) + + for i := range data { + splitA[i] = data[i][:25000] + splitB[i] = data[i][25000:] + + // Concatenate it to itself + merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...) + merged[i] = append(merged[i], data[i]...) + } + + // Each part should still verify as ok. 
+ ok, err := enc.Verify(splitA) + if ok && err == nil { + log.Println("splitA ok") + } + + ok, err = enc.Verify(splitB) + if ok && err == nil { + log.Println("splitB ok") + } + + ok, err = enc.Verify(merged) + if ok && err == nil { + log.Println("merge ok") + } +``` + +This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. +For the best throughput, don't use blocks that are too small. + +This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. +This doesn't give the same flexibility as a higher number of data shards, but it will be much more performant. + +# Streaming API + +Support for a streaming API has been added, to help perform fully streaming operations, +which enables you to do the same operations, but on streams. +To use the stream API, use the [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function +to create the encoding/decoding interfaces. + +You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams) +to ready an interface that reads/writes concurrently from the streams. + +You can specify the size of each operation using +[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize). +This will set the size of each read/write operation. + +Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. +Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API. +If an error occurs in relation to a stream, +a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) +or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) +will help you determine which stream was the offender. + +There is no buffering or timeouts/retry specified. 
If you want to add that, you need to add it to the Reader/Writer. + +For complete examples of a streaming encoder and decoder see the +[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples). + +# Advanced Options + +You can modify internal options which affects how jobs are split between and processed by goroutines. + +To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`. +If no Options are supplied, default options are used. + +Example of how to supply options: + + ```Go + enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25)) + ``` + + +# Performance +Performance depends mainly on the number of parity shards. +In rough terms, doubling the number of parity shards will double the encoding time. + +Here are the throughput numbers with some different selections of data and parity shards. +For reference each shard is 1MB random data, and 2 CPU cores are used for encoding. + +| Data | Parity | Parity | MB/s | SSSE3 MB/s | SSSE3 Speed | Rel. Speed | +|------|--------|--------|--------|-------------|-------------|------------| +| 5 | 2 | 40% | 576,11 | 2599,2 | 451% | 100,00% | +| 10 | 2 | 20% | 587,73 | 3100,28 | 528% | 102,02% | +| 10 | 4 | 40% | 298,38 | 2470,97 | 828% | 51,79% | +| 50 | 20 | 40% | 59,81 | 713,28 | 1193% | 10,38% | + +If `runtime.GOMAXPROCS()` is set to a value higher than 1, +the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`. + +Example of performance scaling on AMD Ryzen 3950X - 16 physical cores, 32 logical cores, AVX 2. +The example uses 10 blocks with 1MB data each and 4 parity blocks. 
+ +| Threads | Speed | +|---------|------------| +| 1 | 9979 MB/s | +| 2 | 18870 MB/s | +| 4 | 33697 MB/s | +| 8 | 51531 MB/s | +| 16 | 59204 MB/s | + + +Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result: +``` +benchmark all MB/s data MB/s speedup +BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x +BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x +BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x +BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x +BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x +BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x +BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x +``` + +# Performance on AVX512 + +The performance on AVX512 has been accelerated for Intel CPUs. +This gives speedups on a per-core basis typically up to 2x compared to +AVX2 as can be seen in the following table: + +``` +[...] +``` + +This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other. +In doing so it is possible to minimize the memory bandwidth required for loading all data shards. +At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM +registers (32 in total) is used to keep more data around (most notably the matrix coefficients). + +# Performance on ARM64 NEON + +By exploiting NEON instructions the performance for ARM has been accelerated. +Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2): + +``` +BenchmarkGalois128K-64 119562 10028 ns/op 13070.78 MB/s +BenchmarkGalois1M-64 14380 83424 ns/op 12569.22 MB/s +BenchmarkGaloisXor128K-64 96508 12432 ns/op 10543.29 MB/s +BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s +``` + +# Performance on ppc64le + +The performance for ppc64le has been accelerated. 
+This gives roughly a 10x performance improvement on this architecture as can be seen below: + +``` +benchmark old MB/s new MB/s speedup +BenchmarkGalois128K-160 948.87 8878.85 9.36x +BenchmarkGalois1M-160 968.85 9041.92 9.33x +BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x +BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x +``` + +# asm2plan9s + +[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents. + +# Links +* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/). +* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible Java library by Backblaze. +* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation. +* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package. +* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance. +* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation. +* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests. +* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations. + +# License + +This code, like the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon), is published under an MIT license. See LICENSE file for more information. 
diff --git a/vendor/github.com/klauspost/reedsolomon/appveyor.yml b/vendor/github.com/klauspost/reedsolomon/appveyor.yml new file mode 100644 index 0000000..9bb067f --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/appveyor.yml @@ -0,0 +1,20 @@ +os: Visual Studio 2015 + +platform: x64 + +clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon + +# environment variables +environment: + GOPATH: c:\gopath + +install: + - echo %PATH% + - echo %GOPATH% + - go version + - go env + - go get -d ./... + +build_script: + - go test -v -cpu=2 ./... + - go test -cpu=1,2,4 -short -race ./... diff --git a/vendor/github.com/klauspost/reedsolomon/galois.go b/vendor/github.com/klauspost/reedsolomon/galois.go new file mode 100644 index 0000000..76049f9 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois.go @@ -0,0 +1,929 @@ +/** + * 8-bit Galois Field + * Copyright 2015, Klaus Post + * Copyright 2015, Backblaze, Inc. All rights reserved. + */ + +package reedsolomon + +const ( + // The number of elements in the field. + fieldSize = 256 + + // The polynomial used to generate the logarithm table. + // + // There are a number of polynomials that work to generate + // a Galois field of 256 elements. The choice is arbitrary, + // and we just use the first one. + // + // The possibilities are: 29, 43, 45, 77, 95, 99, 101, 105, + //* 113, 135, 141, 169, 195, 207, 231, and 245. 
+ generatingPolynomial = 29 +) + +var logTable = [fieldSize]byte{ + 0, 0, 1, 25, 2, 50, 26, 198, + 3, 223, 51, 238, 27, 104, 199, 75, + 4, 100, 224, 14, 52, 141, 239, 129, + 28, 193, 105, 248, 200, 8, 76, 113, + 5, 138, 101, 47, 225, 36, 15, 33, + 53, 147, 142, 218, 240, 18, 130, 69, + 29, 181, 194, 125, 106, 39, 249, 185, + 201, 154, 9, 120, 77, 228, 114, 166, + 6, 191, 139, 98, 102, 221, 48, 253, + 226, 152, 37, 179, 16, 145, 34, 136, + 54, 208, 148, 206, 143, 150, 219, 189, + 241, 210, 19, 92, 131, 56, 70, 64, + 30, 66, 182, 163, 195, 72, 126, 110, + 107, 58, 40, 84, 250, 133, 186, 61, + 202, 94, 155, 159, 10, 21, 121, 43, + 78, 212, 229, 172, 115, 243, 167, 87, + 7, 112, 192, 247, 140, 128, 99, 13, + 103, 74, 222, 237, 49, 197, 254, 24, + 227, 165, 153, 119, 38, 184, 180, 124, + 17, 68, 146, 217, 35, 32, 137, 46, + 55, 63, 209, 91, 149, 188, 207, 205, + 144, 135, 151, 178, 220, 252, 190, 97, + 242, 86, 211, 171, 20, 42, 93, 158, + 132, 60, 57, 83, 71, 109, 65, 162, + 31, 45, 67, 216, 183, 123, 164, 118, + 196, 23, 73, 236, 127, 12, 111, 246, + 108, 161, 59, 82, 41, 157, 85, 170, + 251, 96, 134, 177, 187, 204, 62, 90, + 203, 89, 95, 176, 156, 169, 160, 81, + 11, 245, 22, 235, 122, 117, 44, 215, + 79, 174, 213, 233, 230, 231, 173, 232, + 116, 214, 244, 234, 168, 80, 88, 175, +} + +/** + * Inverse of the logarithm table. Maps integer logarithms + * to members of the field. There is no entry for 255 + * because the highest log is 254. 
+ * + * This table was generated by `go run gentables.go` + */ +var expTable = []byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 
0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e} + +func galAdd(a, b byte) byte { + return a ^ b +} + +func galSub(a, b byte) byte { + return a ^ b +} + +// Table from https://github.com/templexxx/reedsolomon +var invTable = [256]byte{0x0, 0x1, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, 0xab, 0xc, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x4, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, 0xcb, 0x63, 0x97, 0xe, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 
0xfb, 0xda, 0x79, 0xdb, 0x77, 0x6, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x2, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0xf, 0x5c, 0xb, 0xdc, 0xbd, 0x94, 0xac, 0x9, 0xc7, 0xa2, 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x5, 0xce, 0x3b, 0xd, 0x3c, 0x9c, 0x8, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x7, 0x7b, 0x95, 0x9a, 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, 0x99, 0xa, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, 0xe3, 0xe7, 0xb5, 0xea, 0x3, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd} + +var mulTable = [256][256]uint8{[256]uint8{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff}, + {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 
0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0xd, 0xf, 0x9, 0xb, 0x5, 0x7, 0x1, 0x3, 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3}, + {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 
0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0xd, 0xe, 0xb, 0x8, 0x1, 0x2, 0x7, 0x4, 0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c}, + {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64, 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c, 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19, 0x15, 0x11, 0xd, 0x9, 0x5, 0x1, 0x3d, 0x39, 0x35, 0x31, 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61, 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9, 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1, 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26, 0x1a, 0x1e, 0x12, 0x16, 0xa, 0xe, 0x2, 0x6, 0x7a, 0x7e, 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56, 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86, 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde, 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b, 0x37, 0x33, 0x3f, 0x3b, 0x7, 0x3, 0xf, 0xb, 0x17, 0x13, 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 
0x7f, 0x7b, 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 0xa7, 0xa3, 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b, 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb}, + {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f, 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb, 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5, 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7, 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e, 0xd, 0x8, 0x7, 0x2, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20, 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2, 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6, 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf, 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d, 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9, 0x1a, 0x1f, 0x10, 0x15, 0xe, 0xb, 0x4, 0x1, 0x32, 0x37, 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45, 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc, 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2, 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90, 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74, 0x17, 0x12, 0x1d, 0x18, 0x3, 0x6, 0x9, 0xc, 0x3f, 0x3a, 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24}, + {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72, 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6, 0xcc, 0xca, 0xd8, 
0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa, 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82, 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab, 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7, 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f, 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b, 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0xd, 0xb, 0x1, 0x7, 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0xf, 0x9, 0x3, 0x5, 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71, 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed, 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95, 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5, 0xba, 0xbc, 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80, 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8, 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c, 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10, 0x2, 0x4, 0xe, 0x8, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, 0x3e, 0x38}, + {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4, 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8, 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a, 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28, 0x5, 0x2, 0xb, 0xc, 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 
0x56, 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60, 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98, 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde, 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52, 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30, 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0xf, 0x8, 0x1, 0x6, 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57, 0xa, 0xd, 0x4, 0x3, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35, 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93, 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff, 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7}, + {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 0x1d, 0x15, 0xd, 0x5, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55, 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85, 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0xa, 0x2, 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2, 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2, 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x7, 0xf, 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f, 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef, 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c, 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, 0x4, 0xc, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc, 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61, 0x79, 0x71, 0x49, 0x41, 0x59, 0x51, 0x29, 0x21, 0x39, 0x31, 0x9, 0x1, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91, 0x4e, 0x46, 0x5e, 0x56, 0x6e, 
0x66, 0x7e, 0x76, 0xe, 0x6, 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6, 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b, 0x13, 0x1b, 0x3, 0xb, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb, 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b, 0xb3, 0xbb, 0xa3, 0xab}, + {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26, 0x19, 0x10, 0xb, 0x2, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92, 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x4, 0xd, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0xf, 0x6, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb, 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, 0x8, 0x1, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe, 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18, 0x3, 0xa, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95, 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, 0xf0, 0xf9, 0x1e, 0x17, 0xc, 0x5, 0x3a, 0x33, 0x28, 0x21, 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba, 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 0xfb, 0xf2, 0xe9, 0xe0, 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x7, 0xe, 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54}, + {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe, 
0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b, 0xd, 0x7, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7, 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3, 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc, 0x1a, 0x10, 0xe, 0x4, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40, 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9, 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71, 0x17, 0x1d, 0x3, 0x9, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63, 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27, 0x11, 0x1b, 0x5, 0xf, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf, 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x8, 0x2, 0x64, 0x6e, 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a, 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5, 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79, 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d, 0xb, 0x1, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8, 0x2e, 0x24, 0x3a, 0x30, 0x6, 0xc, 0x12, 0x18, 0x7e, 0x74, 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48}, + {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76, 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 0x33, 0x38, 0x9, 0x2, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, 0xf7, 0xfc, 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4, 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9, 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57, 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x4, 0xf, 0x3e, 0x35, 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 
0xab, 0xa0, 0xbd, 0xb6, 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c, 0x21, 0x2a, 0x1b, 0x10, 0xd, 0x6, 0x6f, 0x64, 0x79, 0x72, 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x1, 0xa, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89, 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x8, 0x3, 0x1e, 0x15, 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18, 0x5, 0xe, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56, 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca, 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d, 0x20, 0x2b, 0x1a, 0x11, 0xc, 0x7, 0xde, 0xd5, 0xc8, 0xc3, 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, 0xbc, 0xb7}, + {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc, 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4, 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9, 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31, 0x25, 0x29, 0xd, 0x1, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33, 0x17, 0x1b, 0xf, 0x3, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3, 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6, 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce, 0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, 0x52, 0x5e, 0x1a, 0x16, 0x2, 0xe, 0x2a, 0x26, 0x32, 0x3e, 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22, 0x36, 0x3a, 0x1e, 0x12, 0x6, 0xa, 0x8e, 0x82, 0x96, 0x9a, 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7, 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f, 0xb, 0x7, 
0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67, 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, 0x41, 0x4d, 0x9, 0x5, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d, 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5, 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0, 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x4, 0x8, 0x1c, 0x10, 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70}, + {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x5, 0x8, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0xf, 0x2, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0xa, 0x7, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9, 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, 0x88, 0x85, 0x1e, 0x13, 0x4, 0x9, 0x2a, 0x27, 0x30, 0x3d, 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e, 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x1, 0xc, 0x2f, 0x22, 0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8, 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc, 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e, 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0xb, 0x6, 0x25, 0x28, 0x3f, 0x32, 0x14, 0x19, 0xe, 0x3, 0x20, 0x2d, 0x3a, 0x37, 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9, 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb, 0x98, 0x95, 0x82, 0x8f}, + {0x0, 0xe, 0x1c, 
0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x5, 0xb, 0x19, 0x17, 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0xf, 0x1, 0x13, 0x1d, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0xa, 0x4, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79, 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x7, 0x9, 0xb3, 0xbd, 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1, 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4, 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10, 0x2, 0xc, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6, 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, 0xa0, 0xae, 0x14, 0x1a, 0x8, 0x6, 0x2c, 0x22, 0x30, 0x3e, 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27, 0x35, 0x3b, 0x11, 0x1f, 0xd, 0x3, 0x59, 0x57, 0x45, 0x4b, 0x61, 0x6f, 0x7d, 0x73, 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93}, + {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1, 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0, 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0xd, 0x2, 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64, 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2, 0x17, 0x18, 
0x9, 0x6, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60, 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x4, 0xb, 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7, 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc, 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba, 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, 0x1, 0xe, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76, 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0xc, 0x3, 0x56, 0x59, 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 0xcf, 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x8, 0x7, 0x16, 0x19, 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb, 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad, 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c, 0x39, 0x36, 0x27, 0x28, 0x5, 0xa, 0x1b, 0x14, 0x41, 0x4e, 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c}, + {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0x1d, 0xd, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d, 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a, 0x1a, 0xa, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a, 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 0x7, 0x17, 0x67, 0x77, 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7, 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x4, 0xf4, 0xe4, 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59, 0x29, 0x39, 0x9, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0xe, 0x1e, 0x2e, 0x3e, 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43, 0x73, 0x63, 0x13, 0x3, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3, 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x8, 0x18, 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65, 0x55, 0x45, 0x35, 0x25, 0x15, 0x5, 0xd2, 0xc2, 
0xf2, 0xe2, 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x2, 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf, 0x4f, 0x5f, 0x6f, 0x7f, 0xf, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c, 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0xc, 0x3c, 0x2c, 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, 0xe1, 0xf1, 0x1, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36, 0x6, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b, 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0xb, 0x7b, 0x6b, 0x5b, 0x4b}, + {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0xd, 0x1c, 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6, 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0xb, 0x38, 0x29, 0x5e, 0x4f, 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5, 0x17, 0x6, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e, 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x7, 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, 0xda, 0xcb, 0x39, 0x28, 0x1b, 0xa, 0x7d, 0x6c, 0x5f, 0x4e, 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f, 0xc, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95, 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x1, 0x10, 0x67, 0x76, 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc, 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0xe, 0x1f, 0xe0, 0xf1, 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56, 0x21, 0x30, 0x3, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x5, 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x8, 0xf7, 0xe6, 0xd5, 0xc4, 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x9, 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3, 0x51, 0x40, 0x73, 0x62, 0x15, 0x4, 0x37, 0x26, 0xd9, 0xc8, 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75, 0x2, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, 0xa8, 0xb9, 0x4b, 0x5a, 
0x69, 0x78, 0xf, 0x1e, 0x2d, 0x3c, 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4}, + {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0xb, 0x75, 0x67, 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3, 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x4, 0xea, 0xf8, 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71, 0xf, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x8, 0x1a, 0xc9, 0xdb, 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f, 0x11, 0x3, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, 0xe2, 0xf0, 0x1e, 0xc, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60, 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31, 0x7, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3, 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, 0x9, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x2, 0x34, 0x26, 0x8f, 0x9d, 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0xd, 0x3b, 0x29, 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, 0xde, 0xcc, 0x22, 0x30, 0x6, 0x14, 0x6a, 0x78, 0x4e, 0x5c, 0x1, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83, 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0xa, 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x5, 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54, 0x62, 0x70, 0xe, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0, 0x9e, 0x8c, 0xba, 0xa8}, + {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1, 0x2d, 0x3e, 0xb, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6, 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f, 0x16, 0x5, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0xe, 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 
0xa7, 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0xa, 0x19, 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, 0xf3, 0xe0, 0x1, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78, 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65, 0x50, 0x43, 0x3a, 0x29, 0x1c, 0xf, 0xc3, 0xd0, 0xe5, 0xf6, 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x4, 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0xc, 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b, 0x7e, 0x6d, 0x14, 0x7, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5, 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x9, 0x1a, 0x63, 0x70, 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce, 0x2, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89, 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4, 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x6, 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95, 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0xd, 0x9b, 0x88, 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x3, 0x10, 0x25, 0x36, 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, 0xdc, 0xcf, 0x2e, 0x3d, 0x8, 0x1b, 0x62, 0x71, 0x44, 0x57}, + {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61, 0xd, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6, 0x1a, 0xe, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76, 0xe7, 0xf3, 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b, 0x17, 0x3, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, 0x11, 0x5, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5, 0x34, 0x20, 0x1c, 0x8, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80, 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef, 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, 0xb, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2, 0x2e, 0x3a, 0x6, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6, 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e, 0x22, 0x36, 0xa, 0x1e, 0x8f, 
0x9b, 0xa7, 0xb3, 0xdf, 0xcb, 0xf7, 0xe3, 0x2f, 0x3b, 0x7, 0x13, 0x7f, 0x6b, 0x57, 0x43, 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x4, 0xc8, 0xdc, 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x9, 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7, 0x1b, 0xf, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2, 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a, 0x16, 0x2, 0x3e, 0x2a, 0x1, 0x15, 0x29, 0x3d, 0x51, 0x45, 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd, 0x5c, 0x48, 0x74, 0x60, 0xc, 0x18, 0x24, 0x30, 0xfc, 0xe8, 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90}, + {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0xc, 0x33, 0x26, 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f, 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0xd, 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, 0xa9, 0xbc, 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x1, 0x14, 0x29, 0x3c, 0x3, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94, 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b, 0x30, 0x25, 0x1a, 0xf, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8, 0x1b, 0xe, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb, 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69, 0x2, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x6, 0x13, 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91, 0x1f, 0xa, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2, 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7, 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, 0x1e, 0xb, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee, 0x2d, 0x38, 0x7, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e, 0x51, 0x44, 0x2f, 0x3a, 0x5, 0x10, 0xd3, 0xc6, 0xf9, 0xec, 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x9, 0x62, 0x77, 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5, 0xe1, 
0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c, 0x63, 0x76, 0x1d, 0x8, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93, 0xf8, 0xed, 0xd2, 0xc7, 0x4, 0x11, 0x2e, 0x3b, 0x50, 0x45, 0x7a, 0x6f}, + {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b, 0x51, 0x47, 0x25, 0x33, 0x9, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7, 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x4, 0x3e, 0x28, 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21, 0x1b, 0xd, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3, 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x1, 0x17, 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6, 0x24, 0x32, 0x8, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x5, 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 0x8f, 0x99, 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, 0x1a, 0xc, 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc, 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69, 0x53, 0x45, 0x27, 0x31, 0xb, 0x1d, 0xb2, 0xa4, 0x9e, 0x88, 0xea, 0xfc, 0xc6, 0xd0, 0x2, 0x14, 0x2e, 0x38, 0x5a, 0x4c, 0x76, 0x60, 0x35, 0x23, 0x19, 0xf, 0x6d, 0x7b, 0x41, 0x57, 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e, 0x64, 0x72, 0x10, 0x6, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2, 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0xa, 0x1c, 0x7e, 0x68, 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4, 0x5b, 0x4d, 0x77, 0x61, 0x3, 0x15, 0x2f, 0x39, 0xeb, 0xfd, 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6, 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, 0x18, 0xe, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3, 0x11, 0x7, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73}, + {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, 0x1f, 0x8, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0, 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75, 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x7, 
0xb7, 0xa0, 0x99, 0x8e, 0xeb, 0xfc, 0xc5, 0xd2, 0xf, 0x18, 0x21, 0x36, 0x53, 0x44, 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc, 0x11, 0x6, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3, 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45, 0x20, 0x37, 0xe, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, 0x1, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae, 0x1e, 0x9, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1, 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76, 0x13, 0x4, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, 0x85, 0x92, 0x22, 0x35, 0xc, 0x1b, 0x7e, 0x69, 0x50, 0x47, 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82, 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x3, 0x14, 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0xb, 0x32, 0x25, 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49, 0x70, 0x67, 0x2, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2, 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0xa, 0x6f, 0x78, 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x5, 0x60, 0x77, 0x4e, 0x59, 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46, 0x7f, 0x68, 0xd, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0, 0xb5, 0xa2, 0x9b, 0x8c}, + {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88, 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45, 0x6d, 0x75, 0x3d, 0x25, 0xd, 0x15, 0x27, 0x3f, 0x17, 0xf, 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2, 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x2, 0x2a, 0x32, 0x4e, 0x56, 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x6, 0x8e, 0x96, 0xbe, 0xa6, 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, 0x83, 0x9b, 0x13, 0xb, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b, 0x69, 0x71, 0x59, 0x41, 0x9, 0x11, 0x39, 0x21, 0xa9, 0xb1, 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc, 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x4, 0x1c, 0x54, 0x4c, 0x64, 0x7c, 
0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4, 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0xc, 0x14, 0x1, 0x19, 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9, 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, 0xeb, 0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x3, 0x2b, 0x33, 0x26, 0x3e, 0x16, 0xe, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe, 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa, 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0xa, 0x22, 0x3a, 0x72, 0x6a, 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x7, 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed, 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x5, 0x1d, 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x8, 0x10, 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0}, + {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6, 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, 0x13, 0xa, 0x7, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48, 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93, 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69, 0x26, 0x3f, 0x14, 0xd, 0xe, 0x17, 0x3c, 0x25, 0x6a, 0x73, 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89, 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52, 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x4, 0x9, 0x10, 0x3b, 0x22, 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb, 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x3, 0x1c, 0x5, 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff, 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0xf, 0x16, 0x1b, 0x2, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca, 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd, 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, 0x8, 0x11, 0x12, 0xb, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d, 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 
0x9f, 0x86, 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c, 0x33, 0x2a, 0x1, 0x18, 0x15, 0xc, 0x27, 0x3e, 0x71, 0x68, 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92, 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49, 0x62, 0x7b, 0x34, 0x2d, 0x6, 0x1f}, + {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb, 0x6d, 0x77, 0x59, 0x43, 0x5, 0x1f, 0x31, 0x2b, 0x67, 0x7d, 0x53, 0x49, 0xf, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99, 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, 0x86, 0x9c, 0xa, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c, 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x4, 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d, 0x1b, 0x1, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef, 0x79, 0x63, 0x4d, 0x57, 0x11, 0xb, 0x25, 0x3f, 0x14, 0xe, 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea, 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0xd, 0x17, 0x3c, 0x26, 0x8, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6, 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8, 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x2, 0x18, 0x5e, 0x44, 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x7, 0x1d, 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55, 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x9, 0x9f, 0x85, 0xab, 0xb1, 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, 0xae, 0xb4, 0x22, 0x38, 0x16, 0xc, 0x4a, 0x50, 0x7e, 0x64, 0x28, 0x32, 0x1c, 0x6, 0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2, 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb, 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, 0x19, 0x3}, + {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6, 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58, 0x19, 0x2, 0x2f, 
0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, 0x1d, 0x6, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde, 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29, 0x4, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3, 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, 0xc, 0x17, 0x23, 0x38, 0x15, 0xe, 0x4f, 0x54, 0x79, 0x62, 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2, 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0xa, 0x27, 0x3c, 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x8, 0x13, 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd, 0x1, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2, 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81, 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x3, 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x7, 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0, 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x5, 0x1e, 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0xd, 0x16, 0x22, 0x39, 0x14, 0xf, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1, 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5, 0xa4, 0xbf, 0x92, 0x89, 0x10, 0xb, 0x26, 0x3d, 0x7c, 0x67, 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x9, 0x12, 0x3f, 0x24, 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc}, + {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, 0x95, 0x89, 0x3d, 0x21, 0x5, 0x19, 0x4d, 0x51, 0x75, 0x69, 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b, 0x7f, 0x63, 0x37, 0x2b, 0xf, 0x13, 0x7a, 0x66, 0x42, 0x5e, 0xa, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x7, 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92, 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a, 0x1e, 0x2, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, 0xbc, 0xa0, 0x14, 0x8, 0x2c, 0x30, 0x64, 0x78, 0x5c, 
0x40, 0x29, 0x35, 0x11, 0xd, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5, 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82, 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, 0xe, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0xb, 0x17, 0x33, 0x2f, 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x1, 0x1d, 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5, 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, 0x94, 0x88, 0x3c, 0x20, 0x4, 0x18, 0x4c, 0x50, 0x74, 0x68, 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1, 0x15, 0x9, 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0xc, 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x6, 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93, 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b, 0x1f, 0x3, 0x27, 0x3b}, + {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb, 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38, 0x1f, 0x2, 0x51, 0x4c, 0x6b, 0x76, 0x87, 0x9a, 0xbd, 0xa0, 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x6, 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x4, 0x19, 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0xe, 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc, 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, 0x90, 0x8d, 0x36, 0x2b, 0xc, 0x11, 0x42, 0x5f, 0x78, 0x65, 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61, 0x46, 0x5b, 0x8, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e, 0x2d, 0x30, 0x17, 0xa, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x1, 0x52, 0x4f, 0x68, 0x75, 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6, 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x3, 0x1e, 0x39, 0x24, 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x7, 0x1a, 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x5, 0x22, 0x3f, 0x84, 0x99, 0xbe, 0xa3, 0xf0, 0xed, 0xca, 
0xd7, 0x35, 0x28, 0xf, 0x12, 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab, 0x10, 0xd, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf, 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d, 0x2e, 0x33, 0x14, 0x9, 0x7f, 0x62, 0x45, 0x58, 0xb, 0x16, 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4}, + {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf, 0x85, 0x9b, 0xb9, 0xa7, 0xd, 0x13, 0x31, 0x2f, 0x75, 0x6b, 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd, 0x17, 0x9, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x4, 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8, 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x1, 0x5b, 0x45, 0x67, 0x79, 0x2e, 0x30, 0x12, 0xc, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0, 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x8, 0x16, 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93, 0x39, 0x27, 0x5, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5, 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69, 0x33, 0x2d, 0xf, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, 0x2, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec, 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x6, 0xac, 0xb2, 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83, 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, 0x15, 0xb, 0x68, 0x76, 0x54, 0x4a, 0x10, 0xe, 0x2c, 0x32, 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b, 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47, 0x1d, 0x3, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x7, 0x19, 0x3b, 0x25, 0x72, 0x6c, 0x4e, 0x50, 0xa, 0x14, 0x36, 0x28, 0x82, 0x9c, 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8}, + {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 
0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0, 0x15, 0xa, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8, 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x1, 0x1e, 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0xb, 0x56, 0x49, 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f, 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74, 0x55, 0x4a, 0x17, 0x8, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f, 0x2, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, 0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x9, 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6, 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60, 0x3d, 0x22, 0x3, 0x1c, 0x3b, 0x24, 0x5, 0x1a, 0x47, 0x58, 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e, 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31, 0x10, 0xf, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd, 0x80, 0x9f, 0xbe, 0xa1, 0x4, 0x1b, 0x3a, 0x25, 0x78, 0x67, 0x46, 0x59, 0x11, 0xe, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c, 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7, 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71, 0x2c, 0x33, 0x12, 0xd, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, 0x7, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0, 0x6f, 0x70, 0x51, 0x4e, 0x13, 0xc, 0x2d, 0x32, 0x97, 0x88, 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3, 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x6, 0x19, 0x38, 0x27}, + {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a, 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x7, 0x67, 0x47, 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, 0xb4, 0x94, 0x69, 0x49, 0x29, 0x9, 0xe9, 0xc9, 0xa9, 0x89, 0x4e, 0x6e, 0xe, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73, 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88, 0x68, 0x48, 0x28, 0x8, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32, 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 
0xf, 0x2f, 0x9c, 0xbc, 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1, 0x1, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x6, 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b, 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0xd, 0x2d, 0xd0, 0xf0, 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97, 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, 0x2a, 0xa, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59, 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x4, 0x64, 0x44, 0x83, 0xa3, 0xc3, 0xe3, 0x3, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe, 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x5, 0x65, 0x45, 0xa5, 0x85, 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8, 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x2, 0x22, 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31, 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0xc, 0x2c, 0xcc, 0xec, 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0xb, 0xeb, 0xcb, 0xab, 0x8b, 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96}, + {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0xb, 0x68, 0x49, 0xae, 0x8f, 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8, 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60, 0x3, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d, 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x8, 0xef, 0xce, 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0xd, 0x6e, 0x4f, 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3, 0xc0, 0xe1, 0x6, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4, 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0xe, 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2, 0x81, 0xa0, 0x47, 0x66, 0x5, 0x24, 0x4d, 0x6c, 0xf, 0x2e, 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x4, 0xe3, 0xc2, 0xa1, 0x80, 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38, 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0xc, 0x2d, 0x4e, 0x6f, 0x88, 0xa9, 
0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, 0xf5, 0xd4, 0x26, 0x7, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1, 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x2, 0xf0, 0xd1, 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac, 0x4b, 0x6a, 0x9, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56, 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x1, 0x62, 0x43, 0x9b, 0xba, 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed, 0xa, 0x2b, 0x48, 0x69}, + {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3, 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35, 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52, 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, 0xf5, 0xd7, 0x2e, 0xc, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0, 0x23, 0x1, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a, 0x2c, 0xe, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x3, 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91, 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73, 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x2, 0x20, 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0xf, 0x2d, 0xc3, 0xe1, 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e, 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8, 0x8e, 0xac, 0x42, 0x60, 0x6, 0x24, 0xc7, 0xe5, 0x83, 0xa1, 0x4f, 0x6d, 0xb, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, 0x28, 0xa, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x7, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1, 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde, 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x8, 0x6e, 0x4c, 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x5, 0x63, 0x41, 0x8c, 0xae, 0xc8, 0xea, 0x4, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7, 0x9, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 
0x57, 0x75}, + {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 0xa, 0x29, 0x4c, 0x6f, 0x86, 0xa5, 0xc0, 0xe3, 0xf, 0x2c, 0x49, 0x6a, 0x83, 0xa0, 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd, 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d, 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e, 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0xb, 0x6e, 0x4d, 0xa4, 0x87, 0xe2, 0xc1, 0x2d, 0xe, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4, 0x22, 0x1, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x4, 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59, 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf, 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73, 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30, 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6, 0x44, 0x67, 0x2, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62, 0x7, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x8, 0x2b, 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0xd, 0x2e, 0xc7, 0xe4, 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91, 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51, 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 0x77, 0x54, 0x31, 0x12, 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x9, 0xe0, 0xc3, 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0xc, 0xe5, 0xc6, 0xa3, 0x80, 0x66, 0x45, 0x20, 0x3, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40, 0x25, 0x6, 0xef, 0xcc, 0xa9, 0x8a}, + {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86, 0x47, 0x63, 0xf, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0, 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x8, 0xc9, 0xed, 0x81, 0xa5, 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x7, 0x6b, 0x4f, 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x9, 0xc8, 0xec, 0x80, 0xa4, 0x58, 
0x7c, 0x10, 0x34, 0x8f, 0xab, 0xc7, 0xe3, 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x6, 0x6a, 0x4e, 0x1, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd, 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f, 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0xe, 0x2a, 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, 0x2f, 0xb, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36, 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94, 0xf8, 0xdc, 0x20, 0x4, 0x68, 0x4c, 0x3, 0x27, 0x4b, 0x6f, 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85, 0x44, 0x60, 0xc, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x2, 0x26, 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53, 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, 0xa0, 0x84, 0x45, 0x61, 0xd, 0x29, 0xd5, 0xf1, 0x9d, 0xb9, 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0xa, 0xcb, 0xef, 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0, 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x5, 0x69, 0x4d}, + {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f, 0x20, 0x5, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30, 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, 0xa, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a, 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0xf, 0x60, 0x45, 0x8b, 0xae, 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda, 0x21, 0x4, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x1, 0x24, 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44, 0x2b, 0xe, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b, 0xc0, 0xe5, 0x8a, 0xaf, 0xb, 0x2e, 0x41, 0x64, 0x9f, 0xba, 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5, 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67, 0x8, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72, 0x89, 0xac, 0xc3, 0xe6, 0x28, 0xd, 0x62, 0x47, 0xbc, 
0x99, 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58, 0x96, 0xb3, 0xdc, 0xf9, 0x2, 0x27, 0x48, 0x6d, 0xc9, 0xec, 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93, 0x68, 0x4d, 0x22, 0x7, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0xc, 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8, 0xd7, 0xf2, 0x9, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79, 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x6, 0x69, 0x4c, 0xb7, 0x92, 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87, 0x49, 0x6c, 0x3, 0x26, 0xdd, 0xf8, 0x97, 0xb2}, + {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85, 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0xa, 0x60, 0x46, 0x99, 0xbf, 0xd5, 0xf3, 0x1, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84, 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87, 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x9, 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x2, 0x24, 0x4e, 0x68, 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e, 0x9b, 0xbd, 0xd7, 0xf1, 0x3, 0x25, 0x4f, 0x69, 0xb6, 0x90, 0xfa, 0xdc, 0x2e, 0x8, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80, 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0xe, 0x64, 0x42, 0x9d, 0xbb, 0xd1, 0xf7, 0x5, 0x23, 0x49, 0x6f, 0x5e, 0x78, 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19, 0xeb, 0xcd, 0xa7, 0x81, 0x4, 0x22, 0x48, 0x6e, 0x9c, 0xba, 0xd0, 0xf6, 0x29, 0xf, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb, 0x9f, 0xb9, 0xd3, 0xf5, 0x7, 0x21, 0x4b, 0x6d, 0xb2, 0x94, 0xfe, 0xd8, 0x2a, 0xc, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf, 0x5d, 0x7b, 0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, 0x3c, 0x1a, 0x2b, 0xd, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9, 0x6, 0x20, 0x4a, 0x6c, 0x9e, 
0xb8, 0xd2, 0xf4, 0x71, 0x57, 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36, 0xc4, 0xe2, 0x88, 0xae}, + {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0, 0x4a, 0x6d, 0x4, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48, 0x21, 0x6, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd, 0x8, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0xa, 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0xc, 0x2b, 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0xe, 0x35, 0x12, 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79, 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf, 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3, 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82, 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x3, 0xf6, 0xd1, 0xb8, 0x9f, 0x4f, 0x68, 0x1, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x7, 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x5, 0x22, 0x4b, 0x6c, 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, 0x2c, 0xb, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x9, 0x2e, 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0xf, 0x66, 0x41, 0x91, 0xb6, 0xdf, 0xf8, 0xd, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36, 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0, 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec, 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87, 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51}, + {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2, 0x1a, 0x32, 0x4a, 0x62, 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1, 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb, 0x83, 0xab, 0x73, 0x5b, 0x23, 0xb, 0x8e, 0xa6, 0xde, 0xf6, 
0x2e, 0x6, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, 0x22, 0xa, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x7, 0x7f, 0x57, 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d, 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3, 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, 0x16, 0x3e, 0x1, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9, 0x5c, 0x74, 0xc, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91, 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c, 0x44, 0x6c, 0x14, 0x3c, 0x3, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, 0xf3, 0xdb, 0x5e, 0x76, 0xe, 0x26, 0xfe, 0xd6, 0xae, 0x86, 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x8, 0x8d, 0xa5, 0xdd, 0xf5, 0x2d, 0x5, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12, 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3, 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9, 0x81, 0xa9, 0x71, 0x59, 0x21, 0x9, 0x8c, 0xa4, 0xdc, 0xf4, 0x2c, 0x4, 0x7c, 0x54, 0x2, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, 0xf2, 0xda, 0x5f, 0x77, 0xf, 0x27, 0xff, 0xd7, 0xaf, 0x87, 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd, 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d}, + {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0xe, 0x27, 0x5c, 0x75, 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x9, 0x20, 0x49, 0x60, 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67, 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69, 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee, 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43, 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x4, 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x3, 0x78, 0x51, 0x71, 0x58, 0x23, 0xa, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0xd, 0x76, 0x5f, 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 
0xc8, 0xe1, 0x9a, 0xb3, 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef, 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0xb, 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0xc, 0x77, 0x5e, 0x81, 0xa8, 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x5, 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x2, 0x79, 0x50, 0xab, 0x82, 0xf9, 0xd0, 0xf, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85, 0x5a, 0x73, 0x8, 0x21, 0x1, 0x28, 0x53, 0x7a, 0xa5, 0x8c, 0xf7, 0xde, 0x54, 0x7d, 0x6, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b, 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e, 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, 0xeb, 0xc2}, + {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0, 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9, 0x7f, 0x55, 0x2b, 0x1, 0x29, 0x3, 0x7d, 0x57, 0x81, 0xab, 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2, 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4, 0xaa, 0x80, 0x56, 0x7c, 0x2, 0x28, 0x52, 0x78, 0x6, 0x2c, 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e, 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x7, 0x79, 0x53, 0x7b, 0x51, 0x2f, 0x5, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48, 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x4, 0x2e, 0x50, 0x7a, 0xa4, 0x8e, 0xf0, 0xda, 0xc, 0x26, 0x58, 0x72, 0xe9, 0xc3, 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f, 0x3e, 0x14, 0x6a, 0x40, 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0xd, 0xdb, 0xf1, 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0xf, 0x71, 0x5b, 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d, 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0xe, 0x24, 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, 0xa, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d, 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0xb, 0x75, 0x5f, 
0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1, 0x77, 0x5d, 0x23, 0x9, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93, 0x8, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde}, + {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0xd, 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e, 0x9, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67, 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe, 0x2f, 0x4, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3, 0x57, 0x7c, 0x1, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3, 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0, 0x71, 0x5a, 0x27, 0xc, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, 0xe1, 0xca, 0x5e, 0x75, 0x8, 0x23, 0xf2, 0xd9, 0xa4, 0x8f, 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff, 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x5, 0x24, 0xf, 0x72, 0x59, 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x2, 0x29, 0x54, 0x7f, 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x6, 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15, 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0xb, 0x20, 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33, 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58, 0x25, 0xe, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1, 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, 0x3, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee, 0x7a, 0x51, 0x2c, 0x7, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e, 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d, 0x5c, 0x77, 0xa, 0x21}, + {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9, 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab, 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d, 0x59, 0x75, 0x1, 0x2d, 0x94, 0xb8, 0xcc, 
0xe0, 0x24, 0x8, 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7, 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3, 0x97, 0xbb, 0x7f, 0x53, 0x27, 0xb, 0xb2, 0x9e, 0xea, 0xc6, 0x2, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c, 0x26, 0xa, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77, 0x3, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8, 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47, 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55, 0x21, 0xd, 0xc9, 0xe5, 0x91, 0xbd, 0x4, 0x28, 0x5c, 0x70, 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3, 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0xc, 0x78, 0x54, 0xed, 0xc1, 0xb5, 0x99, 0x5d, 0x71, 0x5, 0x29, 0x4c, 0x60, 0x14, 0x38, 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x6, 0x2a, 0x5e, 0x72, 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0xf, 0xa5, 0x89, 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac, 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x7, 0x2b, 0xef, 0xc3, 0xb7, 0x9b, 0x22, 0xe, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6}, + {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d, 0x5e, 0x73, 0x4, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x6, 0x71, 0x5c, 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0xa, 0xbc, 0x91, 0xe6, 0xcb, 0x8, 0x25, 0x52, 0x7f, 0x23, 0xe, 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0xc, 0x21, 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39, 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d, 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31, 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f, 0xd9, 0xf4, 0x83, 
0xae, 0x6d, 0x40, 0x37, 0x1a, 0x3, 0x2e, 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x1, 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, 0x7, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x5, 0x72, 0x5f, 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x9, 0xbf, 0x92, 0xe5, 0xc8, 0xb, 0x26, 0x51, 0x7c, 0x20, 0xd, 0x7a, 0x57, 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0xf, 0x22, 0xe1, 0xcc, 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f, 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b, 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64, 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3, 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7, 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19}, + {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10, 0xb7, 0x99, 0xeb, 0xc5, 0xf, 0x21, 0x53, 0x7d, 0xa9, 0x87, 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6, 0x7c, 0x52, 0x20, 0xe, 0x73, 0x5d, 0x2f, 0x1, 0xcb, 0xe5, 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4, 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0xc, 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7, 0x2d, 0x3, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, 0x1c, 0x32, 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x2, 0x2c, 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12, 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0xd, 0x23, 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x8, 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39, 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x7, 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45, 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x6, 0x28, 0xe2, 0xcc, 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x9, 0x27, 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff, 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 
0xce, 0x4, 0x2a, 0x58, 0x76, 0xb, 0x25, 0x57, 0x79, 0xb3, 0x9d, 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac, 0x78, 0x56, 0x24, 0xa, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b, 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0, 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, 0x2b, 0x5}, + {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5, 0x94, 0xbb, 0x76, 0x59, 0x28, 0x7, 0xaf, 0x80, 0xf1, 0xde, 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0xe, 0x21, 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x9, 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0xf, 0x20, 0x51, 0x7e, 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x8, 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9, 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92, 0x5f, 0x70, 0x1, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, 0xae, 0x81, 0x29, 0x6, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4, 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54, 0x25, 0xa, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5, 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0xd, 0x22, 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x4, 0x75, 0x5a, 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72, 0x3, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49, 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x5, 0xc8, 0xe7, 0x96, 0xb9, 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91, 0xe0, 0xcf, 0x2, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9, 0x24, 0xb, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, 0x1f, 0x30, 0x52, 0x7d, 0xc, 0x23, 0xee, 0xc1, 0xb0, 0x9f, 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa}, + {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, 0x87, 0xb7, 0xba, 0x8a, 
0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a, 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3, 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x9, 0x39, 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x4, 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0xc, 0x1, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b, 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76, 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf, 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x5, 0x55, 0x65, 0x68, 0x58, 0x8, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75, 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, 0x18, 0x28, 0x2, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92, 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0xf, 0x6b, 0x5b, 0xb, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6, 0x36, 0x6, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41, 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14, 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce, 0x5e, 0x6e, 0x3e, 0xe, 0x3, 0x33, 0x63, 0x53, 0xc3, 0xf3, 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x7, 0x57, 0x67, 0x6a, 0x5a, 0xa, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0, 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d, 0x8d, 0xbd, 0xed, 0xdd}, + {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2, 0x37, 0x6, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93, 0xc0, 0xf1, 0x66, 0x57, 0x4, 0x35, 0x6e, 0x5f, 0xc, 0x3d, 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0xe, 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0xa, 0x9d, 0xac, 0xff, 0xce, 0xcc, 0xfd, 0xae, 0x9f, 0x8, 0x39, 0x6a, 0x5b, 0xdc, 0xed, 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a, 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9, 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16, 
0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6, 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x3, 0x32, 0x30, 0x1, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3, 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x5, 0x7, 0x36, 0x65, 0x54, 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0xf, 0x3e, 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0xd, 0x9a, 0xab, 0xf8, 0xc9, 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x9, 0x5a, 0x6b, 0x69, 0x58, 0xb, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a, 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9, 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26, 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1, 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22}, + {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x7, 0x35, 0x63, 0x51, 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, 0x26, 0x14, 0xe, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90, 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x9, 0x3b, 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2, 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0xf, 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4, 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x8, 0x12, 0x20, 0x76, 0x44, 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, 0x33, 0x1, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b, 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x6, 0x38, 0xa, 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3, 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0xd, 0x5b, 0x69, 0xf7, 0xc5, 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c, 0x36, 0x4, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89, 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x3, 0x55, 0x67, 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 
0xd8, 0xea, 0x74, 0x46, 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba, 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x5, 0x37, 0x23, 0x11, 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8, 0x66, 0x54, 0x2, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0xb, 0x39, 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92, 0xc4, 0xf6, 0x68, 0x5a, 0xc, 0x3e}, + {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e, 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0xb, 0x2e, 0x1d, 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe, 0x67, 0x54, 0x1, 0x32, 0x39, 0xa, 0x5f, 0x6c, 0xf5, 0xc6, 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25, 0x5c, 0x6f, 0x3a, 0x9, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea, 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e, 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x2, 0x31, 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb, 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x8, 0x5d, 0x6e, 0x65, 0x56, 0x3, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5, 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, 0x12, 0x21, 0x3d, 0xe, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4, 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x5, 0x36, 0x2a, 0x19, 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3, 0x5a, 0x69, 0x3c, 0xf, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18, 0x4, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7, 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x7, 0x34, 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0xc, 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef, 0xca, 0xf9, 0xac, 0x9f, 0x6, 0x35, 0x60, 0x53, 0x4f, 0x7c, 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88, 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0xd, 0x94, 0xa7, 0xf2, 0xc1}, + {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 
0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31, 0x67, 0x53, 0xf, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86, 0xa, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff, 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20, 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd, 0x51, 0x65, 0x39, 0xd, 0x3c, 0x8, 0x54, 0x60, 0xec, 0xd8, 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x2, 0x5e, 0x6a, 0x5b, 0x6f, 0x33, 0x7, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b, 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae, 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19, 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96, 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24, 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x1, 0x35, 0x69, 0x5d, 0x6c, 0x58, 0x4, 0x30, 0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82, 0xde, 0xea, 0x66, 0x52, 0xe, 0x3a, 0xb, 0x3f, 0x63, 0x57, 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf, 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70, 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0xc, 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x9, 0x55, 0x61, 0x37, 0x3, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb, 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x6}, + {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49, 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0xf, 0x50, 0x65, 0x5b, 0x6e, 0x31, 0x4, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6, 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a, 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83, 0xdc, 0xe9, 0x62, 0x57, 0x8, 0x3d, 0x3, 
0x36, 0x69, 0x5c, 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11, 0x58, 0x6d, 0x32, 0x7, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8, 0x87, 0xb2, 0x39, 0xc, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0, 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x9, 0x56, 0x63, 0x5d, 0x68, 0x37, 0x2, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44, 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b, 0x10, 0x25, 0x7a, 0x4f, 0x6, 0x33, 0x6c, 0x59, 0xd2, 0xe7, 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0xd, 0x38, 0x5e, 0x6b, 0x34, 0x1, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde, 0x81, 0xb4, 0x3f, 0xa, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76, 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0xe, 0x3b, 0x5, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e, 0xc7, 0xf2, 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d, 0xa6, 0x93, 0xcc, 0xf9}, + {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f, 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc, 0x86, 0xb0, 0x32, 0x4, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4, 0x56, 0x60, 0x3a, 0xc, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b, 0x64, 0x52, 0x8, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x1, 0x37, 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6, 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x5, 0x5f, 0x69, 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0xd, 0x22, 0x14, 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92, 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x9, 0x3f, 0xbd, 0x8b, 0xd1, 0xe7, 0x2, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80, 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73, 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2, 0x30, 0x6, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, 0x38, 0xe, 0x21, 
0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3, 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50, 0xa, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x3, 0x35, 0x6f, 0x59, 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6, 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x7, 0x5d, 0x6b, 0x8d, 0xbb, 0xe1, 0xd7, 0x55, 0x63, 0x39, 0xf, 0x20, 0x16, 0x4c, 0x7a, 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, 0x7e, 0x48, 0x67, 0x51, 0xb, 0x3d, 0xbf, 0x89, 0xd3, 0xe5}, + {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0xe, 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b, 0xb, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 0xf9, 0xce, 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x5, 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0xf, 0x56, 0x61, 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84, 0xdd, 0xea, 0x6f, 0x58, 0x1, 0x36, 0xef, 0xd8, 0x81, 0xb6, 0x33, 0x4, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0xa, 0x3d, 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5, 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x7, 0x27, 0x10, 0x49, 0x7e, 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x9, 0x3e, 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5, 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe, 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0xc, 0x7b, 0x4c, 0x15, 0x22, 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x2, 0x35, 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46, 0x66, 0x51, 0x8, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3, 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x6, 0x5f, 0x68, 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x3, 0x34, 0xb1, 0x86, 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d, 0x3a, 0xd, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 
0xa8, 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a}, + {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0xf, 0x7a, 0x42, 0xa, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b, 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6, 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81, 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0xe, 0x7b, 0x43, 0xb, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x1, 0x39, 0x71, 0x49, 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x4, 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d, 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a, 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7, 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24, 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13, 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x2, 0x3a, 0x72, 0x4a, 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x7, 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0xd, 0x78, 0x40, 0x8, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf, 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62, 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25, 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0xc, 0x79, 0x41, 0x9, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x3, 0x3b, 0x73, 0x4b, 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x6, 0x4e, 0x76}, + {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a, 0xb7, 0x8e, 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29, 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x1, 0x38, 0x97, 0xae, 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x9, 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28, 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad, 0x2, 0x3b, 0x70, 0x49, 
0x33, 0xa, 0x41, 0x78, 0xd7, 0xee, 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe, 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac, 0xe7, 0xde, 0x71, 0x48, 0x3, 0x3a, 0x40, 0x79, 0x32, 0xb, 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58, 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0xc, 0x47, 0x7e, 0x4, 0x3d, 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d, 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0xd, 0x77, 0x4e, 0x5, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c, 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b, 0x24, 0x1d, 0x56, 0x6f, 0x37, 0xe, 0x45, 0x7c, 0xd3, 0xea, 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x6, 0x3f, 0x74, 0x4d, 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c, 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0xf, 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, 0x7, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c, 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89}, + {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x4, 0xa2, 0x98, 0xd6, 0xec, 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4, 0xaa, 0x90, 0x36, 0xc, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda, 0x7c, 0x46, 0x8, 0x32, 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80, 0xeb, 0xd1, 0x9f, 0xa5, 0x3, 0x39, 0x77, 0x4d, 0xa1, 0x9b, 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x7, 0x6c, 0x56, 0x18, 0x22, 0x84, 0xbe, 0xf0, 0xca, 0x35, 0xf, 0x41, 0x7b, 0xdd, 0xe7, 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e, 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45, 0xb, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x2, 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d, 
0x6, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65, 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc, 0x7a, 0x40, 0xe, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0xa, 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3, 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d, 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x1, 0xed, 0xd7, 0x99, 0xa3, 0x5, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, 0xbc, 0x86, 0x79, 0x43, 0xd, 0x37, 0x91, 0xab, 0xe5, 0xdf, 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4, 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58, 0x33, 0x9, 0x47, 0x7d, 0xdb, 0xe1, 0xaf, 0x95}, + {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64, 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0xd, 0x36, 0x52, 0x69, 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x8, 0x45, 0x7e, 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x5, 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d, 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee, 0x4f, 0x74, 0x39, 0x2, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, 0x6b, 0x50, 0x34, 0xf, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95, 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab, 0xe6, 0xdd, 0x7c, 0x47, 0xa, 0x31, 0xc2, 0xf9, 0xb4, 0x8f, 0x2e, 0x15, 0x58, 0x63, 0x7, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d, 0x9, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60, 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3, 0x72, 0x49, 0x4, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, 0x65, 0x5e, 0x3a, 0x1, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b, 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96, 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0xc, 0xaa, 0x91, 0xdc, 0xe7, 0x46, 0x7d, 0x30, 0xb, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, 0xf5, 0xce, 0x3d, 0x6, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c, 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2, 0xef, 0xd4, 0x75, 0x4e, 0x3, 0x38, 
0x5c, 0x67, 0x2a, 0x11, 0xb0, 0x8b, 0xc6, 0xfd, 0xe, 0x35, 0x78, 0x43, 0xe2, 0xd9, 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a}, + {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3, 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67, 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x8, 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 0xc9, 0xf5, 0xb1, 0x8d, 0x39, 0x5, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, 0x33, 0xf, 0x46, 0x7a, 0x3e, 0x2, 0xb6, 0x8a, 0xce, 0xf2, 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d, 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c, 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x7, 0x3b, 0x72, 0x4e, 0xa, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57, 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2, 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, 0x4, 0x38, 0x71, 0x4d, 0x9, 0x35, 0x81, 0xbd, 0xf9, 0xc5, 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0xc, 0x45, 0x79, 0x3d, 0x1, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b, 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64, 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0xb, 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e, 0x3a, 0x6, 0x42, 0x7e, 0x3, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0xe, 0x32, 0x76, 0x4a, 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25, 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad}, + {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x3, 0x3e, 0x79, 0x44, 0x2, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce, 0x89, 0xb4, 0x7, 0x3a, 0x7d, 0x40, 0x6, 0x3b, 0x7c, 0x41, 0xf2, 0xcf, 0x88, 0xb5, 0x4, 0x39, 0x7e, 0x43, 0xf0, 0xcd, 0x8a, 0xb7, 
0xf1, 0xcc, 0x8b, 0xb6, 0x5, 0x38, 0x7f, 0x42, 0xfb, 0xc6, 0x81, 0xbc, 0xf, 0x32, 0x75, 0x48, 0xe, 0x33, 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0xc, 0x31, 0x76, 0x4b, 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0xd, 0x30, 0x77, 0x4a, 0x8, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb, 0xfd, 0xc0, 0x87, 0xba, 0x9, 0x34, 0x73, 0x4e, 0xff, 0xc2, 0x85, 0xb8, 0xb, 0x36, 0x71, 0x4c, 0xa, 0x37, 0x70, 0x4d, 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad, 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4, 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f, 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c, 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d, 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2, 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1, 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b, 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53, 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, 0x6f, 0x52}, + {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x1, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68, 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, 0x17, 0x29, 0x7e, 0x40, 0x2, 0x3c, 0x86, 0xb8, 0xfa, 0xc4, 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x3, 0x3b, 0x5, 0x47, 0x79, 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x4, 0x3a, 0x78, 0x46, 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96, 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x7, 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x6, 0x38, 0x76, 0x48, 0xa, 0x34, 0x8e, 0xb0, 0xf2, 
0xcc, 0x9b, 0xa5, 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3, 0x49, 0x77, 0x35, 0xb, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f, 0x8, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c, 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d, 0x37, 0x9, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0xf, 0xb5, 0x8b, 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a, 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0xe, 0x30, 0x67, 0x59, 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c, 0x26, 0x18, 0x5a, 0x64, 0x33, 0xd, 0x4f, 0x71, 0xcb, 0xf5, 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3, 0xf4, 0xca, 0x88, 0xb6, 0xc, 0x32, 0x70, 0x4e}, + {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, 0x55, 0x6a, 0x32, 0xd, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f, 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0xe, 0x56, 0x69, 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25, 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, 0x3, 0x3c, 0x7b, 0x44, 0x5, 0x3a, 0x87, 0xb8, 0xf9, 0xc6, 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93, 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x8, 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0xb, 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90, 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5, 0x84, 0xbb, 0x6, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7, 0xa, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c, 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x7, 0x46, 0x79, 0x45, 0x7a, 0x3b, 0x4, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1, 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, 0x10, 0x2f, 0x77, 0x48, 0x9, 0x36, 0x8b, 0xb4, 0xf5, 0xca, 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0xf, 0x30, 0x68, 0x57, 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b, 0xa6, 0x99, 0xd8, 
0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, 0x3d, 0x2, 0x3e, 0x1, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83, 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6, 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0xc, 0x33, 0x72, 0x4d, 0xf0, 0xcf, 0x8e, 0xb1}, + {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7, 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0xe, 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28, 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, 0x4f, 0xf, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x1, 0x41, 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d, 0x4d, 0xd, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37, 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, 0x24, 0x64, 0x83, 0xc3, 0x3, 0x43, 0x9e, 0xde, 0x1e, 0x5e, 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f, 0x9f, 0xdf, 0x2, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91, 0x4c, 0xc, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, 0xf6, 0xb6, 0x87, 0xc7, 0x7, 0x47, 0x9a, 0xda, 0x1a, 0x5a, 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3, 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x9, 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x8, 0xc8, 0x88, 0x1b, 0x5b, 0x9b, 0xdb, 0x6, 0x46, 0x86, 0xc6, 0x21, 0x61, 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0xa, 0xca, 0x8a, 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3, 0x4, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2, 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58, 0x85, 0xc5, 0x5, 0x45, 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, 0x4b, 0xb, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31}, + {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7, 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0xe, 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0xb, 0xd1, 0x90, 
0x53, 0x12, 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed, 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d, 0x87, 0xc6, 0x5, 0x44, 0x8d, 0xcc, 0xf, 0x4e, 0x94, 0xd5, 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65, 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a, 0x59, 0x18, 0xc2, 0x83, 0x40, 0x1, 0x45, 0x4, 0xc7, 0x86, 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb, 0x13, 0x52, 0x91, 0xd0, 0xa, 0x4b, 0x88, 0xc9, 0x7, 0x46, 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6, 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x9, 0xca, 0x8b, 0xcf, 0x8e, 0x4d, 0xc, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc, 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68, 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, 0x2, 0x43, 0x8a, 0xcb, 0x8, 0x49, 0x93, 0xd2, 0x11, 0x50, 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf, 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f, 0xc5, 0x84, 0x47, 0x6, 0x42, 0x3, 0xc0, 0x81, 0x5b, 0x1a, 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa, 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55, 0x96, 0xd7, 0xd, 0x4c, 0x8f, 0xce}, + {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x3, 0xc5, 0x87, 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea, 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x6, 0x44, 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x5, 0x4d, 0xf, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25, 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf, 0xc, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36, 0xcf, 0x8d, 0x4b, 0x9, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3, 0x35, 0x77, 0xa4, 0xe6, 
0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d, 0x8e, 0xcc, 0xa, 0x48, 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, 0xb, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63, 0xce, 0x8c, 0x4a, 0x8, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6, 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4, 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0xd, 0x4f, 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5, 0x4c, 0xe, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95, 0x53, 0x11, 0xc2, 0x80, 0x46, 0x4, 0xfd, 0xbf, 0x79, 0x3b, 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x7, 0x45, 0x96, 0xd4, 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a, 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17, 0xd1, 0x93, 0x40, 0x2, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed, 0x3e, 0x7c, 0xba, 0xf8, 0x1, 0x43, 0x85, 0xc7, 0x14, 0x56, 0x90, 0xd2}, + {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x7, 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3, 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0xe, 0x4d, 0x99, 0xda, 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e, 0xcc, 0x8f, 0x4a, 0x9, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad, 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0xd, 0x4e, 0x8b, 0xc8, 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, 0xb8, 0xfb, 0x49, 0xa, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d, 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6, 0x3, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62, 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x4, 0xd0, 0x93, 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37, 0x1a, 0x59, 0x9c, 0xdf, 0xb, 0x48, 0x8d, 0xce, 0x38, 0x7b, 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b, 0x4f, 0xc, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x5, 0x46, 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95, 0x50, 0x13, 0xc7, 0x84, 0x41, 0x2, 0xf4, 0xb7, 0x72, 0x31, 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 0x6, 0x45, 
0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1, 0x53, 0x10, 0xd5, 0x96, 0x42, 0x1, 0xc4, 0x87, 0x71, 0x32, 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a, 0x8e, 0xcd, 0x8, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0xf, 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d}, + {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef, 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36, 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90, 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x2, 0xce, 0x8a, 0x4b, 0xf, 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11, 0xca, 0x8e, 0x42, 0x6, 0xc7, 0x83, 0x4f, 0xb, 0xe4, 0xa0, 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32, 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63, 0x8c, 0xc8, 0x4, 0x40, 0x81, 0xc5, 0x9, 0x4d, 0x96, 0xd2, 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71, 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, 0x22, 0x66, 0x89, 0xcd, 0x1, 0x45, 0x84, 0xc0, 0xc, 0x48, 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91, 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x3, 0xc2, 0x86, 0x4a, 0xe, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a, 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33, 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95, 0x54, 0x10, 0xdc, 0x98, 0x43, 0x7, 0xcb, 0x8f, 0x4e, 0xa, 0xc6, 0x82, 0x5, 0x41, 0x8d, 0xc9, 0x8, 0x4c, 0x80, 0xc4, 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75, 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7, 0x26, 0x62, 0xae, 0xea}, + {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4, 0x24, 0x61, 0xae, 0xeb, 0x2d, 
0x68, 0xa7, 0xe2, 0x36, 0x73, 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0xd, 0xc2, 0x87, 0x41, 0x4, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa, 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5, 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x8, 0x4d, 0x8b, 0xce, 0x1, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60, 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f, 0x40, 0x5, 0xc3, 0x86, 0x49, 0xc, 0xfc, 0xb9, 0x76, 0x33, 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb, 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c, 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0xb, 0x4e, 0x81, 0xc4, 0x2, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1, 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x6, 0xc9, 0x8c, 0x4a, 0xf, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62, 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, 0x3c, 0x79, 0x89, 0xcc, 0x3, 0x46, 0x80, 0xc5, 0xa, 0x4f, 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0, 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38, 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0xe, 0xc8, 0x8d, 0x42, 0x7, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15}, + {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde, 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7, 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a, 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc, 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, 0xd9, 0x9f, 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95, 0x44, 0x2, 0xc8, 0x8e, 0x41, 0x7, 0xcd, 0x8b, 0x4e, 0x8, 0xc2, 0x84, 0x4b, 0xd, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2, 0x7d, 
0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3, 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6, 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60, 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71, 0x88, 0xce, 0x4, 0x42, 0x8d, 0xcb, 0x1, 0x47, 0x82, 0xc4, 0xe, 0x48, 0x87, 0xc1, 0xb, 0x4d, 0x9c, 0xda, 0x10, 0x56, 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f, 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2, 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24, 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d, 0xcc, 0x8a, 0x40, 0x6, 0xc9, 0x8f, 0x45, 0x3, 0xc6, 0x80, 0x4a, 0xc, 0xc3, 0x85, 0x4f, 0x9}, + {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca, 0x4, 0x43, 0x8a, 0xcd, 0x5, 0x42, 0x8b, 0xcc, 0x6, 0x41, 0x88, 0xcf, 0x7, 0x40, 0x89, 0xce, 0x8, 0x4f, 0x86, 0xc1, 0x9, 0x4e, 0x87, 0xc0, 0xa, 0x4d, 0x84, 0xc3, 0xb, 0x4c, 0x85, 0xc2, 0xc, 0x4b, 0x82, 0xc5, 0xd, 0x4a, 0x83, 0xc4, 0xe, 0x49, 0x80, 0xc7, 0xf, 0x48, 0x81, 0xc6, 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55, 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd, 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0, 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b, 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7, 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, 0xaf, 0xe8, 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea, 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61, 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1, 0x29, 0x6e, 0xa7, 0xe0, 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 
0x6a, 0xa3, 0xe4, 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77, 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb, 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe, 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d, 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5, 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, 0xb1, 0xf6}, + {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f, 0xf4, 0xbc, 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56, 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a, 0x1, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33, 0xeb, 0xa3, 0x46, 0xe, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f, 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, 0x20, 0x68, 0x3, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6, 0x79, 0x31, 0xe9, 0xa1, 0x44, 0xc, 0xd4, 0x9c, 0x2, 0x4a, 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0, 0x45, 0xd, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69, 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1, 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x7, 0x4f, 0x97, 0xdf, 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x8, 0xd0, 0x98, 0x6, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3, 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x9, 0xd1, 0x99, 0xf2, 0xba, 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50, 0xb5, 0xfd, 0x25, 0x6d, 0x4, 0x4c, 0x94, 0xdc, 0x39, 0x71, 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 0x43, 0xb, 0xd3, 0x9b, 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2, 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29, 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, 0x26, 0x6e, 0x5, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0, 0x7f, 0x37, 0xef, 0xa7, 0x42, 0xa, 0xd2, 0x9a}, + {0x0, 0x49, 0x92, 
0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, 0x4f, 0x6, 0x96, 0xdf, 0x4, 0x4d, 0xaf, 0xe6, 0x3d, 0x74, 0xd5, 0x9c, 0x47, 0xe, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee, 0x35, 0x7c, 0x9e, 0xd7, 0xc, 0x45, 0x31, 0x78, 0xa3, 0xea, 0x8, 0x41, 0x9a, 0xd3, 0x43, 0xa, 0xd1, 0x98, 0x7a, 0x33, 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55, 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a, 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa, 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2, 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd, 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8, 0x4a, 0x3, 0xd8, 0x91, 0x1, 0x48, 0x93, 0xda, 0x38, 0x71, 0xaa, 0xe3, 0x97, 0xde, 0x5, 0x4c, 0xae, 0xe7, 0x3c, 0x75, 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x7, 0xa6, 0xef, 0x34, 0x7d, 0x9f, 0xd6, 0xd, 0x44, 0xd4, 0x9d, 0x46, 0xf, 0xed, 0xa4, 0x7f, 0x36, 0x42, 0xb, 0xd0, 0x99, 0x7b, 0x32, 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x9, 0x40, 0x9b, 0xd2, 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff, 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb, 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3, 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc, 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c, 0xbe, 0xf7, 0x2c, 0x65}, + {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81, 0xd4, 0x9e, 0x40, 0xa, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4, 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b, 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x1, 0xea, 0xa0, 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a, 0xb, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d, 0xe3, 0xa9, 0x42, 0x8, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3, 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 
0x7d, 0x96, 0xdc, 0x2, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22, 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2, 0x3c, 0x76, 0x9d, 0xd7, 0x9, 0x43, 0x16, 0x5c, 0x82, 0xc8, 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x3, 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x5, 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70, 0xae, 0xe4, 0xf, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e, 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x4, 0x4e, 0x90, 0xda, 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf, 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0xe, 0x99, 0xd3, 0xd, 0x47, 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, 0x52, 0x18, 0x4d, 0x7, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6, 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66, 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0xc, 0xd2, 0x98, 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, 0x59, 0x13, 0x92, 0xd8, 0x6, 0x4c, 0xa7, 0xed, 0x33, 0x79}, + {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19, 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, 0x1, 0x4a, 0x95, 0xde, 0x3, 0x48, 0xa4, 0xef, 0x32, 0x79, 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a, 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee, 0x2, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x6, 0x4d, 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9, 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda, 0x7, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f, 0x93, 0xd8, 0x5, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a, 0x4, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25, 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0xc, 0x47, 0x9a, 0xd1, 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, 0xd, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24, 0xfb, 0xb0, 0x6d, 
0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2, 0xf, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2, 0xe, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5, 0x3b, 0x70, 0xad, 0xe6, 0xa, 0x41, 0x9c, 0xd7, 0x9d, 0xd6, 0xb, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22, 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x9, 0x42, 0x8, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21, 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86}, + {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x1, 0x4d, 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39, 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb, 0x2, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, 0x74, 0x38, 0x9b, 0xd7, 0x3, 0x4f, 0xb6, 0xfa, 0x2e, 0x62, 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc, 0x28, 0x64, 0x9d, 0xd1, 0x5, 0x49, 0x5e, 0x12, 0xc6, 0x8a, 0x73, 0x3f, 0xeb, 0xa7, 0x4, 0x48, 0x9c, 0xd0, 0x29, 0x65, 0xb1, 0xfd, 0x9f, 0xd3, 0x7, 0x4b, 0xb2, 0xfe, 0x2a, 0x66, 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67, 0xb3, 0xff, 0x6, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5, 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, 0x7c, 0x30, 0x93, 0xdf, 0xb, 0x47, 0xbe, 0xf2, 0x26, 0x6a, 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b, 0xbf, 0xf3, 0xa, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68, 0x91, 0xdd, 0x9, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, 0x53, 0x1f, 0x8, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1, 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f, 0xbb, 0xf7, 0xe, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad, 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0xf, 0x43, 0xba, 0xf6, 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34, 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0xc, 0x40, 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 
0x36, 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, 0xd, 0x41}, + {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9, 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21, 0xdf, 0x92, 0x45, 0x8, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, 0xe6, 0xab, 0x7, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9, 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0xf, 0xa3, 0xee, 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d, 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, 0x4b, 0x6, 0xe, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0, 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2, 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x1, 0xad, 0xe0, 0x37, 0x7a, 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, 0xe8, 0xa5, 0x9, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7, 0x49, 0x4, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56, 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a, 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, 0xc, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2, 0x4e, 0x3, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5, 0x22, 0x6f, 0x91, 0xdc, 0xb, 0x46, 0xea, 0xa7, 0x70, 0x3d, 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x2, 0x4f, 0x47, 0xa, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58, 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61, 0x9f, 0xd2, 0x5, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec, 0x40, 0xd, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe}, + {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x8, 0x46, 0xb1, 0xff, 0x2d, 0x63, 0xde, 0x90, 0x42, 0xc, 0xfb, 0xb5, 0x67, 0x29, 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31, 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73, 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, 0x52, 0x1c, 0x6a, 0x24, 
0xf6, 0xb8, 0x4f, 0x1, 0xd3, 0x9d, 0x20, 0x6e, 0xbc, 0xf2, 0x5, 0x4b, 0x99, 0xd7, 0xfe, 0xb0, 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x9, 0xb4, 0xfa, 0x28, 0x66, 0x91, 0xdf, 0xd, 0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2, 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf, 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x6, 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x2, 0x4c, 0xbb, 0xf5, 0x27, 0x69, 0x40, 0xe, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7, 0xa, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf, 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79, 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8, 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x7, 0x49, 0xf4, 0xba, 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x3, 0x2a, 0x64, 0xb6, 0xf8, 0xf, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0xb, 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c, 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51, 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87, 0x70, 0x3e, 0xec, 0xa2}, + {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2, 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74, 0xc6, 0x89, 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4, 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, 0xe8, 0xa7, 0x91, 0xde, 0xf, 0x40, 0xb0, 0xff, 0x2e, 0x61, 0xd3, 0x9c, 0x4d, 0x2, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65, 0xb4, 0xfb, 0xb, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9, 0x49, 0x6, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c, 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32, 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a, 0x9a, 0xd5, 0x4, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, 0x46, 0x9, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4, 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f, 
0x4e, 0x1, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0xc, 0x43, 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0xe, 0xdf, 0x90, 0x60, 0x2f, 0xfe, 0xb1, 0x3, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3, 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8, 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf, 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0xa, 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x7, 0x48, 0x6b, 0x24, 0xf5, 0xba, 0x4a, 0x5, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8, 0x8, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d}, + {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99, 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f, 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb, 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x1, 0x51, 0xa1, 0xf1, 0x5c, 0xc, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, 0x44, 0x14, 0x3, 0x53, 0xa3, 0xf3, 0x5e, 0xe, 0xfe, 0xae, 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 0x6a, 0x3a, 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b, 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, 0x2c, 0x7c, 0x2, 0x52, 0xa2, 0xf2, 0x5f, 0xf, 0xff, 0xaf, 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f, 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25, 0x88, 0xd8, 0x28, 0x78, 0x6, 0x56, 0xa6, 0xf6, 0x5b, 0xb, 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11, 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x7, 0x57, 0xa7, 0xf7, 0x5a, 0xa, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24, 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b, 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef, 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x5, 0x55, 0xa5, 0xf5, 0x58, 0x8, 0xf8, 0xa8, 0x4, 0x54, 
0xa4, 0xf4, 0x59, 0x9, 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13, 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87, 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a}, + {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3, 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3, 0x50, 0x1, 0xab, 0xfa, 0x9, 0x58, 0x40, 0x11, 0xe2, 0xb3, 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93, 0xf9, 0xa8, 0x5b, 0xa, 0xa0, 0xf1, 0x2, 0x53, 0x4b, 0x1a, 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73, 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, 0xc9, 0x98, 0xb, 0x5a, 0xa9, 0xf8, 0x52, 0x3, 0xf0, 0xa1, 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23, 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33, 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, 0x14, 0x45, 0x5d, 0xc, 0xff, 0xae, 0x4, 0x55, 0xa6, 0xf7, 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75, 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee, 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0xd, 0x5c, 0xf6, 0xa7, 0x54, 0x5, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce, 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47, 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x6, 0x57, 0xfd, 0xac, 0x5f, 0xe, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77, 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x7, 0xf4, 0xa5, 0xf, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e, 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, 0xd4, 0x85}, + {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9, 0x49, 0x1b, 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15, 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b, 0xdb, 
0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23, 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf, 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3, 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9, 0xf, 0x5d, 0xfe, 0xac, 0x5a, 0x8, 0x1, 0x53, 0xa5, 0xf7, 0x54, 0x6, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb, 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a, 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd, 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43, 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb, 0xd, 0x5f, 0xfc, 0xae, 0x58, 0xa, 0x3, 0x51, 0xa7, 0xf5, 0x56, 0x4, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42, 0x2, 0x50, 0xa6, 0xf4, 0x57, 0x5, 0xf3, 0xa1, 0xa8, 0xfa, 0xc, 0x5e, 0xfd, 0xaf, 0x59, 0xb, 0xd9, 0x8b, 0x7d, 0x2f, 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33, 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99}, + {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6, 0x59, 0xa, 0xff, 0xac, 0x8, 0x5b, 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0xe, 0xaa, 0xf9, 0xc, 0x5f, 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43, 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e, 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd, 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73, 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77, 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd, 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63, 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 
0xa1, 0x54, 0x7, 0xa3, 0xf0, 0x5, 0x56, 0x50, 0x3, 0xf6, 0xa5, 0x1, 0x52, 0xa7, 0xf4, 0xab, 0xf8, 0xd, 0x5e, 0xfa, 0xa9, 0x5c, 0xf, 0x9, 0x5a, 0xaf, 0xfc, 0x58, 0xb, 0xfe, 0xad, 0x40, 0x13, 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17, 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f, 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a, 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27, 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d, 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33, 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37, 0x93, 0xc0, 0x35, 0x66}, + {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b, 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7, 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x2, 0x52, 0x6, 0xfa, 0xae, 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca, 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x4, 0x50, 0xa4, 0xf0, 0xc, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2, 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0xe, 0xf2, 0xa6, 0xf6, 0xa2, 0x5e, 0xa, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38, 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23, 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x8, 0x5c, 0xa0, 0xf4, 0x55, 0x1, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4, 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28, 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a, 0xab, 0xff, 0x3, 0x57, 0x7, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c, 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0, 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x5, 0xf1, 0xa5, 0x59, 0xd, 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, 0x8e, 0xda, 0xd8, 
0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69, 0x42, 0x16, 0xea, 0xbe, 0xf, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7, 0xb, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5, 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x9, 0xf5, 0xa1}, + {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6, 0x70, 0x25, 0xda, 0x8f, 0xab, 0xfe, 0x1, 0x54, 0xe2, 0xb7, 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4, 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x3, 0x56, 0x4b, 0x1e, 0xe1, 0xb4, 0x2, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26, 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, 0x7, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0, 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a, 0xe5, 0xb0, 0x6, 0x53, 0xac, 0xf9, 0x96, 0xc3, 0x3c, 0x69, 0xdf, 0x8a, 0x75, 0x20, 0x4, 0x51, 0xae, 0xfb, 0x4d, 0x18, 0xe7, 0xb2, 0xaf, 0xfa, 0x5, 0x50, 0xe6, 0xb3, 0x4c, 0x19, 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80, 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8, 0xe, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, 0xf, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8, 0xa7, 0xf2, 0xd, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60, 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61, 0xd7, 0x82, 0x7d, 0x28, 0xc, 0x59, 0xa6, 0xf3, 0x45, 0x10, 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87, 0xa3, 0xf6, 0x9, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x8, 0x5d, 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65, 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0xa, 0x5f, 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67, 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd, 0x42, 0x17, 0xa1, 0xf4, 0xb, 0x5e}, + {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35, 0x9, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6, 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 
0x12, 0x44, 0xbe, 0xe8, 0x57, 0x1, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62, 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x8, 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e, 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8, 0x2, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7, 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0xb, 0x5d, 0xe2, 0xb4, 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89, 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x3, 0x3f, 0x69, 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f, 0xf0, 0xa6, 0x5c, 0xa, 0x48, 0x1e, 0xe4, 0xb2, 0xd, 0x5b, 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d, 0x41, 0x17, 0xed, 0xbb, 0x4, 0x52, 0xa8, 0xfe, 0xcb, 0x9d, 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0xc, 0xf6, 0xa0, 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, 0x39, 0x6f, 0x53, 0x5, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec, 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a, 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c, 0xa3, 0xf5, 0xf, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x6, 0x50, 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2, 0x58, 0xe, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d, 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x7, 0xb8, 0xee, 0x14, 0x42}, + {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e, 0xb7, 0xe0, 0x58, 0xf, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62, 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x8, 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe, 0x7, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d, 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, 0x9, 0x5e, 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5, 0xff, 0xa8, 0x51, 0x6, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x1, 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d, 0x95, 0xc2, 0x3b, 0x6c, 
0x4f, 0x18, 0xe1, 0xb6, 0xe, 0x59, 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75, 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d, 0xe4, 0xb3, 0xb, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28, 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x4, 0xfd, 0xaa, 0x12, 0x45, 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x3, 0xbb, 0xec, 0x15, 0x42, 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4, 0x4d, 0x1a, 0xa2, 0xf5, 0xc, 0x5b, 0x61, 0x36, 0xcf, 0x98, 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x2, 0x55, 0xed, 0xba, 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96, 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0xd, 0x37, 0x60, 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67, 0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0xa, 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f, 0x5, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd}, + {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x1, 0x94, 0xcc, 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86, 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x2, 0x5a, 0x35, 0x6d, 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce, 0x5b, 0x3, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16, 0x79, 0x21, 0xc9, 0x91, 0x4, 0x5c, 0xb4, 0xec, 0x6a, 0x32, 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78, 0xed, 0xb5, 0x5d, 0x5, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, 0x81, 0xd9, 0xb6, 0xee, 0x6, 0x5e, 0xcb, 0x93, 0x7b, 0x23, 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x7, 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3, 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x9, 0x9c, 0xc4, 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67, 0x8, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c, 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6, 0x53, 0xb, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0xa, 0x52, 
0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0xd, 0x62, 0x3a, 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99, 0xc, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, 0x46, 0x1e, 0x57, 0xf, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2, 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6, 0xe, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac, 0x39, 0x61, 0x89, 0xd1}, + {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60, 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0xb, 0x52, 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x4, 0x96, 0xcf, 0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84, 0xe4, 0xbd, 0x56, 0xf, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a, 0x71, 0x28, 0xba, 0xe3, 0x8, 0x51, 0x31, 0x68, 0x83, 0xda, 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x3, 0x5a, 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0xc, 0xe7, 0xbe, 0xde, 0x87, 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e, 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x7, 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x9, 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 0x62, 0x3b, 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b, 0xe9, 0xb0, 0x5b, 0x2, 0x74, 0x2d, 0xc6, 0x9f, 0xd, 0x54, 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14, 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26, 0xcd, 0x94, 0x6, 0x5f, 0xb4, 0xed, 0x58, 0x1, 0xea, 0xb3, 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33, 0x53, 0xa, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee, 0x5, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae, 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, 0x85, 0xdc, 0xbc, 0xe5, 0xe, 0x57, 0xc5, 0x9c, 0x77, 0x2e}, + {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 
0x71, 0xc9, 0x93, 0x7d, 0x27, 0xbc, 0xe6, 0x8, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0xc, 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14, 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c, 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42, 0xd9, 0x83, 0x6d, 0x37, 0x3, 0x59, 0xb7, 0xed, 0x76, 0x2c, 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x7, 0x9c, 0xc6, 0x28, 0x72, 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0xb, 0x51, 0x20, 0x7a, 0x94, 0xce, 0x55, 0xf, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62, 0xf9, 0xa3, 0x4d, 0x17, 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde, 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x6, 0x5c, 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x2, 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, 0xe, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0xa, 0xe4, 0xbe, 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39, 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae, 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, 0x6b, 0x31, 0x5, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e, 0xef, 0xb5, 0x5b, 0x1, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96, 0x78, 0x22, 0xb9, 0xe3, 0xd, 0x57, 0x26, 0x7c, 0x92, 0xc8, 0x53, 0x9, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb, 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3, 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32}, + {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45, 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4, 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0, 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x7, 0x5c, 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x8, 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa, 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77, 0xeb, 0xb0, 0x5d, 0x6, 0x78, 0x23, 0xce, 0x95, 0x9, 0x52, 0xbf, 
0xe4, 0xec, 0xb7, 0x5a, 0x1, 0x9d, 0xc6, 0x2b, 0x70, 0xe, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e, 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a, 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8, 0x5f, 0x4, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6, 0xb, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4, 0x58, 0x3, 0xee, 0xb5, 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, 0xc, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c, 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e, 0x73, 0x28, 0xb4, 0xef, 0x2, 0x59, 0x27, 0x7c, 0x91, 0xca, 0x56, 0xd, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62, 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3, 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x5, 0x5e, 0xc2, 0x99, 0x74, 0x2f, 0x51, 0xa, 0xe7, 0xbc, 0x20, 0x7b, 0x96, 0xcd}, + {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53, 0xa9, 0xf5, 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97, 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c, 0xe6, 0xba, 0x5e, 0x2, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60, 0x84, 0xd8, 0x51, 0xd, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a, 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x6, 0xe2, 0xbe, 0xed, 0xb1, 0x55, 0x9, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d, 0x69, 0x35, 0xbc, 0xe0, 0x4, 0x58, 0xb, 0x57, 0xb3, 0xef, 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b, 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7, 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c, 0xe5, 0xb9, 0x5d, 0x1, 0x52, 0xe, 0xea, 0xb6, 0x3f, 0x63, 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x3, 0x5f, 0xbb, 0xe7, 0xb4, 0xe8, 0xc, 0x50, 0xd9, 0x85, 0x61, 
0x3d, 0xc7, 0x9b, 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9, 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x7, 0x5b, 0xd2, 0x8e, 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x8, 0x54, 0xb0, 0xec, 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90, 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14, 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, 0xff, 0xa3, 0x59, 0x5, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0, 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0xa}, + {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c, 0xb9, 0xe4, 0x3, 0x5e, 0xd0, 0x8d, 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x2, 0x5f, 0xb8, 0xe5, 0x6f, 0x32, 0xd5, 0x88, 0x6, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0, 0x7, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31, 0xbf, 0xe2, 0x5, 0x58, 0x4, 0x59, 0xbe, 0xe3, 0x6d, 0x30, 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0xd, 0x50, 0xc, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a, 0xdd, 0x80, 0xe, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0xf, 0x52, 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0xb, 0x56, 0xd8, 0x85, 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0xa, 0x57, 0xb0, 0xed, 0x8, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87, 0x60, 0x3d, 0xb3, 0xee, 0x9, 0x54, 0xa1, 0xfc, 0x1b, 0x46, 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96, 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93, 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb, 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b, 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0, 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21, 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e, 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4, 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c, 0x12, 0x4f, 0xa8, 
0xf5}, + {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d, 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0xe, 0x43, 0x1d, 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0xf, 0x51, 0xb3, 0xed, 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, 0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x1, 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40, 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36, 0xb1, 0xef, 0xd, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, 0x4e, 0x10, 0x5d, 0x3, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda, 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85, 0x67, 0x39, 0xbe, 0xe0, 0x2, 0x5c, 0x98, 0xc6, 0x24, 0x7a, 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0xc, 0xee, 0xb0, 0x37, 0x69, 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x7, 0xe5, 0xbb, 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb, 0x9, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d, 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x8, 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e, 0xba, 0xe4, 0x6, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e, 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0, 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0xa, 0x8d, 0xd3, 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c, 0x61, 0x3f, 0xdd, 0x83, 0x4, 0x5a, 0xb8, 0xe6, 0x2d, 0x73, 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x5, 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0xb, 0x55, 0xb7, 0xe9}, + {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78, 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x4, 0xe5, 0xba, 0x3a, 0x65, 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf, 0xed, 0xb2, 0x53, 0xc, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9, 0x8, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95, 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x1, 0xe0, 0xbf, 0x3f, 0x60, 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 
0x43, 0x1c, 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x5, 0x5a, 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90, 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0xd, 0x52, 0xd2, 0x8d, 0x6c, 0x33, 0xe8, 0xb7, 0x56, 0x9, 0x89, 0xd6, 0x37, 0x68, 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3, 0x2, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f, 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x6, 0x86, 0xd9, 0x38, 0x67, 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0xe, 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0xa, 0x55, 0xb4, 0xeb, 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x3, 0x83, 0xdc, 0x3d, 0x62, 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24, 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x7, 0x58, 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, 0x12, 0x4d, 0xf, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f, 0x54, 0xb, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9, 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16}, + {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73, 0x69, 0x9, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc, 0x5c, 0x3c, 0x1, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b, 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x8, 0xa8, 0xc8, 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18, 0x2, 0x62, 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0xb, 0xab, 0xcb, 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84, 0x9e, 0xfe, 0x5e, 0x3e, 0x3, 0x63, 0xc3, 0xa3, 0xf7, 0x97, 0x37, 0x57, 0x6a, 0xa, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70, 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, 0x17, 0x77, 0x6d, 0xd, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50, 0x4, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43, 0xe3, 0x83, 0xbe, 0xde, 
0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76, 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0xc, 0xac, 0xcc, 0x98, 0xf8, 0x58, 0x38, 0x5, 0x65, 0xc5, 0xa5, 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0xf, 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8, 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, 0x7c, 0x1c, 0x6, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b, 0xf3, 0x93, 0x33, 0x53, 0x6e, 0xe, 0xae, 0xce, 0xd4, 0xb4, 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d, 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x7, 0x67, 0xc7, 0xa7}, + {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f, 0x9c, 0xfd, 0xc7, 0xa6, 0x5, 0x64, 0x71, 0x10, 0xb3, 0xd2, 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0xa, 0x6b, 0xc8, 0xa9, 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac, 0xf, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x4, 0xa7, 0xc6, 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x1, 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8, 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55, 0x6f, 0xe, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0xb, 0x31, 0x50, 0xf3, 0x92, 0xca, 0xab, 0x8, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84, 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37, 0xd, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c, 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x2, 0x63, 0x28, 0x49, 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x7, 0x66, 0xc5, 0xa4, 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0xc, 0x36, 0x57, 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba, 0xf1, 0x90, 0x33, 0x52, 0x68, 0x9, 0xaa, 0xcb, 0xde, 0xbf, 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0, 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, 
0x67, 0x6, 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77, 0x62, 0x3, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58}, + {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4, 0x6e, 0xc, 0xaa, 0xc8, 0xfb, 0x99, 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x8, 0x6a, 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89, 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14, 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x3, 0x30, 0x52, 0xf4, 0x96, 0x92, 0xf0, 0x56, 0x34, 0x7, 0x65, 0xc3, 0xa1, 0xcb, 0xa9, 0xf, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a, 0x69, 0xb, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d, 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42, 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1, 0xc2, 0xa0, 0x6, 0x64, 0x60, 0x2, 0xa4, 0xc6, 0xf5, 0x97, 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0xa, 0xe, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9, 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a, 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1, 0xf2, 0x90, 0x36, 0x54, 0x67, 0x5, 0xa3, 0xc1, 0xc5, 0xa7, 0x1, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a, 0x9, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0xd, 0x3e, 0x5c, 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d, 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22, 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1, 0xe2, 0x80, 0x26, 0x44}, + {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb, 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22, 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59, 0x6d, 0xe, 0xab, 0xc8, 0xc3, 0xa0, 0x5, 0x66, 0x52, 0x31, 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6, 0xbd, 0xde, 0x7b, 0x18, 0x2c, 
0x4f, 0xea, 0x89, 0xe5, 0x86, 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f, 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0xa, 0x69, 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x1, 0x35, 0x56, 0xf3, 0x90, 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45, 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x4, 0xa1, 0xc2, 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, 0xf, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3, 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca, 0x6f, 0xc, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33, 0x7, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20, 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x2, 0x61, 0x6a, 0x9, 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97, 0xa3, 0xc0, 0x65, 0x6, 0xd, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78, 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad, 0x8, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54, 0x60, 0x3, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb}, + {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0xe, 0x6a, 0xc6, 0xa2, 0x83, 0xe7, 0x4b, 0x2f, 0x9, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d, 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76, 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9, 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e, 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55, 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88, 0xa9, 0xcd, 0x61, 0x5, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, 0x66, 0x2, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0xb, 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0xc, 0x70, 0x14, 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb, 0xfa, 
0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58, 0x6c, 0x8, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0xf, 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x6, 0xaa, 0xce, 0xef, 0x8b, 0x27, 0x43, 0x65, 0x1, 0xad, 0xc9, 0xe8, 0x8c, 0x20, 0x44, 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0xd, 0x69, 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0xa, 0x6e, 0x46, 0x22, 0x8e, 0xea, 0xcb, 0xaf, 0x3, 0x67, 0x41, 0x25, 0x89, 0xed, 0xcc, 0xa8, 0x4, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72, 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39, 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c}, + {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38, 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59, 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c, 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, 0x61, 0x4, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0xb, 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12, 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x3, 0xac, 0xc9, 0xef, 0x8a, 0x25, 0x40, 0x69, 0xc, 0xa3, 0xc6, 0xe0, 0x85, 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x7, 0x62, 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x8, 0x6d, 0x5a, 0x3f, 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa, 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9, 0xee, 0x8b, 0x24, 0x41, 0x67, 0x2, 0xad, 0xc8, 0xe1, 0x84, 0x2b, 0x4e, 0x68, 0xd, 0xa2, 0xc7, 0xcc, 0xa9, 0x6, 0x63, 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x9, 0x6c, 0x4a, 0x2f, 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4, 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed, 0x42, 0x27, 0x1, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28, 0xe, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 
0x75, 0xda, 0xbf, 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde, 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x5, 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0xa, 0x2c, 0x49, 0xe6, 0x83}, + {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48, 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x1, 0x39, 0x5f, 0xf5, 0x93, 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x2, 0x64, 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x3, 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12, 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x5, 0x2a, 0x4c, 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9, 0x81, 0xe7, 0x4d, 0x2b, 0x4, 0x62, 0xc8, 0xae, 0xe4, 0x82, 0x28, 0x4e, 0x61, 0x7, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59, 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x6, 0x60, 0x4f, 0x29, 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2, 0x6d, 0xb, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c, 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9, 0xc6, 0xa0, 0xa, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e, 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x9, 0x1f, 0x79, 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x8, 0x6e, 0xc4, 0xa2, 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, 0x9c, 0xfa, 0xc2, 0xa4, 0xe, 0x68, 0x47, 0x21, 0x8b, 0xed, 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a, 0x20, 0x46, 0x69, 0xf, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23, 0xc, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0xd, 0x22, 0x44, 0xee, 0x88, 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f}, + {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x9, 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x4, 0xad, 0xca, 
0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb, 0xc3, 0xa4, 0xd, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0, 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x1, 0xa8, 0xcf, 0xc6, 0xa1, 0x8, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70, 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x5, 0x62, 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3, 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2, 0x6b, 0xc, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44, 0x6c, 0xb, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb, 0xcc, 0xab, 0x2, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6, 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27, 0xf, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x6, 0x2e, 0x49, 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98, 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0xa, 0x6d, 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82, 0xaa, 0xcd, 0x64, 0x3, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, 0x7b, 0x1c, 0x69, 0xe, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41, 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30, 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1, 0xc9, 0xae, 0x7, 0x60}, + {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62, 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1, 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39, 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a, 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d, 0x1f, 0x77, 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0, 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x1, 0x69, 0x6c, 0x4, 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0xe, 0xb, 0x63, 0xdb, 0xb3, 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91, 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8, 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 
0x8a, 0xe2, 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b, 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98, 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f, 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x7, 0x2, 0x6a, 0xd2, 0xba, 0xd8, 0xb0, 0x8, 0x60, 0x65, 0xd, 0xb5, 0xdd, 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e, 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99, 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea, 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8, 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f, 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x6, 0xbe, 0xd6, 0xd3, 0xbb, 0x3, 0x6b, 0x9, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0xc}, + {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d, 0xde, 0xb7, 0xc, 0x65, 0x67, 0xe, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0xa, 0x8, 0x61, 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3, 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16, 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 0xab, 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32, 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87, 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45, 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22, 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 0xbe, 0xd7, 0x6c, 0x5, 0x7, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x3, 0x6a, 0x68, 0x1, 0xba, 0xd3, 0x60, 0x9, 0xb2, 0xdb, 0xd9, 0xb0, 0xb, 0x62, 0xf, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0xd, 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a, 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3, 0x8e, 0xe7, 0x5c, 0x35, 
0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56, 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb, 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d, 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98, 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3}, + {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5, 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab, 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x8, 0x3, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53, 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82, 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf, 0xc4, 0xae, 0x10, 0x7a, 0x6, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, 0x67, 0xd, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55, 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda, 0x64, 0xe, 0x5, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79, 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f, 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0, 0x6e, 0x4, 0xf, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c, 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, 0xf4, 0x9e, 0xc, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x7, 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6, 0x68, 0x2, 0x9, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75, 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e, 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0xa, 0x60, 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x1, 0x93, 0xf9, 0x47, 0x2d, 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, 0x85, 0xef}, + {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95, 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 
0x3c, 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92, 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0xb, 0xb6, 0xdd, 0xd1, 0xba, 0x7, 0x6c, 0xdf, 0xb4, 0x9, 0x62, 0x6e, 0x5, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d, 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55, 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc, 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, 0xa7, 0xcc, 0xbf, 0xd4, 0x69, 0x2, 0xe, 0x65, 0xd8, 0xb3, 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7, 0xa, 0x61, 0x6d, 0x6, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0, 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e, 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7, 0x6a, 0x1, 0xd, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e, 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, 0x1b, 0x70, 0x3, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0xf, 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96, 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20, 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, 0x85, 0xee, 0x63, 0x8, 0xb5, 0xde, 0xd2, 0xb9, 0x4, 0x6f, 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10}, + {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x8, 0xbc, 0xd0, 0x1, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a, 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b, 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x9, 0xbd, 0xd1, 0x2, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b, 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0, 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f, 0x66, 0xa, 0xbe, 0xd2, 0x3, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, 0x76, 0x1a, 0x44, 0x28, 0x9c, 
0xf0, 0xe9, 0x85, 0x31, 0x5d, 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6, 0x12, 0x7e, 0x67, 0xb, 0xbf, 0xd3, 0x4, 0x68, 0xdc, 0xb0, 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93, 0xcd, 0xa1, 0x15, 0x79, 0x60, 0xc, 0xb8, 0xd4, 0x5, 0x69, 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6, 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0xd, 0xb9, 0xd5, 0x6, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d, 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c, 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0xe, 0xba, 0xd6, 0x7, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e, 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5, 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a, 0x63, 0xf, 0xbb, 0xd7}, + {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51, 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc, 0xb, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96, 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x3, 0xb4, 0xd9, 0xc7, 0xaa, 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x8, 0x16, 0x7b, 0xcc, 0xa1, 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f, 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c, 0xd, 0x60, 0xd7, 0xba, 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x6, 0x6b, 0x75, 0x18, 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d, 0x63, 0xe, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41, 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a, 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x5, 0x1b, 0x76, 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a, 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0xf, 0xb8, 0xd5, 0x1a, 0x77, 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x4, 0x55, 0x38, 0x8f, 0xe2, 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0xc, 0x61, 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4, 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25, 0x74, 0x19, 0xae, 
0xc3, 0xdd, 0xb0, 0x7, 0x6a, 0xc6, 0xab, 0x1c, 0x71, 0x6f, 0x2, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46, 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x9, 0xe7, 0x8a, 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f, 0x1, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, 0xa, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28}, + {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c, 0xb, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56, 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x1, 0xef, 0x81, 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0xa, 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, 0xfb, 0x95, 0xd5, 0xbb, 0x9, 0x67, 0x70, 0x1e, 0xac, 0xc2, 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15, 0xa7, 0xc9, 0xde, 0xb0, 0x2, 0x6c, 0xc3, 0xad, 0x1f, 0x71, 0x66, 0x8, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, 0xed, 0x83, 0x6d, 0x3, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a, 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77, 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0xe, 0x4e, 0x20, 0x92, 0xfc, 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x5, 0x12, 0x7c, 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7, 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0xf, 0x61, 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44, 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x4, 0x6a, 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c, 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x7, 0xb5, 0xdb, 0x35, 0x5b, 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0xc, 0xbe, 0xd0, 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x6, 0x68, 0x7f, 0x11, 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a, 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0xd, 0x63, 0x23, 0x4d, 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34}, + {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 
0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0xf, 0x1f, 0x70, 0xc1, 0xae, 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0xe, 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f, 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x1, 0x6e, 0x7e, 0x11, 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90, 0xc2, 0xad, 0x1c, 0x73, 0x63, 0xc, 0xbd, 0xd2, 0x9d, 0xf2, 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd, 0xdd, 0xb2, 0x3, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x2, 0x6d, 0xdc, 0xb3, 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72, 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0xd, 0x42, 0x2d, 0x9c, 0xf3, 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x8, 0xb9, 0xd6, 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17, 0xa6, 0xc9, 0xd9, 0xb6, 0x7, 0x68, 0xf8, 0x97, 0x26, 0x49, 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x6, 0x69, 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56, 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x9, 0x5b, 0x34, 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x4, 0x6b, 0xda, 0xb5, 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0xb, 0x1b, 0x74, 0xc5, 0xaa, 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0xa, 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 0x5a, 0x35, 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x5, 0x6a, 0x7a, 0x15, 0xa4, 0xcb}, + {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea, 0x53, 0x23, 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64, 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0xb, 0x9b, 0xeb, 0x1, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c, 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22, 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1, 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, 0xcb, 0xbb, 0x2, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f, 0xa5, 0xd5, 
0x45, 0x35, 0x78, 0x8, 0x98, 0xe8, 0xf7, 0x87, 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0, 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x9, 0x99, 0xe9, 0x3, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e, 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0xf, 0x9f, 0xef, 0x5, 0x75, 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61, 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, 0x6b, 0x1b, 0x4, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49, 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0xe, 0x9e, 0xee, 0x57, 0x27, 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60, 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19, 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0xd, 0x9d, 0xed, 0x7, 0x77, 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5, 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, 0xcf, 0xbf, 0x6, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b, 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0xc, 0x9c, 0xec}, + {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, 0x78, 0x9, 0xec, 0x9d, 0xe, 0x7f, 0x35, 0x44, 0xd7, 0xa6, 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58, 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56, 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b, 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23, 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e, 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x4, 0x4e, 0x3f, 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x3, 0x72, 0xd4, 0xa5, 0x36, 0x47, 0xd, 0x7c, 0xef, 0x9e, 0x7b, 0xa, 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1, 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b, 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5, 0x46, 0x37, 0x7d, 0xc, 0x9f, 0xee, 0xb, 0x7a, 
0xe9, 0x98, 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x5, 0x74, 0x3e, 0x4f, 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x2, 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x8, 0x79, 0x9c, 0xed, 0x7e, 0xf, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x1, 0x92, 0xe3, 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x6, 0x77, 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff, 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87, 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca, 0x80, 0xf1, 0x62, 0x13}, + {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4, 0x73, 0x1, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6, 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x2, 0x70, 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, 0x60, 0x12, 0x95, 0xe7, 0x71, 0x3, 0x40, 0x32, 0xa4, 0xd6, 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3, 0x35, 0x47, 0x4, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0, 0xb3, 0xc1, 0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x5, 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56, 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x6, 0x74, 0x80, 0xf2, 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2, 0x91, 0xe3, 0x75, 0x7, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc, 0x8, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe, 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x9, 0x9f, 0xed, 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, 0x68, 0x1a, 0xee, 0x9c, 0xa, 0x78, 0x3b, 0x49, 0xdf, 0xad, 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef, 0x79, 0xb, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8, 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0xc, 0x7e, 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e, 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0xd, 0x9b, 0xe9, 0x88, 0xfa, 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9, 0xea, 0x98, 0xe, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, 0xca, 0xb8, 0x4c, 0x3e, 
0xa8, 0xda, 0x99, 0xeb, 0x7d, 0xf}, + {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6, 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0xd, 0x7e, 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82, 0x79, 0xa, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6, 0x43, 0x30, 0x74, 0x7, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f, 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x4, 0x40, 0x33, 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a, 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e, 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x9, 0x57, 0x24, 0xb1, 0xc2, 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0xe, 0x7d, 0x39, 0x4a, 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x3, 0x70, 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c, 0xd9, 0xaa, 0xee, 0x9d, 0x8, 0x7b, 0x80, 0xf3, 0x66, 0x15, 0x51, 0x22, 0xb7, 0xc4, 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, 0x6b, 0x18, 0xe3, 0x90, 0x5, 0x76, 0x32, 0x41, 0xd4, 0xa7, 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35, 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x2, 0x9a, 0xe9, 0x7c, 0xf, 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0xc, 0x99, 0xea, 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe, 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x1, 0x94, 0xe7, 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x6, 0x75, 0xe0, 0x93, 0xb, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7, 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0}, + {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42, 0x94, 0xe0, 0x7c, 0x8, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52, 0xce, 0xba, 0xeb, 0x9f, 0x3, 0x77, 0xa1, 0xd5, 0x49, 0x3d, 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0xb, 0x97, 0xe3, 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 
0xbf, 0x23, 0x57, 0x6, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3, 0x92, 0xe6, 0x7a, 0xe, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b, 0xed, 0x99, 0x5, 0x71, 0x20, 0x54, 0xc8, 0xbc, 0x79, 0xd, 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62, 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x4, 0x55, 0x21, 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e, 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0xc, 0x78, 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22, 0x73, 0x7, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc, 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0xf, 0x7b, 0xd4, 0xa0, 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf, 0x9e, 0xea, 0x76, 0x2, 0xc7, 0xb3, 0x2f, 0x5b, 0xa, 0x7e, 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11, 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x1, 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x9, 0x7d, 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, 0x43, 0x37}, + {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x3, 0x76, 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13, 0x45, 0x30, 0xaf, 0xda, 0x6, 0x73, 0xec, 0x99, 0xcf, 0xba, 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf, 0x5, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff, 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0xc, 0x79, 0xe6, 0x93, 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, 0xa0, 0xd5, 0xf, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59, 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0xa, 0x7f, 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a, 0x4c, 0x39, 0xa6, 0xd3, 0x9, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0, 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2, 0x7d, 0x8, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84, 0xd2, 0xa7, 0x38, 0x4d, 0x94, 
0xe1, 0x7e, 0xb, 0x5d, 0x28, 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48, 0x91, 0xe4, 0x7b, 0xe, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68, 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0xd, 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, 0x37, 0x42, 0x9b, 0xee, 0x71, 0x4, 0x52, 0x27, 0xb8, 0xcd, 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed, 0x72, 0x7, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d, 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x2, 0x54, 0x21, 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47, 0x9e, 0xeb, 0x74, 0x1, 0x57, 0x22, 0xbd, 0xc8}, + {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb, 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87, 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf, 0x90, 0xe6, 0x7c, 0xa, 0xc2, 0xb4, 0x2e, 0x58, 0x7, 0x71, 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x9, 0x7f, 0xe5, 0x93, 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x4, 0xff, 0x89, 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2, 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62, 0x99, 0xef, 0x75, 0x3, 0x5c, 0x2a, 0xb0, 0xc6, 0xe, 0x78, 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f, 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x5, 0x9f, 0xe9, 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x8, 0x7e, 0xe3, 0x95, 0xf, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 0x2, 0x98, 0xee, 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18, 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8, 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0xc, 0x96, 0xe0, 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x1, 0x77, 0x28, 0x5e, 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x6, 0x70, 0xb8, 
0xce, 0x54, 0x22, 0x7d, 0xb, 0x91, 0xe7, 0x1c, 0x6a, 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11, 0x4e, 0x38, 0xa2, 0xd4}, + {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7, 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0xc, 0x7b, 0xbc, 0xcb, 0x52, 0x25, 0x7d, 0xa, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf, 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d, 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb, 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a, 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, 0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68, 0xca, 0xbd, 0x24, 0x53, 0xb, 0x7c, 0xe5, 0x92, 0x55, 0x22, 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0xd, 0xe9, 0x9e, 0x7, 0x70, 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x1, 0x98, 0xef, 0xb7, 0xc0, 0x59, 0x2e, 0x5, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d, 0x9a, 0xed, 0x74, 0x3, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51, 0xc8, 0xbf, 0xe7, 0x90, 0x9, 0x7e, 0xb9, 0xce, 0x57, 0x20, 0x78, 0xf, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84, 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88, 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10, 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2, 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8, 0x21, 0x56, 0xe, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9, 0x91, 0xe6, 0x7f, 0x8, 0xec, 0x9b, 0x2, 0x75, 0x2d, 0x5a, 0xc3, 0xb4, 0x73, 0x4, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b}, + {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b, 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce, 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10, 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 
0x8f, 0xf7, 0x7f, 0x7, 0x72, 0xa, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x4, 0x71, 0x9, 0x81, 0xf9, 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27, 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58, 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, 0x3a, 0x42, 0x3, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0xe, 0x76, 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae, 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9, 0xcc, 0xb4, 0x3c, 0x44, 0x5, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, 0x8, 0x70, 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97, 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2, 0x7a, 0x2, 0x77, 0xf, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36, 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8, 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16, 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x1, 0x74, 0xc, 0x84, 0xfc, 0x6, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, 0xb, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94, 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a, 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47}, + {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1, 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2, 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0xd, 0x86, 0xff, 0x8d, 0xf4, 0x7f, 0x6, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5, 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd, 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63, 0x11, 0x68, 0xe3, 0x9a, 0x7, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, 0xc, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2, 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0xa, 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x1, 0x9c, 0xe5, 0x6e, 0x17, 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, 0x5d, 0x24, 
0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb, 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x3, 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x8, 0xcd, 0xb4, 0x3f, 0x46, 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, 0x29, 0x50, 0xe, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x5, 0x7c, 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x4, 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0xf, 0x92, 0xeb, 0x60, 0x19, 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, 0xb5, 0xcc, 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23, 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x9, 0x70, 0xfb, 0x82, 0xf0, 0x89, 0x2, 0x7b, 0x25, 0x5c, 0xd7, 0xae, 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, 0xc1, 0xb8}, + {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c, 0xf3, 0x89, 0x7, 0x7d, 0x6, 0x7c, 0xf2, 0x88, 0x4, 0x7e, 0xf0, 0x8a, 0xf1, 0x8b, 0x5, 0x7f, 0xfb, 0x81, 0xf, 0x75, 0xe, 0x74, 0xfa, 0x80, 0xc, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0xd, 0x77, 0x8, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x9, 0x73, 0xff, 0x85, 0xb, 0x71, 0xa, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65, 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63, 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a, 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69, 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f, 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46, 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6, 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b, 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9, 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba, 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac, 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 
0x24, 0x5e, 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55, 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53, 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4}, + {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60, 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f, 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5, 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7, 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33, 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a, 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0xb, 0x86, 0xfd, 0x81, 0xfa, 0x77, 0xc, 0x8f, 0xf4, 0x79, 0x2, 0x7e, 0x5, 0x88, 0xf3, 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17, 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6, 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4, 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb, 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2, 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x8, 0x85, 0xfe, 0x82, 0xf9, 0x74, 0xf, 0x8c, 0xf7, 0x7a, 0x1, 0x7d, 0x6, 0x8b, 0xf0, 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64, 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x3, 0x78, 0xf5, 0x8e, 0xf2, 0x89, 0x4, 0x7f, 0xfc, 0x87, 0xa, 0x71, 0xd, 0x76, 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47, 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3, 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa, 0xd6, 0xad, 0x20, 0x5b}, + {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae, 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x2, 0x86, 0xfa, 0x54, 0x28, 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf, 0xd6, 0xaa, 0x2e, 
0x52, 0xfc, 0x80, 0x4, 0x78, 0x11, 0x6d, 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1, 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x6, 0x76, 0xa, 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35, 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x8, 0x74, 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b, 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6, 0x72, 0xe, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a, 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, 0xc, 0x70, 0xec, 0x90, 0x14, 0x68, 0x1, 0x7d, 0xf9, 0x85, 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x3, 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c, 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x5, 0x79, 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff, 0x7b, 0x7, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e, 0x77, 0xb, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, 0x48, 0x34, 0x9, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60, 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd, 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2, 0x8b, 0xf7, 0x73, 0xf, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, 0x27, 0x5b, 0xf5, 0x89, 0xd, 0x71, 0x18, 0x64, 0xe0, 0x9c}, + {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x4, 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x8, 0x75, 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5, 0x62, 0x1f, 0x71, 0xc, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0, 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, 0x25, 0x58, 0xf9, 0x84, 0x3, 0x7e, 0x10, 0x6d, 0xea, 0x97, 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x7, 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 0x2d, 0x50, 0xd7, 0xaa, 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0xb, 0x76, 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 
0xc0, 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0xf, 0x6c, 0x11, 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x2, 0xa3, 0xde, 0x59, 0x24, 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x6, 0x7b, 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e, 0x77, 0xa, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5, 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0xe, 0x73, 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34, 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x1, 0x86, 0xfb, 0xd9, 0xa4, 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91, 0xff, 0x82, 0x5, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x9, 0x67, 0x1a, 0x9d, 0xe0, 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0xd, 0x70, 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63}, + {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4, 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x3, 0x7b, 0x5, 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e, 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x6, 0x78, 0xf6, 0x88, 0xa, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f, 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7, 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x9, 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0xf, 0x68, 0x16, 0x94, 0xea, 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40, 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b, 0xc, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0xd, 0x73, 0x14, 0x6a, 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41, 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb, 0x69, 0x17, 0x70, 0xe, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x8, 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e, 0xee, 0x90, 0x12, 0x6c, 0xb, 0x75, 0xf7, 0x89, 0x7, 0x79, 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 
0x60, 0xd0, 0xae, 0x2c, 0x52, 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x4, 0x7c, 0x2, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5, 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d, 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, 0x1, 0x7f}, + {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc, 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x3, 0x82, 0xfd, 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x5, 0x65, 0x1a, 0x9b, 0xe4, 0xf8, 0x87, 0x6, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58, 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37, 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, 0x76, 0x9, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0xa, 0x75, 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92, 0x13, 0x6c, 0xc, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3, 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0xf, 0x8e, 0xf1, 0x71, 0xe, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1, 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53, 0x33, 0x4c, 0xcd, 0xb2, 0xd, 0x72, 0xf3, 0x8c, 0xec, 0x93, 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a, 0xf5, 0x8a, 0xb, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6, 0x77, 0x8, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7, 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x7, 0x78, 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x4, 0xbb, 0xc4, 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d, 0x7d, 0x2, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f, 0xe0, 0x9f, 0x1e, 0x61, 0x1, 0x7e, 0xff, 0x80}, + {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, 
0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x1, 0xa6, 0x26, 0xbb, 0x3b, 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39, 0xa4, 0x24, 0x83, 0x3, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8, 0x1f, 0x9f, 0x2, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, 0x76, 0xf6, 0x87, 0x7, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20, 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef, 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x6, 0x86, 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x4, 0x84, 0x19, 0x99, 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x5, 0xd6, 0x56, 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0xe, 0x8e, 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c, 0x8f, 0xf, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e, 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37, 0x90, 0x10, 0x8d, 0xd, 0x36, 0xb6, 0x2b, 0xab, 0xc, 0x8c, 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5, 0x94, 0x14, 0x89, 0x9, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60, 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1, 0x46, 0xc6, 0x5b, 0xdb, 0x8, 0x88, 0x15, 0x95, 0x32, 0xb2, 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe, 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0xa, 0x8a, 0xb1, 0x31, 0xac, 0x2c, 0x8b, 0xb, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58, 0xff, 0x7f, 0xe2, 0x62}, + {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc, 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x5, 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73, 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0xf, 0xaf, 0x2e, 0xb0, 0x31, 0x15, 0x94, 0xa, 0x8b, 0x2b, 0xaa, 0x34, 0xb5, 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46, 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25, 0x85, 0x4, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x1, 0x80, 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3, 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 
0xb, 0x8a, 0x56, 0xd7, 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c, 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, 0x8f, 0xe, 0x93, 0x12, 0x8c, 0xd, 0xad, 0x2c, 0xb2, 0x33, 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea, 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x8, 0x89, 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, 0x5f, 0xde, 0x2, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2, 0x86, 0x7, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b, 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca, 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, 0x9, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0xc, 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38, 0xa6, 0x27, 0x87, 0x6, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b, 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x3, 0x82, 0x1c, 0x9d}, + {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53, 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, 0x87, 0x5, 0x8d, 0xf, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24, 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7, 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba, 0x13, 0x91, 0xa, 0x88, 0x7, 0x85, 0x1e, 0x9c, 0x35, 0xb7, 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca, 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29, 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x2, 0x8a, 0x8, 0x93, 0x11, 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb, 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0xd, 0x8f, 0xe, 0x8c, 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1, 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0xb, 0x83, 0x1, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 0xe7, 0x65, 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0, 0x79, 0xfb, 
0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, 0x4, 0x86, 0x9, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0, 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43, 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e, 0x97, 0x15, 0x8e, 0xc, 0x84, 0x6, 0x9d, 0x1f, 0xb6, 0x34, 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49, 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa, 0x31, 0xb3, 0x1a, 0x98, 0x3, 0x81}, + {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76, 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x1, 0x99, 0x1a, 0xad, 0x2e, 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x3, 0xc1, 0x42, 0xda, 0x59, 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, 0x58, 0xdb, 0x19, 0x9a, 0x2, 0x81, 0x2f, 0xac, 0x34, 0xb7, 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8, 0x30, 0xb3, 0x1d, 0x9e, 0x6, 0x85, 0x9f, 0x1c, 0x84, 0x7, 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44, 0x86, 0x5, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1, 0x29, 0xaa, 0x4, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6, 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0xd, 0x95, 0x16, 0xb8, 0x3b, 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c, 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9, 0x21, 0xa2, 0xc, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb, 0x15, 0x96, 0xe, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55, 0x97, 0x14, 0x8c, 0xf, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a, 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d, 0x93, 0x10, 0x88, 0xb, 0x11, 0x92, 0xa, 0x89, 0x27, 0xa4, 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3, 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x8, 0x8b, 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24, 0x8a, 0x9, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, 0xfd, 0x7e}, + {0x0, 0x84, 0x15, 0x91, 0x2a, 
0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c, 0xbd, 0x39, 0x82, 0x6, 0x97, 0x13, 0xfc, 0x78, 0xe9, 0x6d, 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, 0x72, 0xf6, 0x19, 0x9d, 0xc, 0x88, 0x33, 0xb7, 0x26, 0xa2, 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35, 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0xa, 0x9a, 0x1e, 0x8f, 0xb, 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0xd, 0x89, 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53, 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x7, 0x96, 0x12, 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x1, 0x85, 0x14, 0x90, 0x29, 0xad, 0x3c, 0xb8, 0x3, 0x87, 0x16, 0x92, 0x7d, 0xf9, 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x5, 0x94, 0x10, 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf, 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0xf, 0x8b, 0xcc, 0x48, 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x9, 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, 0x8c, 0x8, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c, 0x1b, 0x9f, 0xe, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb, 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f, 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x4, 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed, 0x2, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9}, + {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, 0x81, 0x4, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58, 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4, 0x26, 0xa3, 0x1f, 0x9a, 0x8, 0x8d, 0xd5, 0x50, 0xc2, 0x47, 0xfb, 0x7e, 0xec, 0x69, 0x89, 0xc, 0x9e, 0x1b, 0xa7, 0x22, 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66, 0x86, 0x3, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7, 0x75, 0xf0, 0x4c, 
0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac, 0x10, 0x95, 0x7, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, 0x8e, 0xb, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57, 0xf, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6, 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b, 0x87, 0x2, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, 0xcc, 0x49, 0x11, 0x94, 0x6, 0x83, 0x3f, 0xba, 0x28, 0xad, 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41, 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0xa, 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0xe, 0x8b, 0x19, 0x9c, 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa, 0x38, 0xbd, 0x1, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59, 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x5, 0xb9, 0x3c, 0xae, 0x2b, 0x1e, 0x9b, 0x9, 0x8c, 0x30, 0xb5, 0x27, 0xa2, 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23, 0xb1, 0x34, 0x88, 0xd, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68, 0xd4, 0x51, 0xc3, 0x46}, + {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1, 0x88, 0xe, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a, 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0xd, 0x8b, 0x1c, 0x9a, 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, 0x7a, 0xfc, 0x85, 0x3, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30, 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c, 0xb, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9, 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x5, 0xb0, 0x36, 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63, 0x17, 0x91, 0x6, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5, 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x8, 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x7, 0x81, 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a, 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x9, 0xf8, 0x7e, 0xe9, 0x6f, 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 
0x9d, 0xa, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8, 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x4, 0xf5, 0x73, 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9, 0xc, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x2, 0x95, 0x13, 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5, 0x32, 0xb4, 0x1, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0, 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0xf, 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a}, + {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0xc, 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0xb, 0x8c, 0x18, 0x9f, 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32, 0xa6, 0x21, 0x93, 0x14, 0x80, 0x7, 0xf9, 0x7e, 0xea, 0x6d, 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, 0x6f, 0xe8, 0x16, 0x91, 0x5, 0x82, 0x30, 0xb7, 0x23, 0xa4, 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x9, 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3, 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, 0xe, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d, 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x2, 0x96, 0x11, 0xb4, 0x33, 0xa7, 0x20, 0x92, 0x15, 0x81, 0x6, 0xf8, 0x7f, 0xeb, 0x6c, 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0xa, 0x8d, 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2, 0x99, 0x1e, 0x8a, 0xd, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52, 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 0x1, 0x86, 0x12, 0x95, 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c, 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x3, 0x97, 0x10, 0x76, 0xf1, 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae, 0x1c, 0x9b, 0xf, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, 0xf6, 0x71, 0x8f, 0x8, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d, 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 
0xfa, 0x6e, 0xe9, 0x17, 0x90, 0x4, 0x83, 0x31, 0xb6, 0x22, 0xa5}, + {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7, 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58, 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61, 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, 0xaf, 0x27, 0x8c, 0x4, 0x81, 0x9, 0x96, 0x1e, 0x9b, 0x13, 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x1, 0x84, 0xc, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50, 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2, 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x5, 0x8d, 0x8, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4, 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc, 0xf, 0x87, 0x2, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3, 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32, 0xad, 0x25, 0xa0, 0x28, 0x83, 0xb, 0x8e, 0x6, 0x99, 0x11, 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40, 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52, 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b, 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, 0xa5, 0x2d, 0x86, 0xe, 0x8b, 0x3, 0x9c, 0x14, 0x91, 0x19, 0xa, 0x82, 0x7, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6, 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 0x6f, 0xe7, 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, 0x41, 0xc9}, + {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1, 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2, 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54, 0x88, 0x1, 0x87, 0xe, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d, 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b, 
0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, 0xd0, 0x59, 0x85, 0xc, 0x8a, 0x3, 0x9b, 0x12, 0x94, 0x1d, 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0xd, 0x84, 0x2, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7, 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1, 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52, 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19, 0x81, 0x8, 0x8e, 0x7, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x9, 0x80, 0x6, 0x8f, 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6, 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5, 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x4, 0x8d, 0xb, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe, 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7, 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c, 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x5, 0x83, 0xa, 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36}, + {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd, 0x90, 0x1a, 0x99, 0x13, 0x82, 0x8, 0x8b, 0x1, 0xb4, 0x3e, 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b, 0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac, 0x19, 0x93, 0x10, 0x9a, 0xb, 0x81, 0x2, 0x88, 0x75, 0xff, 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2, 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, 0xb6, 0x3c, 0x89, 0x3, 0x80, 0xa, 0x9b, 0x11, 0x92, 0x18, 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b, 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9, 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 
0x20, 0xaa, 0x29, 0xa3, 0x16, 0x9c, 0x1f, 0x95, 0x4, 0x8e, 0xd, 0x87, 0xea, 0x60, 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d, 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, 0xb9, 0x33, 0x86, 0xc, 0x8f, 0x5, 0x94, 0x1e, 0x9d, 0x17, 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9, 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2, 0xf, 0x85, 0x6, 0x8c, 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46, 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15, 0x96, 0x1c, 0x8d, 0x7, 0x84, 0xe, 0xbb, 0x31, 0xb2, 0x38, 0xa9, 0x23, 0xa0, 0x2a}, + {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba, 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff, 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30, 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x1, 0x81, 0xa, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e, 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6, 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1, 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, 0x38, 0xb3, 0x9, 0x82, 0x2, 0x89, 0x1f, 0x94, 0x14, 0x9f, 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a, 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15, 0x83, 0x8, 0x88, 0x3, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c, 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29, 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x5, 0x85, 0xe, 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0, 0x12, 0x99, 0x19, 0x92, 0x4, 0x8f, 0xf, 0x84, 0x3e, 0xb5, 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0xc, 0x8c, 0x7, 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49, 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc, 0x3c, 0xb7, 
0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b, 0xd, 0x86, 0x6, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5}, + {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1, 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3, 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4, 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5, 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37, 0x88, 0x4, 0x8d, 0x1, 0x82, 0xe, 0x87, 0xb, 0x9c, 0x10, 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79, 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b, 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1, 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0, 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2, 0xd, 0x81, 0x8, 0x84, 0x7, 0x8b, 0x2, 0x8e, 0x19, 0x95, 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac, 0x2f, 0xa3, 0x2a, 0xa6, 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e, 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59, 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48, 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a, 0x85, 0x9, 0x80, 0xc, 0x8f, 0x3, 0x8a, 0x6, 0x91, 0x1d, 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12}, + {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc, 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd, 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6, 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 
0xc2, 0x46, 0xcb, 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0, 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71, 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52, 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14, 0x8c, 0x1, 0x8b, 0x6, 0x82, 0xf, 0x85, 0x8, 0xa8, 0x25, 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e, 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45, 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74, 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27, 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11, 0x89, 0x4, 0x8e, 0x3, 0x87, 0xa, 0x80, 0xd, 0x3d, 0xb0, 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab, 0x2f, 0xa2, 0x28, 0xa5, 0x5, 0x88, 0x2, 0x8f, 0xb, 0x86, 0xc, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d, 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc, 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff, 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, 0x60, 0xed}, + {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89, 0x8, 0x86, 0x9, 0x87, 0xa, 0x84, 0xb, 0x85, 0xc, 0x82, 0xd, 0x83, 0xe, 0x80, 0xf, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99, 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92, 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf, 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5, 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe, 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb, 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1, 0x40, 0xce, 0x41, 0xcf, 
0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca, 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7, 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd, 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6, 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3, 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9, 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2, 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff, 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5, 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1}, + {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e, 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3, 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4, 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea, 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7, 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8, 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6, 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb, 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c, 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52, 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f, 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70, 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e, 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23, 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 
0xbb, 0x34, 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a, 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x7, 0x8b, 0x4, 0x8e, 0x1, 0x8d, 0x2, 0x84, 0xb, 0x87, 0x8, 0x82, 0xd, 0x81, 0xe}, + {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23, 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x1, 0x91, 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a, 0x8d, 0x1d, 0xb0, 0x20, 0x3, 0x93, 0x3e, 0xae, 0x79, 0xe9, 0x44, 0xd4, 0x2, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5, 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63, 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x7, 0x97, 0x3a, 0xaa, 0x7d, 0xed, 0x40, 0xd0, 0x6, 0x96, 0x3b, 0xab, 0x7c, 0xec, 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25, 0x4, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60, 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c, 0x8b, 0x1b, 0xb6, 0x26, 0x5, 0x95, 0x38, 0xa8, 0x7f, 0xef, 0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c, 0xf, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0xe, 0x9e, 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57, 0x80, 0x10, 0xbd, 0x2d, 0xc, 0x9c, 0x31, 0xa1, 0x76, 0xe6, 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f, 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0xd, 0x9d, 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x8, 0x98, 0x35, 0xa5, 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a, 0x9, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f, 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0xb, 0x9b, 0x36, 0xa6, 0x71, 0xe1, 0x4c, 0xdc, 0xa, 0x9a, 0x37, 0xa7, 0x70, 0xe0, 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29}, + {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b, 0x9b, 0xa, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, 0x58, 0xc9, 0xd7, 0x46, 
0xe8, 0x79, 0xa9, 0x38, 0x96, 0x7, 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3, 0xd, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60, 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0xe, 0x9f, 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b, 0x95, 0x4, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca, 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x9, 0xa7, 0x36, 0xe6, 0x77, 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51, 0x7d, 0xec, 0x42, 0xd3, 0x3, 0x92, 0x3c, 0xad, 0x7b, 0xea, 0x44, 0xd5, 0x5, 0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29, 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0xf, 0xa1, 0x30, 0xe0, 0x71, 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2, 0xac, 0x3d, 0x93, 0x2, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1, 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7, 0x37, 0xa6, 0x8, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, 0xf4, 0x65, 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18, 0x34, 0xa5, 0xb, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc, 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f, 0xaf, 0x3e, 0x90, 0x1, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0xc, 0xa2, 0x33, 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x6, 0x97, 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6}, + {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0xc, 0x31, 0xa3, 0x8, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25, 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8, 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f, 0x73, 0xe1, 0x4a, 0xd8, 0x1, 0x93, 0x38, 0xaa, 0x97, 0x5, 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0xd, 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, 0x9, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d, 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83, 
0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e, 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x6, 0xad, 0x3f, 0x2, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb, 0x33, 0xa1, 0xa, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45, 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0xe, 0x51, 0xc3, 0x68, 0xfa, 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d, 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x7, 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda, 0x3, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, 0xb, 0x99, 0xa4, 0x36, 0x9d, 0xf, 0xd6, 0x44, 0xef, 0x7d, 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54, 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c, 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, 0x58, 0xca}, + {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32, 0xc5, 0x56, 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81, 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x4, 0xac, 0x3f, 0xe1, 0x72, 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0xd, 0x9e, 0x36, 0xa5, 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d, 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x8, 0x9b, 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, 0x92, 0x1, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28, 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37, 0x9f, 0xc, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0, 0x3e, 0xad, 0x5, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53, 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19, 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0xb, 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, 0x2, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f, 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7, 0xf, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70, 0xae, 0x3d, 0x95, 0x6, 0x55, 0xc6, 
0x6e, 0xfd, 0x23, 0xb0, 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67, 0x90, 0x3, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef, 0x47, 0xd4, 0xa, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a, 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, 0x63, 0xf0, 0x7, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9, 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0xe, 0xa6, 0x35}, + {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0xb, 0x9f, 0x3e, 0xaa, 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37, 0x96, 0x2, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63, 0xa8, 0x3c, 0x9d, 0x9, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25, 0x3a, 0xae, 0xf, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf, 0x6e, 0xfa, 0x31, 0xa5, 0x4, 0x90, 0x8f, 0x1b, 0xba, 0x2e, 0xe5, 0x71, 0xd0, 0x44, 0x99, 0xd, 0xac, 0x38, 0xf3, 0x67, 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86, 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c, 0xcd, 0x59, 0x92, 0x6, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60, 0xab, 0x3f, 0x9e, 0xa, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf, 0xa0, 0x34, 0x95, 0x1, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22, 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3, 0x8, 0x9c, 0x3d, 0xa9, 0x3, 0x97, 0x36, 0xa2, 0x69, 0xfd, 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c, 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f, 0xce, 0x5a, 0x91, 0x5, 0xa4, 0x30, 0x9a, 0xe, 0xaf, 0x3b, 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x7, 0x93, 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79, 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0xc, 0x98, 0x53, 0xc7, 0x66, 0xf2}, + {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10, 0xa5, 
0x30, 0x92, 0x7, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec, 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5, 0x39, 0xac, 0xe, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x9, 0xab, 0x3e, 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b, 0x99, 0xc, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0, 0x1c, 0x89, 0x2b, 0xbe, 0xb, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b, 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x2, 0xa0, 0x35, 0x25, 0xb0, 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe, 0x32, 0xa7, 0x5, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d, 0x9d, 0x8, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71, 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0xf, 0x9a, 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x6, 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa, 0x58, 0xcd, 0x1, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d, 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x4, 0x91, 0x5d, 0xc8, 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86, 0x96, 0x3, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d, 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6, 0xa, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0xd}, + {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1, 0x95, 0x3, 0xa4, 0x32, 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, 0x2, 0x94, 0x37, 0xa1, 0x6, 0x90, 0x55, 0xc3, 0x64, 0xf2, 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x7, 0xa0, 0x36, 0xa2, 0x34, 0x93, 0x5, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1, 0x4, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0xc, 0x9a, 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0xd, 0xc8, 0x5e, 0xf9, 0x6f, 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0xf, 0xa8, 0x3e, 0x3f, 0xa9, 0xe, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 
0x68, 0xfe, 0x3b, 0xad, 0xa, 0x9c, 0x9d, 0xb, 0xac, 0x3a, 0xff, 0x69, 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x9, 0x8, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a, 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf, 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48, 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9, 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9, 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77, 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1, 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44, 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84, 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42, 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11}, + {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47, 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80, 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f, 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x5, 0xa1, 0x36, 0xf4, 0x63, 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0xb, 0x9c, 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75, 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0xf, 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x1, 0x96, 0x32, 0xa5, 0x39, 0xae, 0xa, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb, 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x4, 0xa0, 0x37, 0xbc, 0x2b, 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4, 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, 0x9, 0x9e, 0x90, 0x7, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52, 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82, 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef, 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, 0xd2, 0x45, 0xce, 
0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0xc, 0x2, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5, 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a, 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x6, 0xa2, 0x35, 0x3b, 0xac, 0x8, 0x9f, 0x5d, 0xca, 0x6e, 0xf9, 0x65, 0xf2, 0x56, 0xc1, 0x3, 0x94, 0x30, 0xa7, 0xa9, 0x3e, 0x9a, 0xd, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44, 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, 0x79, 0xee}, + {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed, 0x58, 0xc0, 0x2f, 0xb7, 0x2, 0x9a, 0xc1, 0x59, 0xec, 0x74, 0x9b, 0x3, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, 0x9d, 0x5, 0x5e, 0xc6, 0x73, 0xeb, 0x4, 0x9c, 0x29, 0xb1, 0x9f, 0x7, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3, 0x6, 0x9e, 0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c, 0x93, 0xb, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, 0xa, 0x92, 0xbc, 0x24, 0x91, 0x9, 0xe6, 0x7e, 0xcb, 0x53, 0x8, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb, 0xe, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0xf, 0xba, 0x22, 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0xc, 0x94, 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0xd, 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3, 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f, 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a, 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88, 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11, 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d, 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f, 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19, 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36, 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82}, + 
{0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31, 0xca, 0x53, 0xe5, 0x7c, 0x94, 0xd, 0xbb, 0x22, 0x76, 0xef, 0x59, 0xc0, 0x28, 0xb1, 0x7, 0x9e, 0xaf, 0x36, 0x80, 0x19, 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61, 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75, 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x4, 0x50, 0xc9, 0x7f, 0xe6, 0xe, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17, 0x26, 0xbf, 0x9, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x3, 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0xf, 0x96, 0x20, 0xb9, 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x5, 0xed, 0x74, 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82, 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c, 0xea, 0x73, 0x9b, 0x2, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf, 0x27, 0xbe, 0x8, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4, 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3, 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55, 0xbd, 0x24, 0x92, 0xb, 0x5f, 0xc6, 0x70, 0xe9, 0x1, 0x98, 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4, 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0, 0x6, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0xc, 0xba, 0x23, 0xcb, 0x52, 0xe4, 0x7d}, + {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45, 0x55, 0xcf, 0x7c, 0xe6, 0x7, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b, 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19, 0xf8, 0x62, 0xd1, 0x4b, 0xe, 0x94, 0x27, 0xbd, 0x5c, 0xc6, 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e, 0x5b, 0xc1, 0x72, 0xe8, 0x9, 0x93, 0x20, 0xba, 0x49, 0xd3, 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e, 0xbf, 0x25, 0x96, 0xc, 
0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, 0x67, 0xfd, 0xb8, 0x22, 0x91, 0xb, 0xea, 0x70, 0xc3, 0x59, 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x2, 0x47, 0xdd, 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x5, 0xe4, 0x7e, 0xcd, 0x57, 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, 0x69, 0xf3, 0x92, 0x8, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73, 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7, 0xc7, 0x5d, 0xee, 0x74, 0x95, 0xf, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0, 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, 0x43, 0xd9, 0x9c, 0x6, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d, 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53, 0xe0, 0x7a, 0x9b, 0x1, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68, 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, 0x4, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f, 0x2a, 0xb0, 0x3, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb, 0x58, 0xc2, 0x23, 0xb9, 0xa, 0x90, 0xd5, 0x4f, 0xfc, 0x66, 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0xd, 0x97, 0x76, 0xec, 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61}, + {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5, 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, 0x94, 0xf, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c, 0x26, 0xbd, 0xd, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54, 0xe4, 0x7f, 0x99, 0x2, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3, 0x35, 0xae, 0x1e, 0x85, 0x9, 0x92, 0x22, 0xb9, 0x5f, 0xc4, 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43, 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b, 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x6, 0x83, 0x18, 0xa8, 0x33, 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x4, 0x9f, 0x79, 0xe2, 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0xb, 0xbb, 0x20, 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89, 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0xe, 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x1, 0x9a, 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d, 
0x98, 0x3, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf, 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d, 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, 0xc, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd, 0xb7, 0x2c, 0x9c, 0x7, 0xe1, 0x7a, 0xca, 0x51, 0x5e, 0xc5, 0x75, 0xee, 0x8, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42, 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0xa, 0xba, 0x21, 0xc7, 0x5c, 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb, 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3, 0x53, 0xc8, 0x2e, 0xb5, 0x5, 0x9e}, + {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6, 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6, 0x4f, 0xd3, 0x20, 0xbc, 0x5, 0x99, 0xfe, 0x62, 0xdb, 0x47, 0xb4, 0x28, 0x91, 0xd, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38, 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x2, 0xbb, 0x27, 0x40, 0xdc, 0x65, 0xf9, 0xa, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58, 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x7, 0xf4, 0x68, 0xd1, 0x4d, 0x2a, 0xb6, 0xf, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17, 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6, 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0xc, 0xff, 0x63, 0xda, 0x46, 0x21, 0xbd, 0x4, 0x98, 0x6b, 0xf7, 0x4e, 0xd2, 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88, 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66, 0x95, 0x9, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x1, 0x9d, 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19, 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd, 0x44, 0xd8, 0x2b, 0xb7, 0xe, 0x92, 0xf5, 0x69, 0xd0, 0x4c, 0xbf, 0x23, 0x9a, 0x6, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33, 0xb, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x3, 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 
0x3e, 0xa2, 0x1b, 0x87, 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, 0xc5, 0x59}, + {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8, 0x2, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x3, 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x4, 0x99, 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x5, 0xbf, 0x22, 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x6, 0x9b, 0xf3, 0x6e, 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x7, 0x94, 0x9, 0xb3, 0x2e, 0xda, 0x47, 0xfd, 0x60, 0x8, 0x95, 0x2f, 0xb2, 0x46, 0xdb, 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0xb, 0xff, 0x62, 0xd8, 0x45, 0x2d, 0xb0, 0xa, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43, 0xf9, 0x64, 0x90, 0xd, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8, 0xc, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, 0x92, 0xf, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0xe, 0x93, 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34, 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa, 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b, 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7, 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c, 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9, 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85, 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51, 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a, 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6}, + {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, 0x76, 0xe8, 0x91, 0xf, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c, 0x2a, 0xb4, 0xb, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30, 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80, 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x4, 0xf9, 0x67, 0xd8, 
0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9, 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0xc, 0xb3, 0x2d, 0x41, 0xdf, 0x60, 0xfe, 0x3, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a, 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x7, 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x8, 0x96, 0xef, 0x71, 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17, 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0xd, 0x93, 0x6e, 0xf0, 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x2, 0xff, 0x61, 0xde, 0x40, 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c, 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x6, 0x98, 0x27, 0xb9, 0x44, 0xda, 0x65, 0xfb, 0x97, 0x9, 0xb6, 0x28, 0xd5, 0x4b, 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee, 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x1, 0x78, 0xe6, 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56, 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, 0xe, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0xa, 0xb5, 0x2b, 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d, 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8, 0x5, 0x9b, 0x24, 0xba}, + {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76, 0x5, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16, 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0xa, 0x95, 0x29, 0xb6, 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, 0xe3, 0x7c, 0xf, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5, 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b, 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x7, 0xbb, 0x24, 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, 0x74, 0xeb, 0x9d, 0x2, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67, 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0xd, 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7, 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x8, 0xb4, 0x2b, 0xd1, 0x4e, 0xf2, 0x6d, 0x28, 0xb7, 0xb, 0x94, 0x6e, 0xf1, 0x4d, 0xd2, 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 
0x5e, 0x2d, 0xb2, 0xe, 0x91, 0x6b, 0xf4, 0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d, 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x1, 0x9e, 0x64, 0xfb, 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54, 0x27, 0xb8, 0x4, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34, 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80, 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0xc, 0xf6, 0x69, 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3, 0xb5, 0x2a, 0x96, 0x9, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9, 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x6, 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x3, 0xf9, 0x66, 0xda, 0x45}, + {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f, 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x1, 0xa1, 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x3, 0xa3, 0x5e, 0xfe, 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb, 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x2, 0xa2, 0x5f, 0xff, 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, 0x88, 0x28, 0x6, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41, 0xbd, 0x1d, 0xe0, 0x40, 0x7, 0xa7, 0x5a, 0xfa, 0xd4, 0x74, 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b, 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x5, 0xa5, 0x58, 0xf8, 0x4, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43, 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e, 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a, 0xd, 0xad, 0x50, 0xf0, 0xc, 0xac, 0x51, 0xf1, 0xb6, 0x16, 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22, 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0xe, 0xae, 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48, 0xf, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0xb, 0xab, 0x56, 0xf6, 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3, 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 0xa, 0xaa, 0x57, 0xf7, 0xb0, 0x10, 0xed, 
0x4d, 0x8, 0xa8, 0x55, 0xf5, 0xb2, 0x12, 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26, 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13, 0xee, 0x4e, 0x9, 0xa9, 0x54, 0xf4}, + {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82, 0xa3, 0x2, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38, 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x6, 0x46, 0xe7, 0x19, 0xb8, 0x5b, 0xfa, 0x4, 0xa5, 0xe5, 0x44, 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a, 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef, 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0xe, 0xed, 0x4c, 0xb2, 0x13, 0x53, 0xf2, 0xc, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x8, 0xa9, 0x57, 0xf6, 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5, 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb, 0xab, 0xa, 0xf4, 0x55, 0x5e, 0xff, 0x1, 0xa0, 0xe0, 0x41, 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f, 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c, 0xa2, 0x3, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39, 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x7, 0xf9, 0x58, 0x18, 0xb9, 0x47, 0xe6, 0x5, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45, 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0, 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee, 0xae, 0xf, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0xd, 0xac, 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92, 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x9, 0xa8, 0x89, 0x28, 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4, 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, 0xaa, 0xb}, + {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50, 0xab, 0x9, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 0xd2, 0x70, 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x2, 0x4b, 0xe9, 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 
0x6b, 0xc9, 0xb, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0, 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14, 0x5d, 0xff, 0x4, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0xd, 0xf6, 0x54, 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4, 0x4f, 0xed, 0xa4, 0x6, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94, 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, 0xf, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4, 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18, 0xe3, 0x41, 0x8, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca, 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, 0xa3, 0x1, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73, 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0xa, 0xc8, 0x6a, 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a, 0x3, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, 0xc7, 0x65, 0x55, 0xf7, 0xc, 0xae, 0xe7, 0x45, 0xbe, 0x1c, 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x5, 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e, 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0xe, 0xf5, 0x57, 0x1e, 0xbc, 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e, 0x5e, 0xfc, 0x7, 0xa5, 0xec, 0x4e, 0xb5, 0x17}, + {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, 0xf, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd, 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0xb, 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3, 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, 0xa7, 0x4, 0xaf, 0xc, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1, 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee, 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x3, 0x3c, 0x9f, 0x67, 0xc4, 0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, 0x9b, 0x38, 0x7, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49, 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46, 0xbe, 0x1d, 0x53, 0xf0, 
0x8, 0xab, 0x43, 0xe0, 0x18, 0xbb, 0xf5, 0x56, 0xae, 0xd, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, 0xdf, 0x7c, 0xa1, 0x2, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef, 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39, 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13, 0x5d, 0xfe, 0x6, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, 0x95, 0x36, 0x9, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47, 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x1, 0xa2, 0x9d, 0x3e, 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0xe, 0xad, 0x55, 0xf6, 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b, 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0xa, 0xd7, 0x74, 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x5, 0xfd, 0x5e, 0x10, 0xb3, 0x4b, 0xe8}, + {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12, 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f, 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8, 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, 0x8f, 0x2b, 0xab, 0xf, 0xfe, 0x5a, 0x1, 0xa5, 0x54, 0xf0, 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6, 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca, 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, 0x1f, 0xbb, 0xa9, 0xd, 0xfc, 0x58, 0x3, 0xa7, 0x56, 0xf2, 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x2, 0xa6, 0x57, 0xf3, 0xa8, 0xc, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28, 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf, 0xad, 0x9, 0xf8, 0x5c, 0x7, 0xa3, 0x52, 0xf6, 0x76, 0xd2, 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce, 0x95, 0x31, 0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf, 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x6, 0xa2, 0x53, 0xf7, 0xac, 0x8, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67, 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, 
0x20, 0x84, 0x4, 0xa0, 0x51, 0xf5, 0xae, 0xa, 0xfb, 0x5f, 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0xb, 0xfa, 0x5e, 0x5, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17, 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f}, + {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70, 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45, 0x58, 0xfd, 0xf, 0xaa, 0xf6, 0x53, 0xa1, 0x4, 0x9b, 0x3e, 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28, 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f, 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54, 0xa6, 0x3, 0x5f, 0xfa, 0x8, 0xad, 0x2b, 0x8e, 0x7c, 0xd9, 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, 0x93, 0x36, 0xa9, 0xc, 0xfe, 0x5b, 0x7, 0xa2, 0x50, 0xf5, 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1, 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7, 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, 0x1f, 0xba, 0xa7, 0x2, 0xf0, 0x55, 0x9, 0xac, 0x5e, 0xfb, 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99, 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0xd, 0x51, 0xf4, 0x6, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, 0x47, 0xe2, 0x56, 0xf3, 0x1, 0xa4, 0xf8, 0x5d, 0xaf, 0xa, 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71, 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67, 0x3b, 0x9e, 0x6c, 0xc9, 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, 0xb6, 0x13, 0xe, 0xab, 0x59, 0xfc, 0xa0, 0x5, 0xf7, 0x52, 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29, 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0}, + {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7, 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 
0x18, 0xbe, 0x79, 0xdf, 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7, 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7, 0xf2, 0x54, 0xa3, 0x5, 0x50, 0xf6, 0x1, 0xa7, 0xab, 0xd, 0xfa, 0x5c, 0x9, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7, 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde, 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f, 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97, 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0xe, 0x5b, 0xfd, 0xa, 0xac, 0xa0, 0x6, 0xf1, 0x57, 0x2, 0xa4, 0x53, 0xf5, 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4, 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77, 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67, 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0xb, 0xad, 0x5a, 0xfc, 0xa9, 0xf, 0xf8, 0x5e, 0x52, 0xf4, 0x3, 0xa5, 0xf0, 0x56, 0xa1, 0x7, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5, 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d, 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37, 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, 0x6a, 0xcc}, + {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3, 0xa2, 0x5, 0xf1, 0x56, 0x4, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x7, 0x55, 0xf2, 0x6, 0xa1, 0x59, 0xfe, 0xa, 0xad, 0xff, 0x58, 0xac, 0xb, 0x8, 0xaf, 0x5b, 0xfc, 0xae, 0x9, 0xfd, 0x5a, 0xfb, 0x5c, 0xa8, 0xf, 0x5d, 0xfa, 0xe, 0xa9, 0xaa, 0xd, 0xf9, 0x5e, 0xc, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46, 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42, 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c, 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e, 0x1c, 0xbb, 0x4f, 
0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a, 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f, 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f, 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72, 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25, 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27, 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8, 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f, 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66, 0x34, 0x93, 0x67, 0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62, 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33}, + {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x4, 0xa4, 0xc, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25, 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13, 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, 0x8, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a, 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x3, 0x7, 0xaf, 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb, 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0xf, 0xa7, 0xa3, 0xb, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22, 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x2, 0xe7, 0x4f, 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87, 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x6, 0xae, 0xe, 0xa6, 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2, 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 
0x47, 0xa2, 0xa, 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e, 0x9b, 0x33, 0x4c, 0xe4, 0x1, 0xa9, 0xad, 0x5, 0xe0, 0x48, 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24, 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0xd, 0x9, 0xa1, 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5, 0xba, 0x12, 0xf7, 0x5f}, + {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59, 0x42, 0xeb, 0xd, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca, 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62, 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0xc, 0xea, 0x43, 0x3b, 0x92, 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe, 0xe7, 0x4e, 0xa8, 0x1, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc, 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2, 0xaa, 0x3, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0xe, 0x91, 0x38, 0xde, 0x77, 0xf, 0xa6, 0x40, 0xe9, 0xb0, 0x19, 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35, 0x4d, 0xe4, 0x2, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52, 0xb, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1, 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x6, 0xaf, 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x7, 0xe1, 0x48, 0x30, 0x99, 0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7, 0xec, 0x45, 0xa3, 0xa, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64, 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9, 0xa1, 0x8, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x5, 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12, 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c, 0x4, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x9, 0xa0}, + {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 
0xab, 0x1, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91, 0xe0, 0x4a, 0xa9, 0x3, 0x4b, 0xe1, 0x2, 0xa8, 0xd9, 0x73, 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x7, 0x76, 0xdc, 0x3f, 0x95, 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x6, 0xac, 0x96, 0x3c, 0xdf, 0x75, 0x4, 0xae, 0x4d, 0xe7, 0xaf, 0x5, 0xe6, 0x4c, 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, 0xe, 0xa4, 0xec, 0x46, 0xa5, 0xf, 0x7e, 0xd4, 0x37, 0x9d, 0xa7, 0xd, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34, 0xd7, 0x7d, 0xc, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2, 0xa3, 0x9, 0xea, 0x40, 0x8, 0xa2, 0x41, 0xeb, 0x9a, 0x30, 0xd3, 0x79, 0x43, 0xe9, 0xa, 0xa0, 0xd1, 0x7b, 0x98, 0x32, 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0xb, 0xb7, 0x1d, 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d, 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d, 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0, 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2, 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13, 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba, 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca, 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, 0x5d, 0xf7, 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce, 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67, 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc}, + {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14, 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f, 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15, 0x63, 0xc8, 0x28, 0x83, 0xa6, 0xd, 0xed, 0x46, 0x30, 0x9b, 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x1, 0xaa, 0x4a, 0xe1, 0x95, 0x3e, 0xde, 0x75, 0x3, 0xa8, 0x48, 0xe3, 0xa4, 0xf, 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17, 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 
0x50, 0xfb, 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27, 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98, 0x78, 0xd3, 0xa5, 0xe, 0xee, 0x45, 0x2, 0xa9, 0x49, 0xe2, 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0xa, 0xea, 0x41, 0x6, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70, 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf, 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13, 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x7, 0xac, 0x4c, 0xe7, 0xa0, 0xb, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x9, 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73, 0x5, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87, 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc, 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x4, 0xaf, 0x4f, 0xe4, 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x8, 0xe8, 0x43}, + {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe, 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2, 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x2, 0xeb, 0x47, 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0xb, 0xe2, 0x4e, 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93, 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0xd, 0xa1, 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x4, 0xa8, 0xcb, 0x67, 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39, 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 0xc0, 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0xf, 0x65, 0xc9, 0x20, 0x8c, 0xef, 0x43, 0xaa, 0x6, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14, 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35, 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b, 0x8, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x1, 0xad, 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7, 0xbd, 0x11, 0xf8, 0x54, 
0x37, 0x9b, 0x72, 0xde, 0xa6, 0xa, 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x3, 0xea, 0x46, 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2, 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x5, 0xa9, 0xc3, 0x6f, 0x86, 0x2a, 0x49, 0xe5, 0xc, 0xa0, 0xfc, 0x50, 0xb9, 0x15, 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, 0x3a, 0x96, 0xee, 0x42, 0xab, 0x7, 0x64, 0xc8, 0x21, 0x8d, 0xe7, 0x4b, 0xa2, 0xe, 0x6d, 0xc1, 0x28, 0x84}, + {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65, 0x2, 0xaf, 0x45, 0xe8, 0x8c, 0x21, 0xcb, 0x66, 0x3, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67, 0x4, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x5, 0xa8, 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x6, 0xab, 0x41, 0xec, 0x88, 0x25, 0xcf, 0x62, 0x7, 0xaa, 0x40, 0xed, 0x89, 0x24, 0xce, 0x63, 0x8, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c, 0x9, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0xa, 0xa7, 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0xb, 0xa6, 0x4c, 0xe1, 0x85, 0x28, 0xc2, 0x6f, 0xc, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, 0xc5, 0x68, 0xd, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69, 0xe, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0xf, 0xa2, 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa, 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76, 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9, 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff, 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73, 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4, 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0, 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78, 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3, 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5, 
0x91, 0x3c, 0xd6, 0x7b}, + {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74, 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85, 0x6a, 0xc4, 0xa9, 0x7, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b, 0xe6, 0x48, 0xa7, 0x9, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b, 0x4f, 0xe1, 0xe, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66, 0x89, 0x27, 0x4a, 0xe4, 0xb, 0xa5, 0xd1, 0x7f, 0x90, 0x3e, 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0xc, 0x61, 0xcf, 0x20, 0x8e, 0xac, 0x2, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b, 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71, 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x5, 0xab, 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0xf, 0xa1, 0x4e, 0xe0, 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11, 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x8, 0xe7, 0x49, 0x24, 0x8a, 0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x6, 0x6b, 0xc5, 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d, 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c, 0x83, 0x2d, 0x40, 0xee, 0x1, 0xaf, 0x45, 0xeb, 0x4, 0xaa, 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a, 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x3, 0x21, 0x8f, 0x60, 0xce, 0xa3, 0xd, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7, 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, 0xd0, 0x7e, 0xa, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67}, + {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce, 0xa4, 0xb, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, 0xf6, 0x59, 0x44, 0xeb, 0x7, 0xa8, 0xc2, 0x6d, 0x81, 0x2e, 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9, 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0xc, 0x77, 0xd8, 0x34, 0x9b, 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0xe, 0xa1, 0x4d, 0xe2, 0x99, 0x36, 0xda, 
0x75, 0x1f, 0xb0, 0x5c, 0xf3, 0xaa, 0x5, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14, 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20, 0x4a, 0xe5, 0x9, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, 0x18, 0xb7, 0xee, 0x41, 0xad, 0x2, 0x68, 0xc7, 0x2b, 0x84, 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0xd, 0xa2, 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0, 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x6, 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54, 0x49, 0xe6, 0xa, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7, 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87, 0xed, 0x42, 0xae, 0x1, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x3, 0xac, 0x40, 0xef, 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x8, 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a, 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, 0x4, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba, 0xe3, 0x4c, 0xa0, 0xf, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d, 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98}, + {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x2, 0x35, 0x85, 0x48, 0xf8, 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33, 0xfe, 0x4e, 0x79, 0xc9, 0x4, 0xb4, 0x6a, 0xda, 0x17, 0xa7, 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x6, 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92, 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42, 0x8f, 0x3f, 0x8, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19, 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, 0xba, 0xa, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf, 0x71, 0xc1, 0xc, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7, 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0xe, 0xc3, 0x73, 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, 0xb1, 0x1, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8, 0xf9, 0x49, 0x84, 0x34, 0x3, 0xb3, 0x7e, 0xce, 0x10, 0xa0, 0x6d, 
0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x5, 0xc8, 0x78, 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, 0xdb, 0x6b, 0x7a, 0xca, 0x7, 0xb7, 0x80, 0x30, 0xfd, 0x4d, 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d, 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x9, 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, 0x65, 0xd5, 0xb, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c, 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7, 0x3a, 0x8a, 0xbd, 0xd, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac, 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, 0xf, 0xbf}, + {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e, 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0, 0xc0, 0x71, 0xbf, 0xe, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0xd, 0xc3, 0x72, 0x7c, 0xcd, 0x3, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c, 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95, 0xa5, 0x14, 0xda, 0x6b, 0xba, 0xb, 0xc5, 0x74, 0x44, 0xf5, 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x5, 0xb4, 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49, 0x87, 0x36, 0x6, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7, 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x8, 0x38, 0x89, 0x47, 0xf6, 0xb6, 0x7, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6, 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7, 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, 0x9, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25, 0xf4, 0x45, 0x8b, 0x3a, 0xa, 0xbb, 0x75, 0xc4, 0xca, 0x7b, 0xb5, 0x4, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5, 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, 0x6c, 0xdd, 0xc, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c, 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x2, 0xd3, 0x62, 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80, 0xb0, 0x1, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 
0x61, 0x51, 0xe0, 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1, 0x70, 0xc1, 0xf, 0xbe, 0x8e, 0x3f, 0xf1, 0x40}, + {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f, 0xc3, 0x71, 0xba, 0x8, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88, 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x3, 0x2c, 0x9e, 0x55, 0xe7, 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2, 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6, 0xd, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46, 0x7f, 0xcd, 0x6, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98, 0xb7, 0x5, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc, 0x37, 0x85, 0xbc, 0xe, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0, 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0xf, 0x36, 0x84, 0x4f, 0xfd, 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x4, 0xe8, 0x5a, 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda, 0xe3, 0x51, 0x9a, 0x28, 0x7, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0xc, 0xbe, 0x75, 0xc7, 0xb0, 0x2, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb, 0x30, 0x82, 0xbb, 0x9, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94, 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, 0x2d, 0x9f, 0x73, 0xc1, 0xa, 0xb8, 0x81, 0x33, 0xf8, 0x4a, 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x1, 0xb3, 0x9c, 0x2e, 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae, 0x97, 0x25, 0xee, 0x5c}, + {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf, 0xff, 0x4c, 0x84, 0x37, 0x9, 0xba, 0x72, 0xc1, 0xe, 0xbd, 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b, 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22, 0xed, 0x5e, 
0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68, 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2, 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb, 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x6, 0xc9, 0x7a, 0xb2, 0x1, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0xf, 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, 0xbb, 0x8, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95, 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7, 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d, 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0xd, 0xc5, 0x76, 0xb9, 0xa, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87, 0xb7, 0x4, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89, 0x46, 0xf5, 0x3d, 0x8e, 0xb0, 0x3, 0xcb, 0x78, 0x70, 0xc3, 0xb, 0xb8, 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, 0xc, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x2, 0xb1, 0x7e, 0xcd, 0x5, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20, 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa, 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3}, + {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e, 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, 0xd9, 0x6d, 0x3, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28, 0xca, 0x7e, 0xbf, 0xb, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38, 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84, 0xaf, 0x1b, 0xda, 0x6e, 0x6, 0xb2, 0x73, 0xc7, 0xec, 0x58, 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0xe, 0x25, 0x91, 0x50, 0xe4, 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4, 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x5, 0xb1, 0x70, 0xc4, 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0xd, 0x26, 0x92, 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1, 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0xc, 0xb8, 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 
0xb0, 0x4, 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61, 0xf, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72, 0xb3, 0x7, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41, 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, 0xd6, 0x62, 0xa, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21, 0xc3, 0x77, 0xb6, 0x2, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31, 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d, 0xa6, 0x12, 0xd3, 0x67, 0x9, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x1, 0x2a, 0x9e, 0x5f, 0xeb, 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 0x4f, 0xfb, 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64}, + {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x6, 0xb3, 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x5, 0xc7, 0x72, 0x23, 0x96, 0x54, 0xe1, 0xcd, 0x78, 0xba, 0xf, 0xe2, 0x57, 0x95, 0x20, 0xc, 0xb9, 0x7b, 0xce, 0xbc, 0x9, 0xcb, 0x7e, 0x52, 0xe7, 0x25, 0x90, 0x7d, 0xc8, 0xa, 0xbf, 0x93, 0x26, 0xe4, 0x51, 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32, 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b, 0x37, 0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49, 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f, 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9, 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61, 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67, 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d, 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c, 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f, 0xbd, 0x8, 0x24, 0x91, 0x53, 0xe6, 0xb, 0xbe, 0x7c, 0xc9, 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0xe, 0xcc, 0x79, 0x94, 
0x21, 0xe3, 0x56, 0x7a, 0xcf, 0xd, 0xb8, 0xe9, 0x5c, 0x9e, 0x2b, 0x7, 0xb2, 0x70, 0xc5, 0x28, 0x9d, 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x4, 0x76, 0xc3, 0x1, 0xb4, 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x2, 0xc0, 0x75, 0x59, 0xec, 0x2e, 0x9b}, + {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19, 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x7, 0xb1, 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x9, 0xbf, 0xec, 0x5a, 0x9d, 0x2b, 0xe, 0xb8, 0x7f, 0xc9, 0x35, 0x83, 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41, 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0xb, 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0xc, 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73, 0xb4, 0x2, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb, 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, 0xf9, 0x4f, 0xb3, 0x5, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96, 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e, 0xb9, 0xf, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x8, 0xcf, 0x79, 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x6, 0xc1, 0x77, 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b, 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3, 0xc6, 0x70, 0xb7, 0x1, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, 0x4, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b, 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57, 0x90, 0x26, 0x3, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13, 0x36, 0x80, 0x47, 0xf1, 0xd, 0xbb, 0x7c, 0xca, 0xef, 0x59, 0x9e, 0x28, 0x7b, 0xcd, 0xa, 0xbc, 0x99, 0x2f, 0xe8, 0x5e, 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87}, + {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x8, 0xcc, 0x7b, 0x59, 0xee, 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c, 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 
0xb2, 0x5, 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18, 0x3a, 0x8d, 0x49, 0xfe, 0xd, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x2, 0x20, 0x97, 0x53, 0xe4, 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce, 0xa, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c, 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, 0x30, 0x87, 0x74, 0xc3, 0x7, 0xb0, 0x92, 0x25, 0xe1, 0x56, 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c, 0xb8, 0xf, 0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55, 0x77, 0xc0, 0x4, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, 0xd5, 0x62, 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0xc, 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45, 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7, 0xc5, 0x72, 0xb6, 0x1, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x9, 0xbe, 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x6, 0xc2, 0x75, 0x86, 0x31, 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c, 0xe, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16, 0xe5, 0x52, 0x96, 0x21, 0x3, 0xb4, 0x70, 0xc7, 0x8b, 0x3c, 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e, 0xbc, 0xb, 0xcf, 0x78}, + {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6, 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e, 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b, 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x4, 0xb, 0xb3, 0x66, 0xde, 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99, 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d, 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x3, 0xbb, 0xb4, 0xc, 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8, 0xbf, 0x7, 0xd2, 0x6a, 0x65, 0xdd, 0x8, 0xb0, 0x16, 0xae, 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25, 0x2a, 0x92, 0x47, 0xff, 
0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d, 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0xd, 0xb5, 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x2, 0xa4, 0x1c, 0xc9, 0x71, 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x9, 0x6, 0xbe, 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a, 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82, 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35, 0x63, 0xdb, 0xe, 0xb6, 0xb9, 0x1, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23, 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45, 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81, 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0xa, 0xdf, 0x67, 0x68, 0xd0, 0x5, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14}, + {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89, 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, 0x4f, 0xf6, 0xbe, 0x7, 0xd1, 0x68, 0x60, 0xd9, 0xf, 0xb6, 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58, 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96, 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0xe, 0xb7, 0xbf, 0x6, 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8, 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26, 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x9, 0x1, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88, 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b, 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0xc, 0xb5, 0xbd, 0x4, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34, 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64, 0xb2, 0xb, 0x3, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5, 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab, 
0x2, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0xa, 0xfc, 0x45, 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b, 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, 0xac, 0x15, 0xbc, 0x5, 0xd3, 0x6a, 0x62, 0xdb, 0xd, 0xb4, 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a, 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb}, + {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8, 0x6f, 0xd5, 0x6, 0xbc, 0xbd, 0x7, 0xd4, 0x6e, 0xd6, 0x6c, 0xbf, 0x5, 0x4, 0xbe, 0x6d, 0xd7, 0xde, 0x64, 0xb7, 0xd, 0xc, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0xe, 0xb4, 0xb5, 0xf, 0xdc, 0x66, 0xb1, 0xb, 0xd8, 0x62, 0x63, 0xd9, 0xa, 0xb0, 0x8, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x9, 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2, 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d, 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e, 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa, 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a, 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7, 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33, 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52, 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef, 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44, 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94, 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29, 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23, 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c, 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, 0x4d, 0xf7}, + {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7, 0x7f, 0xc4, 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 
0xce, 0x75, 0xa5, 0x1e, 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49, 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b, 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 0x8a, 0x31, 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98, 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4, 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e, 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0xb, 0xb0, 0xb6, 0xd, 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x1, 0x7, 0xbc, 0x6c, 0xd7, 0xdf, 0x64, 0xb4, 0xf, 0x9, 0xb2, 0x62, 0xd9, 0x6e, 0xd5, 0x5, 0xbe, 0xb8, 0x3, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70, 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27, 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5, 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f, 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89, 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b, 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10, 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, 0xcc, 0x77, 0xbf, 0x4, 0xd4, 0x6f, 0x69, 0xd2, 0x2, 0xb9, 0xe, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x8}, + {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a, 0xf, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95, 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0xd, 0x97, 0x2b, 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8, 0xdb, 0x67, 0xbe, 0x2, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f, 0xb5, 0x9, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f, 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x6, 0xdf, 0x63, 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, 0x8d, 
0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x4, 0xb8, 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18, 0xc1, 0x7d, 0x6e, 0xd2, 0xb, 0xb7, 0x78, 0xc4, 0x1d, 0xa1, 0xb2, 0xe, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x1, 0xd8, 0x64, 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda, 0x3, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36, 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0xc, 0xb0, 0xa3, 0x1f, 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3, 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71, 0xa8, 0x14, 0x7, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92, 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x8, 0xb4, 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49, 0xd3, 0x6f, 0xb6, 0xa, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9, 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x5, 0x16, 0xaa, 0x73, 0xcf}, + {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95, 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0xb, 0x9e, 0x23, 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4, 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x2, 0xd8, 0x65, 0x71, 0xcc, 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35, 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x9, 0xb4, 0x7c, 0xc1, 0x1b, 0xa6, 0xb2, 0xf, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27, 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x4, 0xb9, 0xad, 0x10, 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6, 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e, 0xa4, 0x19, 0xd, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87, 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x6, 0x12, 0xaf, 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec, 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0xa, 0xd0, 0x6d, 0xe7, 0x5a, 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x1, 0xbc, 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x8, 0xb5, 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53, 0xd9, 0x64, 0xbe, 0x3, 0x17, 0xaa, 0x70, 
0xcd, 0x58, 0xe5, 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e, 0x4a, 0xf7, 0x2d, 0x90, 0x5, 0xb8, 0x62, 0xdf, 0xcb, 0x76, 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f, 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0xe, 0xba, 0x7, 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1, 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, 0xc, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30}, + {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0, 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x8, 0xd7, 0x69, 0x74, 0xca, 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43, 0xc7, 0x79, 0xa6, 0x18, 0x5, 0xbb, 0x64, 0xda, 0x71, 0xcf, 0x10, 0xae, 0xb3, 0xd, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37, 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x2, 0xdd, 0x63, 0x7e, 0xc0, 0x1f, 0xa1, 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38, 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0xa, 0xb4, 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d, 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x7, 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0xf, 0xb1, 0x6e, 0xd0, 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb, 0x4, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23, 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0xc, 0x11, 0xaf, 0x70, 0xce, 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c, 0xc3, 0x7d, 0x60, 0xde, 0x1, 0xbf, 0x14, 0xaa, 0x75, 0xcb, 0xd6, 0x68, 0xb7, 0x9, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x6, 0x1b, 0xa5, 0x7a, 0xc4, 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48, 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0xe, 0xb0, 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x3, 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0xb, 0xb5, 0x31, 0x8f, 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c}, + {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 
0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25, 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0xb, 0xb4, 0x7e, 0xc1, 0x1d, 0xa2, 0xb8, 0x7, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33, 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0xc, 0x16, 0xa9, 0x75, 0xca, 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2, 0xe, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f, 0x5, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98, 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x9, 0xbd, 0x2, 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0, 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0xd, 0xd1, 0x6e, 0xda, 0x65, 0xb9, 0x6, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4, 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47, 0x5d, 0xe2, 0x3e, 0x81, 0xa, 0xb5, 0x69, 0xd6, 0xcc, 0x73, 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x1, 0xbe, 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6, 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x3, 0x88, 0x37, 0xeb, 0x54, 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, 0x83, 0x3c, 0xb7, 0x8, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad, 0x67, 0xd8, 0x4, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49, 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84, 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0xf, 0xb0, 0x6c, 0xd3}, + {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c, 0x1, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f, 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x2, 0xc2, 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11, 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x3, 0xc3, 0xf7, 0x37, 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17, 0x6d, 0xad, 0xf0, 0x30, 0x4, 0xc4, 0x99, 0x59, 0x23, 0xe3, 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 
0x6c, 0xac, 0x98, 0x58, 0x5, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf, 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c, 0x6, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x7, 0xc7, 0x94, 0x54, 0x9, 0xc9, 0xb3, 0x73, 0x2e, 0xee, 0xda, 0x1a, 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x8, 0xc8, 0x95, 0x55, 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0xb, 0xcb, 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed, 0xb0, 0x70, 0xa, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e, 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, 0x64, 0xa4, 0x90, 0x50, 0xd, 0xcd, 0xb7, 0x77, 0x2a, 0xea, 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0xc, 0xcc, 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6, 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, 0xf, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d, 0x29, 0xe9, 0xb4, 0x74, 0xe, 0xce, 0x93, 0x53}, + {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, 0x30, 0xf1, 0xca, 0xb, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7, 0x5, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82, 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7, 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0xe, 0x50, 0x91, 0xec, 0x2d, 0x73, 0xb2, 0xa, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77, 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47, 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x1, 0x5f, 0x9e, 0xe3, 0x22, 0x7c, 0xbd, 0xf, 0xce, 0x90, 0x51, 0x2c, 0xed, 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34, 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x4, 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a, 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0xc, 0x71, 0xb0, 0xee, 0x2f, 0x98, 0x59, 0x7, 0xc6, 0xbb, 0x7a, 0x24, 0xe5, 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0, 0x8e, 0x4f, 0x32, 0xf3, 
0xad, 0x6c, 0x57, 0x96, 0xc8, 0x9, 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x2, 0xc3, 0xbe, 0x7f, 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6, 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99, 0xc7, 0x6, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0xd, 0xcc, 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66, 0x5d, 0x9c, 0xc2, 0x3, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56, 0x8, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f, 0xf2, 0x33, 0x6d, 0xac}, + {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a, 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0xa, 0xc8, 0xe2, 0x20, 0x7b, 0xb9, 0xcd, 0xf, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e, 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad, 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x8, 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0xd, 0xcf, 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, 0xc0, 0x2, 0x28, 0xea, 0xb1, 0x73, 0x7, 0xc5, 0x9e, 0x5c, 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33, 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48, 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd, 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7, 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30, 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x1, 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x4, 0xc6, 0x50, 0x92, 0xc9, 0xb, 0x7f, 0xbd, 0xe6, 0x24, 0xe, 0xcc, 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 0x18, 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, 0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b, 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4, 0xbf, 0x7d, 0x9, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23, 0x57, 0x95, 0xce, 0xc, 0x9a, 0x58, 0x3, 0xc1, 0xb5, 0x77, 
0x2c, 0xee, 0xc4, 0x6, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0}, + {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4, 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36, 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a, 0x72, 0xb1, 0xc2, 0x1, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7, 0x94, 0x57, 0xf, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf, 0x26, 0xe5, 0xbd, 0x7e, 0xd, 0xce, 0x96, 0x55, 0x70, 0xb3, 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x3, 0xcf, 0xc, 0x54, 0x97, 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x2, 0xc1, 0xb2, 0x71, 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10, 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x9, 0xca, 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x7, 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80, 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9, 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8, 0xcb, 0x8, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, 0x6, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0, 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec, 0xb4, 0x77, 0x4, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21, 0x52, 0x91, 0xc9, 0xa, 0xc6, 0x5, 0x5d, 0x9e, 0xed, 0x2e, 0x76, 0xb5, 0x90, 0x53, 0xb, 0xc8, 0xbb, 0x78, 0x20, 0xe3, 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff, 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f}, + {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8, 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba, 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61, 0x30, 0xf4, 0x92, 0x56, 0x7, 0xc3, 0xcb, 0xf, 0x5e, 0x9a, 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71, 0x57, 0x93, 0xc2, 0x6, 0x60, 
0xa4, 0xf5, 0x31, 0x39, 0xfd, 0xac, 0x68, 0xe, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda, 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x1, 0x50, 0x94, 0x9c, 0x58, 0x9, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea, 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11, 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, 0xc, 0xc8, 0xc0, 0x4, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6, 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8, 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0xb, 0xcf, 0x9e, 0x5a, 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, 0xc7, 0x3, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1, 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d, 0x6c, 0xa8, 0xce, 0xa, 0x5b, 0x9f, 0x97, 0x53, 0x2, 0xc6, 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d, 0x5c, 0x98, 0xc9, 0xd, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6, 0xa7, 0x63, 0x5, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1, 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, 0x4c, 0x88}, + {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7, 0xcc, 0x9, 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8, 0x99, 0x5c, 0xe, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82, 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea, 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45, 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, 0xd5, 0x10, 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba, 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57, 0x5, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6, 0xc7, 0x2, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0xc, 0x6d, 0xa8, 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0xb, 0xce, 0x9c, 0x59, 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d, 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0, 
0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0xf, 0xca, 0xcd, 0x8, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2, 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x6, 0x1, 0xc4, 0x96, 0x53, 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0xa, 0xcf, 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0xd, 0x6c, 0xa9, 0xfb, 0x3e, 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x3, 0x51, 0x94, 0x93, 0x56, 0x4, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee, 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11, 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77}, + {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x5, 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea, 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d, 0xa, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e, 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, 0xc9, 0xf, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf, 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x7, 0xc1, 0x2b, 0xed, 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x2, 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0xd, 0xcb, 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24, 0xce, 0x8, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76, 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4, 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0xb, 0x5c, 0x9a, 0xf2, 0x34, 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27, 0x31, 0xf7, 0xa0, 0x66, 0xe, 0xc8, 0x9f, 0x59, 0x56, 0x90, 0xc7, 0x1, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f, 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, 0x4, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc, 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0xc, 0x1a, 0xdc, 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x9, 0xcf, 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 
0xbe, 0x78, 0x2f, 0xe9, 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x6, 0x51, 0x97, 0x7d, 0xbb, 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x3, 0xc5, 0x92, 0x54, 0x3c, 0xfa, 0xad, 0x6b}, + {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19, 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d, 0x9, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x2, 0x56, 0x91, 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46, 0x5f, 0x98, 0xcc, 0xb, 0x64, 0xa3, 0xf7, 0x30, 0x97, 0x50, 0x4, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5, 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, 0xd3, 0x14, 0xd, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62, 0x52, 0x95, 0xc1, 0x6, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3, 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea, 0x85, 0x42, 0x16, 0xd1, 0xc8, 0xf, 0x5b, 0x9c, 0xf3, 0x34, 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x8, 0xcf, 0x9b, 0x5c, 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18, 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd, 0x92, 0x55, 0x1, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0xa, 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef, 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab, 0xff, 0x38, 0x57, 0x90, 0xc4, 0x3, 0xa4, 0x63, 0x37, 0xf0, 0x9f, 0x58, 0xc, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27, 0x3e, 0xf9, 0xad, 0x6a, 0x5, 0xc2, 0x96, 0x51, 0x61, 0xa6, 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0xe, 0x17, 0xd0, 0x84, 0x43, 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x7, 0x53, 0x94}, + {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59, 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a, 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec, 0xa9, 
0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f, 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c, 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa, 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0xd, 0x4f, 0x87, 0xc2, 0xa, 0x46, 0x8e, 0xcb, 0x3, 0x41, 0x89, 0xcc, 0x4, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16, 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28, 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab, 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0, 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e, 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0xc, 0x49, 0x81, 0xc3, 0xb, 0x4e, 0x86, 0xca, 0x2, 0x47, 0x8f, 0xcd, 0x5, 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2, 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44, 0x1, 0xc9, 0x8b, 0x43, 0x6, 0xce, 0x82, 0x4a, 0xf, 0xc7, 0x85, 0x4d, 0x8, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4, 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72, 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8}, + {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43, 0xc, 0xc5, 0x83, 0x4a, 0xf, 0xc6, 0x80, 0x49, 0xa, 0xc3, 0x85, 0x4c, 0x9, 0xc0, 0x86, 0x4f, 0x18, 0xd1, 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58, 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57, 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff, 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a, 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d, 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed, 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 0xad, 0x64, 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 
0x26, 0x63, 0xaa, 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23, 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3, 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e, 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31, 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99, 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10, 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f, 0x48, 0x81, 0xc7, 0xe, 0x4b, 0x82, 0xc4, 0xd, 0x4e, 0x87, 0xc1, 0x8, 0x4d, 0x84, 0xc2, 0xb, 0x44, 0x8d, 0xcb, 0x2, 0x47, 0x8e, 0xc8, 0x1, 0x42, 0x8b, 0xcd, 0x4, 0x41, 0x88, 0xce, 0x7}, + {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6, 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61, 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a, 0x44, 0x8e, 0xcd, 0x7, 0x4b, 0x81, 0xc2, 0x8, 0x5a, 0x90, 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3, 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, 0x68, 0xa2, 0xcc, 0x6, 0x45, 0x8f, 0xc3, 0x9, 0x4a, 0x80, 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42, 0x1, 0xcb, 0x87, 0x4d, 0xe, 0xc4, 0x96, 0x5c, 0x1f, 0xd5, 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6, 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29, 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0xb, 0x48, 0x82, 0xce, 0x4, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, 0x59, 0x93, 0x85, 0x4f, 0xc, 0xc6, 0x8a, 0x40, 0x3, 0xc9, 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73, 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4, 0xa8, 0x62, 0x21, 0xeb, 0xd, 0xc7, 0x84, 0x4e, 0x2, 0xc8, 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f, 0x31, 0xfb, 0xb8, 
0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5, 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36, 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, 0xed, 0x27, 0x49, 0x83, 0xc0, 0xa, 0x46, 0x8c, 0xcf, 0x5, 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b}, + {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71, 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85, 0xc5, 0xe, 0x45, 0x8e, 0xce, 0x5, 0x74, 0xbf, 0xff, 0x34, 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb, 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57, 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x1, 0xca, 0x81, 0x4a, 0xa, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5, 0xc4, 0xf, 0x4f, 0x84, 0xcf, 0x4, 0x44, 0x8f, 0xd2, 0x19, 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d, 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a, 0x47, 0x8c, 0xcc, 0x7, 0x4c, 0x87, 0xc7, 0xc, 0x25, 0xee, 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73, 0x38, 0xf3, 0xb3, 0x78, 0x9, 0xc2, 0x82, 0x49, 0x2, 0xc9, 0x89, 0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54, 0xcd, 0x6, 0x46, 0x8d, 0xc6, 0xd, 0x4d, 0x86, 0xdb, 0x10, 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1, 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde, 0x83, 0x48, 0x8, 0xc3, 0x88, 0x43, 0x3, 0xc8, 0xb9, 0x72, 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef, 0xa4, 0x6f, 0x2f, 0xe4}, + {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70, 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x2, 0x72, 0xbe, 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 
0x3d, 0xf1, 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, 0x4, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba, 0xca, 0x6, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1, 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0xa, 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x8, 0xc4, 0x8d, 0x41, 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0xe, 0x47, 0x8b, 0xfb, 0x37, 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0xc, 0xc0, 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x1, 0x48, 0x84, 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a, 0x3, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1, 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x5, 0x5b, 0x97, 0xde, 0x12, 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc, 0x95, 0x59, 0x7, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe, 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, 0xb, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5, 0xc5, 0x9, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b, 0xf, 0xc3, 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68, 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, 0xc1, 0xd, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23}, + {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x6, 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0xc, 0xc1, 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19, 0x53, 0x9e, 0xc7, 0xa, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8, 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, 0xb9, 0x74, 0xb, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52, 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a, 0xc0, 0xd, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff, 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, 0x7, 0xca, 0xf9, 0x34, 
0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0, 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x1, 0x4b, 0x86, 0x5a, 0x97, 0xdd, 0x10, 0x49, 0x84, 0xce, 0x3, 0x7c, 0xb1, 0xfb, 0x36, 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x5, 0xc8, 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69, 0xc2, 0xf, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29, 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x9, 0xc4, 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e, 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x8, 0x3b, 0xf6, 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57, 0xe, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, 0x7b, 0xb6, 0xc9, 0x4, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90, 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48, 0x2, 0xcf, 0x96, 0x5b, 0x11, 0xdc}, + {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c, 0x42, 0x8c, 0xc3, 0xd, 0x5d, 0x93, 0xdc, 0x12, 0xf8, 0x36, 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x8, 0x47, 0x89, 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x5, 0xcb, 0x9b, 0x55, 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea, 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d, 0x52, 0x9c, 0xcc, 0x2, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde, 0x8e, 0x40, 0xf, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0xa, 0xc4, 0x8b, 0x45, 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7, 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18, 0x48, 0x86, 0xc9, 0x7, 0xc7, 0x9, 0x46, 0x88, 0xd8, 0x16, 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9, 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b, 0x4, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70, 0x20, 0xee, 0xa1, 0x6f, 0x1, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0xc, 0x5c, 0x92, 0xdd, 0x13, 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4, 
0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b, 0xb, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, 0xc8, 0x6, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38, 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x3, 0x4c, 0x82, 0xec, 0x22, 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1, 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, 0xe, 0xc0}, + {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3, 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16, 0x41, 0x8e, 0xc2, 0xd, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0xc, 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9, 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d, 0x1, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1, 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, 0x3, 0xcc, 0xc1, 0xe, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96, 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba, 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0xf, 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x2, 0xcd, 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78, 0x47, 0x88, 0xc4, 0xb, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe, 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x6, 0xc9, 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x7, 0xc8, 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c, 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0xa, 0x46, 0x89, 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x8, 0x44, 0x8b, 0x86, 0x49, 0x5, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f, 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e, 0x29, 0xe6, 0xaa, 0x65, 0x4, 0xcb, 0x87, 0x48, 0x1f, 0xd0, 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x9, 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f}, + {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4, 0x81, 
0x51, 0x3c, 0xec, 0xe6, 0x36, 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45, 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x1, 0x6c, 0xbc, 0xb6, 0x66, 0xb, 0xdb, 0x9e, 0x4e, 0x23, 0xf3, 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34, 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f, 0x2, 0xd2, 0xd8, 0x8, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c, 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5, 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe, 0xd3, 0x3, 0x9, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11, 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0xf, 0xdf, 0xd5, 0x5, 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7, 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3, 0xde, 0xe, 0x4, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0, 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26, 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c, 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae, 0xa4, 0x74, 0x19, 0xc9, 0xd, 0xdd, 0xb0, 0x60, 0x6a, 0xba, 0xd7, 0x7, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57, 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0xc, 0x61, 0xb1, 0xbb, 0x6b, 0x6, 0xd6, 0x12, 0xc2, 0xaf, 0x7f, 0x75, 0xa5, 0xc8, 0x18}, + {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb, 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86, 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51, 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3, 0x68, 0xb9, 0xd7, 0x6, 0xb, 0xda, 0xb4, 0x65, 0x7e, 0xaf, 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x7, 0xd6, 0xdb, 0xa, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24, 0x41, 
0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56, 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x1, 0x6f, 0xbe, 0xb3, 0x62, 0xc, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1, 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc, 0xd2, 0x3, 0xe, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5, 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, 0x1f, 0xce, 0x5, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x8, 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45, 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec, 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e, 0xd5, 0x4, 0x6a, 0xbb, 0xb6, 0x67, 0x9, 0xd8, 0xbd, 0x6c, 0x2, 0xd3, 0xde, 0xf, 0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15, 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7}, + {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda, 0xa1, 0x73, 0x18, 0xca, 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b, 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c, 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b, 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x7, 0xd5, 0xd1, 0x3, 0x68, 0xba, 0x60, 0xb2, 0xd9, 0xb, 0xf, 0xdd, 0xb6, 0x64, 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13, 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a, 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, 0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44, 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3, 0xd8, 0xa, 0xe, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x6, 0xd4, 0xd0, 0x2, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a, 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32, 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 
0x4d, 0x26, 0xf4, 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, 0x97, 0x45, 0xdf, 0xd, 0x66, 0xb4, 0xb0, 0x62, 0x9, 0xdb, 0x1, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x5, 0x7e, 0xac, 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb, 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a, 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d, 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb}, + {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5, 0xb1, 0x62, 0xa, 0xd9, 0xda, 0x9, 0x61, 0xb2, 0x67, 0xb4, 0xdc, 0xf, 0xc, 0xdf, 0xb7, 0x64, 0x7f, 0xac, 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 0xa9, 0x7a, 0x12, 0xc1, 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b, 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb, 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27, 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82, 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3, 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e, 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34, 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55, 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6, 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c, 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc, 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1, 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b, 0x60, 0xb3, 0xdb, 0x8, 0xb, 0xd8, 0xb0, 0x63, 0xb6, 0x65, 0xd, 0xde, 0xdd, 0xe, 0x66, 0xb5, 0xd1, 0x2, 0x6a, 0xb9, 0xba, 0x69, 0x1, 0xd2, 0x7, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, 0xd7, 0x4}, + 
{0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15, 0x74, 0xa0, 0xb6, 0x62, 0x3, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e, 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x6, 0xd2, 0xb3, 0x67, 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64, 0x5, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42, 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, 0xf, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4, 0xc, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68, 0x9, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 0xe7, 0x33, 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0xa, 0xde, 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85, 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c, 0x1d, 0xc9, 0xdf, 0xb, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6, 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x8, 0x1e, 0xca, 0xab, 0x7f, 0xd9, 0xd, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf, 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc, 0xad, 0x79, 0x6f, 0xbb, 0xda, 0xe, 0xf6, 0x22, 0x43, 0x97, 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x4, 0x12, 0xc6, 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d, 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x7, 0x66, 0xb2, 0x4a, 0x9e, 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b, 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, 0xd6, 0x2, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d, 0xd5, 0x1, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3}, + {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x4, 0x66, 0xb3, 0xa2, 0x77, 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26, 0xbf, 0x6a, 0x8, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c, 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0xc, 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x1, 0x10, 0xc5, 0xa7, 0x72, 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 
0xb2, 0x67, 0x5, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36, 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x9, 0x6b, 0xbe, 0xaf, 0x7a, 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b, 0xd, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e, 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4, 0xb5, 0x60, 0x2, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x6, 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac, 0xce, 0x1b, 0xa, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd, 0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0xe, 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f, 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x3, 0x61, 0xb4, 0x43, 0x96, 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16, 0x7, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0xb, 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e, 0x7c, 0xa9, 0xb8, 0x6d, 0xf, 0xda, 0x2d, 0xf8, 0x9a, 0x4f, 0x5e, 0x8b, 0xe9, 0x3c}, + {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6, 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9, 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x7, 0xdf, 0x9, 0x6e, 0xb8, 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26, 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0xe, 0xd8, 0xa3, 0x75, 0x12, 0xc4, 0xdc, 0xa, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a, 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, 0x8c, 0x5a, 0xbc, 0x6a, 0xd, 0xdb, 0xc3, 0x15, 0x72, 0xa4, 0x7c, 0xaa, 0xcd, 0x1b, 0x3, 0xd5, 0xb2, 0x64, 0x82, 0x54, 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa, 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x4, 0x1c, 0xca, 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43, 0xa5, 0x73, 0x14, 0xc2, 0xda, 0xc, 0x6b, 0xbd, 0xba, 0x6c, 0xb, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23, 0x3b, 0xed, 0x8a, 0x5c, 
0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x5, 0xd3, 0xb4, 0x62, 0x65, 0xb3, 0xd4, 0x2, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d, 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f, 0x87, 0x51, 0x36, 0xe0, 0x6, 0xd0, 0xb7, 0x61, 0x79, 0xaf, 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x1, 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1, 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 0xd9, 0xf, 0x68, 0xbe, 0xa6, 0x70, 0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, 0x8, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20}, + {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95, 0x8a, 0x5d, 0x39, 0xee, 0x7, 0xd0, 0xb4, 0x63, 0x7c, 0xab, 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0, 0x9, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0xe, 0xd9, 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c, 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0xa, 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0xd, 0xe4, 0x33, 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78, 0x67, 0xb0, 0xd4, 0x3, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2, 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x4, 0xdb, 0xc, 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49, 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, 0xe2, 0x35, 0xdc, 0xb, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3, 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x5, 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x2, 0x66, 0xb1, 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27, 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x6, 0xd1, 0xc9, 0x1e, 0x7a, 0xad, 0xb2, 0x65, 0x1, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b, 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, 0xf, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e, 
0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17, 0x73, 0xa4, 0xbb, 0x6c, 0x8, 0xdf}, + {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc, 0x1, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33, 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x2, 0xda, 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9, 0xcb, 0x13, 0x66, 0xbe, 0x3, 0xdb, 0xae, 0x76, 0x44, 0x9c, 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf, 0x4, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52, 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x5, 0xdd, 0xa8, 0x70, 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, 0x61, 0xb9, 0x6, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34, 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x7, 0xdf, 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc, 0xce, 0x16, 0x63, 0xbb, 0x8, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4, 0x9, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f, 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0xa, 0xd2, 0xa7, 0x7f, 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, 0x6e, 0xb6, 0xb, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39, 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0xc, 0xd4, 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7, 0xc5, 0x1d, 0x68, 0xb0, 0xd, 0xd5, 0xa0, 0x78, 0x4a, 0x92, 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1, 0xe, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58, 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0xf, 0xd7, 0xa2, 0x7a, 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, 0x6b, 0xb3}, + {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8, 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1, 0xd4, 0xd, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, 0xce, 0x17, 0xa4, 0x7d, 0xb, 0xd2, 0xe7, 0x3e, 0x48, 0x91, 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x6, 0xb5, 0x6c, 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 
0x80, 0x44, 0x9d, 0xeb, 0x32, 0x7, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60, 0xd3, 0xa, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf, 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96, 0xa3, 0x7a, 0xc, 0xd5, 0x77, 0xae, 0xd8, 0x1, 0x34, 0xed, 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4, 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0xe, 0xd7, 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef, 0xda, 0x3, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, 0xf3, 0x2a, 0xaa, 0x73, 0x5, 0xdc, 0xe9, 0x30, 0x46, 0x9f, 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62, 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b, 0x7e, 0xa7, 0xd1, 0x8, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x9, 0xd0, 0xa6, 0x7f, 0xdd, 0x4, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82, 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98, 0xad, 0x74, 0x2, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca, 0x79, 0xa0, 0xd6, 0xf, 0x3a, 0xe3, 0x95, 0x4c}, + {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83, 0x42, 0x98, 0xeb, 0x31, 0xd, 0xd7, 0xa4, 0x7e, 0xdc, 0x6, 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10, 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8, 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f, 0xc, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48, 0x74, 0xae, 0xdd, 0x7, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64, 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x1, 0xdb, 0x79, 0xa3, 0xd0, 0xa, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66, 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, 0x6d, 
0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x8, 0xaa, 0x70, 0x3, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d, 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba, 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x5, 0x39, 0xe3, 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0xe, 0xd4, 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x4, 0x77, 0xad, 0xf, 0xd5, 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3, 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, 0xc8, 0x12, 0xd3, 0x9, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef, 0x4d, 0x97, 0xe4, 0x3e, 0x2, 0xd8, 0xab, 0x71, 0xf2, 0x28, 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f, 0x23, 0xf9, 0x8a, 0x50}, + {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad, 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0xa, 0xa7, 0x7c, 0xc, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12, 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68, 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f, 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22, 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58, 0xa6, 0x7d, 0xd, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb, 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0xb, 0x97, 0x4c, 0x3c, 0xe7, 0xdc, 0x7, 0x77, 0xac, 0x1, 0xda, 0xaa, 0x71, 0x4a, 0x91, 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x5, 0x75, 0xae, 0x3, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f, 0xf, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42, 0x79, 0xa2, 0xd2, 0x9, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a, 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b, 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21, 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b, 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 
0xcd, 0x33, 0xe8, 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x8, 0xa5, 0x7e, 0xe, 0xd5, 0xee, 0x35, 0x45, 0x9e, 0x2, 0xd9, 0xa9, 0x72, 0x49, 0x92, 0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x4, 0x74, 0xaf}, + {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38, 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x9, 0x70, 0xac, 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x2, 0xc3, 0x1f, 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14, 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99, 0x58, 0x84, 0xfd, 0x21, 0xf, 0xd3, 0xaa, 0x76, 0xf6, 0x2a, 0x53, 0x8f, 0xa1, 0x7d, 0x4, 0xd8, 0x9b, 0x47, 0x3e, 0xe2, 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, 0xc7, 0x1b, 0xda, 0x6, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4, 0x74, 0xa8, 0xd1, 0xd, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee, 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5, 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0xa, 0x24, 0xf8, 0x81, 0x5d, 0xdd, 0x1, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3, 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2, 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88, 0xa6, 0x7a, 0x3, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x8, 0xd4, 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x5, 0x85, 0x59, 0x20, 0xfc, 0xd2, 0xe, 0x77, 0xab, 0x6a, 0xb6, 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd, 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0xc, 0xd0, 0xfe, 0x22, 0x5b, 0x87, 0x7, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29, 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a, 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68}, + {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x2, 0xdf, 0xa5, 0x78, 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x3, 0xde, 0xa2, 0x7f, 0x5, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x4, 0xd9, 0xa3, 0x7e, 0x57, 0x8a, 0xf0, 
0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, 0x7, 0xda, 0x55, 0x88, 0xf2, 0x2f, 0x6, 0xdb, 0xa1, 0x7c, 0x59, 0x84, 0xfe, 0x23, 0xa, 0xd7, 0xad, 0x70, 0xff, 0x22, 0x58, 0x85, 0xac, 0x71, 0xb, 0xd6, 0x8, 0xd5, 0xaf, 0x72, 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x9, 0xd4, 0xfd, 0x20, 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0xf, 0xd2, 0x5d, 0x80, 0xfa, 0x27, 0xe, 0xd3, 0xa9, 0x74, 0xaa, 0x77, 0xd, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0xc, 0xd1, 0xab, 0x76, 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d, 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98, 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c, 0x10, 0xcd, 0xb7, 0x6a, 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68, 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36, 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37, 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35, 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32, 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62, 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, 0x4a, 0x97}, + {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf, 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x1, 0x7e, 0xa0, 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x3, 0x23, 0xfd, 0x82, 0x5c, 0xa3, 0x7d, 0x2, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3, 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6, 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, 0xd9, 0x7, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x6, 0xd8, 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85, 0xfa, 0x24, 0x4, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a, 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, 0xc4, 0x1a, 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x5, 
0x7a, 0xa4, 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0xf, 0x91, 0x4f, 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31, 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, 0xe, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd, 0x53, 0x8d, 0xf2, 0x2c, 0xc, 0xd2, 0xad, 0x73, 0x8c, 0x52, 0x2d, 0xf3, 0xd3, 0xd, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d, 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, 0x48, 0x96, 0x8, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28, 0xd7, 0x9, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7, 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0xb, 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35, 0xab, 0x75, 0xa, 0xd4, 0xf4, 0x2a, 0x55, 0x8b}, + {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0xd, 0x2a, 0xf5, 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0, 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b, 0xf7, 0x28, 0xf, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef, 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, 0xdd, 0x2, 0xd9, 0x6, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe, 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77, 0xb, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62, 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x9, 0x75, 0xaa, 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23, 0x5f, 0x80, 0xa7, 0x78, 0x4, 0xdb, 0xaf, 0x70, 0xc, 0xd3, 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, 0xe1, 0x3e, 0xde, 0x1, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9, 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92, 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87, 0xa0, 0x7f, 0x3, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0xe, 0x72, 0xad, 0x76, 0xa9, 0xd5, 0xa, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f, 0x63, 0xbc, 0x9b, 0x44, 
0x38, 0xe7, 0x7, 0xd8, 0xa4, 0x7b, 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3, 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x5, 0xe5, 0x3a, 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f, 0x8, 0xd7, 0xab, 0x74}, + {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9, 0xa6, 0x46, 0x7b, 0x9b, 0x1, 0xe1, 0xdc, 0x3c, 0xf5, 0x15, 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c, 0xf6, 0x16, 0x2b, 0xcb, 0x2, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d, 0xa4, 0x44, 0x79, 0x99, 0x3, 0xe3, 0xde, 0x3e, 0xa2, 0x42, 0x7f, 0x9f, 0x5, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc, 0x56, 0xb6, 0x8b, 0x6b, 0x4, 0xe4, 0xd9, 0x39, 0xa3, 0x43, 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd, 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40, 0x7d, 0x9d, 0x7, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68, 0xf2, 0x12, 0x2f, 0xcf, 0x6, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3, 0xa, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f, 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91, 0xb, 0xeb, 0xd6, 0x36, 0x8, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1, 0xae, 0x4e, 0x73, 0x93, 0x9, 0xe9, 0xd4, 0x34, 0xfd, 0x1d, 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6, 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0xf, 0xef, 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7, 0xe, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a, 0x77, 0x97, 0xd, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4, 0x5e, 0xbe, 0x83, 0x63, 0xc, 0xec, 0xd1, 0x31, 0xab, 0x4b, 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5}, + {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88, 0x15, 0xf4, 0xca, 0x2b, 0xed, 0xc, 0x32, 0xd3, 0x4e, 
0xaf, 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0xd, 0xec, 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26, 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2, 0x3f, 0xde, 0xe0, 0x1, 0xe2, 0x3, 0x3d, 0xdc, 0x41, 0xa0, 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24, 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0xf, 0xee, 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad, 0x30, 0xd1, 0xef, 0xe, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8, 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x2, 0xe3, 0xd9, 0x38, 0x6, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc, 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0xa, 0x97, 0x76, 0x48, 0xa9, 0xa8, 0x49, 0x77, 0x96, 0xb, 0xea, 0xd4, 0x35, 0xf3, 0x12, 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20, 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x7, 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x5, 0x98, 0x79, 0x47, 0xa6, 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c, 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x9, 0xe8, 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x8, 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c, 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46, 0x78, 0x99, 0x4, 0xe5, 0xdb, 0x3a}, + {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12, 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3, 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69, 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, 0xe1, 0x3, 0xd4, 0x36, 0xd, 0xef, 0x7b, 0x99, 0xa2, 0x40, 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83, 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f, 0xb, 0xe9, 0xd2, 0x30, 0xe7, 0x5, 0x3e, 0xdc, 0x48, 0xaa, 0x91, 0x73, 0x33, 0xd1, 0xea, 0x8, 0x9c, 0x7e, 0x45, 0xa7, 0x70, 0x92, 0xa9, 0x4b, 0xdf, 
0x3d, 0x6, 0xe4, 0xb5, 0x57, 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd, 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x9, 0x32, 0xd0, 0x7, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93, 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63, 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e, 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, 0x60, 0x82, 0xd3, 0x31, 0xa, 0xe8, 0x7c, 0x9e, 0xa5, 0x47, 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x4, 0x66, 0x84, 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e, 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x2, 0x39, 0xdb, 0x4f, 0xad, 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0xc, 0xee, 0xd5, 0x37, 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x1, 0xe3, 0x34, 0xd6, 0xed, 0xf, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca, 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, 0xc4, 0x26}, + {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8, 0x96, 0x75, 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x5, 0xdd, 0x3e, 0x6, 0xe5, 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x9, 0x9a, 0x79, 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0xa, 0xe9, 0xa7, 0x44, 0x7c, 0x9f, 0xc, 0xef, 0xd7, 0x34, 0xec, 0xf, 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a, 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67, 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0, 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20, 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d, 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c, 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a, 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66, 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80, 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10, 
0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0xd, 0xee, 0xd6, 0x35, 0xed, 0xe, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e, 0x30, 0xd3, 0xeb, 0x8, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98, 0xa0, 0x43, 0xd0, 0x33, 0xb, 0xe8, 0x97, 0x74, 0x4c, 0xaf, 0x3c, 0xdf, 0xe7, 0x4, 0xdc, 0x3f, 0x7, 0xe4, 0x77, 0x94, 0xac, 0x4f, 0x1, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92, 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x2, 0x3a, 0xd9}, + {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x2, 0x33, 0xd7, 0x51, 0xb5, 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13, 0xd1, 0x35, 0x4, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46, 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x6, 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x8, 0xec, 0xdd, 0x39, 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd, 0x8c, 0x68, 0xee, 0xa, 0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b, 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, 0xc, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b, 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0xe, 0xfb, 0x1f, 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52, 0xd4, 0x30, 0x1, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x3, 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56, 0x67, 0x83, 0x5, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0, 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x7, 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1, 0xdc, 0x38, 0x9, 0xed, 0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b, 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0xb, 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, 0x2b, 0xcf, 0xd, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b, 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0xf, 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9, 0x2f, 0xcb, 0xfa, 0x1e}, + {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 
0xc8, 0x2d, 0x1f, 0xfa, 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68, 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0xc, 0xf1, 0x14, 0x26, 0xc3, 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, 0xee, 0xb, 0x7, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86, 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a, 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6, 0x37, 0xd2, 0xe0, 0x5, 0x9, 0xec, 0xde, 0x3b, 0xba, 0x5f, 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3, 0xe, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90, 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca, 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, 0xe7, 0x2, 0xe3, 0x6, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62, 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0, 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c, 0xdd, 0x38, 0xa, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0xd, 0xe8, 0xe4, 0x1, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a, 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e, 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, 0x3, 0xe6, 0xea, 0xf, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b, 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x8, 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4, 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x4, 0xe1}, + {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb, 0xc6, 0x20, 0x17, 0xf1, 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19, 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1, 0x86, 0x60, 0xe8, 0xe, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x3, 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x8, 0x80, 0x66, 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x5, 0x32, 0xd4, 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c, 0x4b, 0xad, 
0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99, 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x6, 0xe0, 0xb, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98, 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a, 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 0x8f, 0x7, 0xe1, 0xd6, 0x30, 0xdb, 0x3d, 0xa, 0xec, 0x64, 0x82, 0xb5, 0x53, 0xef, 0x9, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a, 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x4, 0x29, 0xcf, 0xf8, 0x1e, 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9, 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61, 0x56, 0xb0, 0x38, 0xde, 0xe9, 0xf, 0xe4, 0x2, 0x35, 0xd3, 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x1, 0xe7, 0x6f, 0x89, 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0xc, 0xea, 0xdd, 0x3b, 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93, 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd}, + {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4, 0xd6, 0x31, 0x5, 0xe2, 0x6d, 0x8a, 0xbe, 0x59, 0xbd, 0x5a, 0x6e, 0x89, 0x6, 0xe1, 0xd5, 0x32, 0xb1, 0x56, 0x62, 0x85, 0xa, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x9, 0xee, 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, 0xf, 0xe8, 0xc, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83, 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 0x14, 0xf3, 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d, 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41, 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff, 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47, 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a, 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4, 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b, 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 
0x10, 0x9f, 0x78, 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16, 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66, 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0xe, 0xea, 0xd, 0x39, 0xde, 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0xb, 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x8, 0x87, 0x60, 0x54, 0xb3, 0x30, 0xd7, 0xe3, 0x4, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc, 0x88, 0x6f, 0xe0, 0x7, 0x33, 0xd4, 0xe6, 0x1, 0x35, 0xd2, 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, 0xe5, 0x2}, + {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce, 0xeb, 0x3, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10, 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, 0x6, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd, 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x5, 0x20, 0xc8, 0x79, 0x91, 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd, 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0xc, 0xe4, 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c, 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0xf, 0xd4, 0x3c, 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0xa, 0xe2, 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, 0xb8, 0x50, 0xe1, 0x9, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43, 0x2d, 0xc5, 0xe0, 0x8, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6, 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0xb, 0xe3, 0xc6, 0x2e, 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0xe, 0x2b, 0xc3, 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf, 0x8a, 0x62, 0xc0, 0x28, 0xd, 0xe5, 0x54, 0xbc, 0x99, 0x71, 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x4, 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68, 0x4d, 0xa5, 0x7, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc, 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x2, 0x27, 0xcf, 0x6d, 0x85, 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d, 0xcc, 0x24, 0x1, 
0xe9, 0x4b, 0xa3, 0x86, 0x6e}, + {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0xb, 0xae, 0x47, 0x61, 0x88, 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x6, 0x20, 0xc9, 0x77, 0x9e, 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c, 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, 0xd, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d, 0xc3, 0x2a, 0xc, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x7, 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3, 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0xa, 0x82, 0x6b, 0x4d, 0xa4, 0x1, 0xe8, 0xce, 0x27, 0x99, 0x70, 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b, 0x2e, 0xc7, 0xe1, 0x8, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e, 0x80, 0x69, 0x4f, 0xa6, 0x3, 0xea, 0xcc, 0x25, 0xc1, 0x28, 0xe, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc, 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, 0xbb, 0x52, 0xec, 0x5, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49, 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87, 0xa1, 0x48, 0xed, 0x4, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65, 0xc0, 0x29, 0xf, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc, 0x2, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6, 0xe0, 0x9, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12, 0xb7, 0x5e, 0x78, 0x91}, + {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf, 0x6, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x5, 0xef, 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0xc, 0xe6, 0xc5, 0x2f, 0x83, 0x69, 0x4a, 0xa0, 0xf, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, 0x49, 0xa3, 0xa, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6, 0x9, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2, 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 
0x38, 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1, 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd, 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31, 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c, 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc, 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16, 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93, 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3, 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0xb, 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x8, 0xa4, 0x4e, 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0xd, 0xa1, 0x4b, 0x68, 0x82, 0x2d, 0xc7, 0xe4, 0xe, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce, 0xed, 0x7, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x4, 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x1, 0xad, 0x47, 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x2, 0xae, 0x44, 0x67, 0x8d}, + {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36, 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0xc, 0xa7, 0x4c, 0x6c, 0x87, 0x27, 0xcc, 0xec, 0x7, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1, 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11, 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8, 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0xe, 0xe5, 0x45, 0xae, 0x8e, 0x65, 0xce, 0x25, 0x5, 0xee, 0x74, 0x9f, 0xbf, 0x54, 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x2, 0x22, 0xc9, 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x9, 0x29, 0xc2, 0xb0, 0x5b, 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b, 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, 0xe6, 0xd, 0xad, 0x46, 
0x66, 0x8d, 0x26, 0xcd, 0xed, 0x6, 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c, 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa, 0x1, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0xa, 0xe1, 0xc1, 0x2a, 0xe8, 0x3, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43, 0xe3, 0x8, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15, 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5, 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0xf, 0xe4, 0x4f, 0xa4, 0x84, 0x6f, 0xcf, 0x24, 0x4, 0xef, 0x44, 0xaf, 0x8f, 0x64, 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32, 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72}, + {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8, 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x7, 0xeb, 0xcc, 0x20, 0x9, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6, 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0xe, 0xe2, 0xcb, 0x27, 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a, 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x8, 0xe3, 0xf, 0x26, 0xca, 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7, 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x1, 0x28, 0xc4, 0x2f, 0xc3, 0xea, 0x6, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35, 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0xd, 0xb3, 0x5f, 0x76, 0x9a, 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0xa, 0x23, 0xcf, 0x42, 0xae, 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2, 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x4, 0x2d, 0xc1, 0x7f, 0x93, 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x3, 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e, 0x57, 0xbb, 0x5, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88, 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, 0xa6, 0x4a, 0xc7, 0x2b, 0x2, 0xee, 0x50, 0xbc, 0x95, 0x79, 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0xc, 0xe0, 0x6d, 0x81, 
0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11, 0xaf, 0x43, 0x6a, 0x86, 0xb, 0xe7, 0xce, 0x22, 0x9c, 0x70, 0x59, 0xb5}, + {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b, 0xb1, 0x5c, 0xe5, 0x8, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67, 0xde, 0x33, 0x19, 0xf4, 0xec, 0x1, 0x2b, 0xc6, 0x7f, 0x92, 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e, 0x9a, 0x77, 0x5d, 0xb0, 0x9, 0xe4, 0xce, 0x23, 0xa1, 0x4c, 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x2, 0xef, 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0xa, 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4, 0xee, 0x3, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38, 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, 0xb, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd, 0x97, 0x7a, 0x50, 0xbd, 0x4, 0xe9, 0xc3, 0x2e, 0xac, 0x41, 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0xc, 0x26, 0xcb, 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x5, 0x2f, 0xc2, 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0xd, 0xe0, 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c, 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, 0x6, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0, 0x24, 0xc9, 0xe3, 0xe, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2, 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94, 0x2d, 0xc0, 0xea, 0x7, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, 0xd1, 0x3c, 0xc8, 0x25, 0xf, 0xe2, 0x5b, 0xb6, 0x9c, 0x71, 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a}, + {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5, 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41, 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0xb, 0xe5, 0x55, 0xbb, 0x94, 0x7a, 0xe9, 
0x7, 0x28, 0xc6, 0x76, 0x98, 0xb7, 0x59, 0x5, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5, 0x26, 0xc8, 0xe7, 0x9, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad, 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f, 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a, 0xcf, 0x21, 0xe, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x2, 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0xa, 0xe4, 0xcb, 0x25, 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x6, 0xb6, 0x58, 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc, 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68, 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a, 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x1, 0xef, 0x5f, 0xb1, 0x9e, 0x70, 0xe3, 0xd, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53, 0xf, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2, 0xed, 0x3, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66, 0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33, 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b, 0x4, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x8, 0x27, 0xc9, 0x79, 0x97, 0xb8, 0x56}, + {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c, 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0xe, 0xe1, 0x7d, 0x92, 0xbe, 0x51, 0xe6, 0x9, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80, 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d, 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa, 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42, 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f, 0xe9, 0x6, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d, 0x1, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93, 0x24, 0xcb, 0xe7, 0x8, 0x94, 0x7b, 0x57, 0xb8, 0xf, 0xe0, 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d, 0xa1, 
0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33, 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb, 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0xa, 0xbd, 0x52, 0x7e, 0x91, 0xd, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba, 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x4, 0x28, 0xc7, 0x5b, 0xb4, 0x98, 0x77, 0xc0, 0x2f, 0x3, 0xec, 0xcf, 0x20, 0xc, 0xe3, 0x54, 0xbb, 0x97, 0x78, 0xe4, 0xb, 0x27, 0xc8, 0x7f, 0x90, 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x2, 0xed, 0xc1, 0x2e, 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x5, 0x63, 0x8c, 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64, 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9}, + {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6, 0x5c, 0xac, 0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81, 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20, 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x3, 0xf3, 0xfe, 0xe, 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, 0xcc, 0x3c, 0x5, 0xf5, 0xf8, 0x8, 0xe2, 0x12, 0x1f, 0xef, 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e, 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0, 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, 0x74, 0x84, 0x6, 0xf6, 0xfb, 0xb, 0xe1, 0x11, 0x1c, 0xec, 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41, 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f, 0x85, 0x75, 0x78, 0x88, 0xa, 0xfa, 0xf7, 0x7, 0xed, 0x1d, 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33, 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x9, 0xf9, 0xf4, 0x4, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c, 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d, 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c, 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0xf, 
0xff, 0xf2, 0x2, 0xe8, 0x18, 0x15, 0xe5, 0xc, 0xfc, 0xf1, 0x1, 0xeb, 0x1b, 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35, 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94, 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e}, + {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46, 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba, 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e, 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x3, 0xf2, 0xfc, 0xd, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6, 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc, 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33, 0xde, 0x2f, 0x21, 0xd0, 0xe6, 0x17, 0x19, 0xe8, 0x5, 0xf4, 0xfa, 0xb, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30, 0x6, 0xf7, 0xf9, 0x8, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87, 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3, 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x9, 0xf8, 0xf6, 0x7, 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0, 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74, 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c, 0xa, 0xfb, 0xf5, 0x4, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56, 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72, 0x9f, 0x6e, 0x60, 0x91, 0xc, 0xfd, 0xf3, 0x2, 0xef, 0x1e, 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a, 0xec, 0x1d, 0x13, 0xe2, 0xf, 0xfe, 0xf0, 0x1, 0x37, 0xc6, 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49, 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, 0x80, 0x71}, + {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69, 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53, 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, 0x3d, 0xcf, 
0xe8, 0x1a, 0x11, 0xe3, 0x7, 0xf5, 0xfe, 0xc, 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81, 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d, 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29, 0xe, 0xfc, 0xf7, 0x5, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f, 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5, 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x9, 0xfb, 0xf0, 0x2, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1, 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d, 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c, 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x6, 0xd, 0xff, 0x1b, 0xe9, 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63, 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee, 0xe5, 0x17, 0xf3, 0x1, 0xa, 0xf8, 0xdf, 0x2d, 0x26, 0xd4, 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x8, 0x3, 0xf1, 0x15, 0xe7, 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd, 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50, 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda, 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0xf, 0x4, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae, 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d}, + {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3, 0xb, 0xf8, 0xf0, 0x3, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33, 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88, 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0xe, 0x6, 0xf5, 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e, 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e, 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x5, 0xd, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35, 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 
0x5d, 0xae, 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24, 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0xc, 0xff, 0xf7, 0x4, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44, 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4, 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4, 0x7, 0xf4, 0xfc, 0xf, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84, 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x2, 0xa, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9, 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2, 0xfa, 0x9, 0x1, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49, 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79, 0x9a, 0x69, 0x61, 0x92}, + {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5, 0xfb, 0xf, 0xe, 0xfa, 0xc, 0xf8, 0xf9, 0xd, 0x8, 0xfc, 0xfd, 0x9, 0xff, 0xb, 0xa, 0xfe, 0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6, 
0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55}, + {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa, 0xeb, 0x1e, 0x1c, 0xe9, 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, 0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a, 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5, 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9, 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81, 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e, 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42, 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a, 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0xb, 0xfe, 0xfc, 0x9, 0xf8, 0xd, 0xf, 0xfa, 0xf0, 0x5, 0x7, 0xf2, 0x3, 0xf6, 0xf4, 0x1, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea, 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce, 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29, 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71, 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e, 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92, 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41, 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae, 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa}, + {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 
0x23, 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d, 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f, 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b, 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e, 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97, 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, 0x7d, 0x8b, 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18, 0x3, 0xf5, 0xf2, 0x4, 0xfc, 0xa, 0xd, 0xfb, 0x3b, 0xcd, 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf, 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d, 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58, 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a, 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e, 0x5, 0xf3, 0xf4, 0x2, 0xfa, 0xc, 0xb, 0xfd, 0xdd, 0x2b, 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39, 0xc1, 0x37, 0x30, 0xc6, 0x6, 0xf0, 0xf7, 0x1, 0xf9, 0xf, 0x8, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d, 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63, 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa, 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, 0x40, 0xb6}, + {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c, 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24, 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f, 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c, 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0xb, 0xfc, 0xf8, 0xf, 0xf0, 0x7, 0x3, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f, 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77, 0x73, 0x84, 0x7b, 0x8c, 0x88, 
0x7f, 0x6b, 0x9c, 0x98, 0x6f, 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f, 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0xa, 0xe, 0xf9, 0x6, 0xf1, 0xf5, 0x2, 0xdd, 0x2a, 0x2e, 0xd9, 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, 0x3e, 0xc9, 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62, 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1, 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9, 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, 0x15, 0xe2, 0xf6, 0x1, 0x5, 0xf2, 0xd, 0xfa, 0xfe, 0x9, 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca, 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92, 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2, 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49}, + {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x4, 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a, 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d, 0x8, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58, 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, 0xf4, 0xc, 0xec, 0x14, 0x1, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e, 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f, 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x5, 0x44, 0xbc, 0xa9, 0x51, 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, 0xb0, 0x48, 0x9, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb, 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca, 0xdf, 0x27, 0xf5, 0xd, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0, 0x2, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, 0x7c, 0x84, 0xfe, 0x6, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c, 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b, 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35, 0xe7, 0x1f, 0xa, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, 
0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0xe, 0xdc, 0x24, 0x31, 0xc9, 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x3, 0xfb, 0xba, 0x42, 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x7, 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d, 0xcc, 0x34, 0x21, 0xd9, 0xb, 0xf3, 0xe6, 0x1e, 0x64, 0x9c, 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0xf, 0x1a, 0xe2, 0x30, 0xc8, 0xdd, 0x25}, + {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e, 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x7, 0xfe, 0xb0, 0x49, 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40, 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0xe, 0xf7, 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8, 0xe6, 0x1f, 0x9, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55, 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21, 0xf4, 0xd, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0xa, 0xdf, 0x26, 0x30, 0xc9, 0xfa, 0x3, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98, 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7, 0x12, 0xeb, 0xfd, 0x4, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90, 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0xb, 0x6e, 0x97, 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0xc, 0x1a, 0xe3, 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x5, 0xd0, 0x29, 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d, 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x2, 0x14, 0xed, 0xa3, 0x5a, 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x6, 0xff, 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x1, 0xf8, 0xee, 0x17, 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46, 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32, 0xe7, 0x1e, 0x8, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, 0xb8, 0x41, 0xf, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda}, + {0x0, 0xfa, 0xe9, 0x13, 0xcf, 
0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x8, 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x3, 0x10, 0xea, 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7, 0xc4, 0x3e, 0xe2, 0x18, 0xb, 0xf1, 0xae, 0x54, 0x47, 0xbd, 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, 0x4a, 0xb0, 0xef, 0x15, 0x6, 0xfc, 0x20, 0xda, 0xc9, 0x33, 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0xe, 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49, 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, 0xff, 0x5, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d, 0xc2, 0x38, 0x2b, 0xd1, 0xd, 0xf7, 0xe4, 0x1e, 0xd8, 0x22, 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x4, 0x5b, 0xa1, 0xb2, 0x48, 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0xc, 0xf6, 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c, 0xee, 0x14, 0x7, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97, 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0xf, 0x1c, 0xe6, 0x3a, 0xc0, 0xd3, 0x29, 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68, 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x2, 0x11, 0xeb, 0xaf, 0x55, 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f, 0xe3, 0x19, 0xa, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, 0xa4, 0x5e, 0x1, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd, 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0, 0xf3, 0x9, 0xd5, 0x2f, 0x3c, 0xc6}, + {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50, 0xb, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0, 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed, 0xfd, 0x6, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d, 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0xd, 0xd6, 0x2d, 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d, 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0xc, 0xf7, 0xa7, 0x5c, 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37, 0xec, 
0x17, 0x7, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0xa, 0x1a, 0xe1, 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca, 0xda, 0x21, 0xfa, 0x1, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa, 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x8, 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23, 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x3, 0x4e, 0xb5, 0xa5, 0x5e, 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0xe, 0xf5, 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e, 0xce, 0x35, 0x25, 0xde, 0x5, 0xfe, 0xee, 0x15, 0x74, 0x8f, 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x4, 0x14, 0xef, 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, 0x5f, 0xa4, 0xf4, 0xf, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f, 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12, 0x2, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79, 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x9, 0xf2, 0x29, 0xd2, 0xc2, 0x39}, + {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87, 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1, 0x1f, 0xe3, 0xfa, 0x6, 0xf6, 0xa, 0x13, 0xef, 0x21, 0xdd, 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b, 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2, 0xdb, 0x27, 0xe9, 0x15, 0xc, 0xf0, 0xf1, 0xd, 0x14, 0xe8, 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44, 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0xb, 0xf7, 0x7, 0xfb, 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad, 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x1, 0xff, 0x3, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0, 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d, 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, 0x5, 0xf9, 0x9, 0xf5, 0xec, 0x10, 0xde, 
0x22, 0x3b, 0xc7, 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e, 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8, 0x16, 0xea, 0xf3, 0xf, 0xe, 0xf2, 0xeb, 0x17, 0xd9, 0x25, 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73, 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a, 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x8, 0xf8, 0x4, 0x1d, 0xe1, 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d, 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x2, 0xfe}, + {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x3, 0xfe, 0xe4, 0x19, 0xd6, 0x2b, 0x31, 0xcc, 0x5, 0xf8, 0xe2, 0x1f, 0x6d, 0x90, 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7, 0x6e, 0x93, 0x89, 0x74, 0x6, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78, 0xa, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27, 0x3d, 0xc0, 0x9, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b, 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0xf, 0xf2, 0xe8, 0x15, 0xc, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a, 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65, 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, 0xf0, 0xd, 0x14, 0xe9, 0xf3, 0xe, 0xc7, 0x3a, 0x20, 0xdd, 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54, 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x8, 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, 0xf6, 0xb, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0, 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x7, 0x75, 0x88, 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf, 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x4, 0xcd, 0x30, 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x2, 0xcb, 0x36, 0x2c, 0xd1, 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e, 0x94, 0x69, 0xa0, 
0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2, 0x1b, 0xe6, 0xfc, 0x1}, + {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63, 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x6, 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9, 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0xa, 0xca, 0x34, 0x2b, 0xd5, 0xed, 0x13, 0xc, 0xf2, 0x32, 0xcc, 0xd3, 0x2d, 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f, 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd, 0xd, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0xb, 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49, 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x7, 0x64, 0x9a, 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83, 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, 0x1, 0xff, 0xe2, 0x1c, 0x3, 0xfd, 0x3d, 0xc3, 0xdc, 0x22, 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47, 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x5, 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, 0x6a, 0x94, 0xf7, 0x9, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37, 0xf, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52, 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c, 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, 0xe, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x8, 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb, 0xc4, 0x3a, 0xfa, 0x4, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99, 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x2, 0xfc, 0xe3, 0x1d}, + {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57, 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x3, 0xfc, 0x3b, 0xc4, 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51, 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x5, 0xfa, 0xdd, 0x22, 0x3e, 0xc1, 0x6, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a, 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 
0xea, 0x15, 0x9, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d, 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e, 0x32, 0xcd, 0xa, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb, 0x7c, 0x83, 0x9f, 0x60, 0xc, 0xf3, 0xef, 0x10, 0xd7, 0x28, 0x34, 0xcb, 0xec, 0x13, 0xf, 0xf0, 0x37, 0xc8, 0xd4, 0x2b, 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d, 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5, 0x12, 0xed, 0xf1, 0xe, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0xd, 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45, 0xf4, 0xb, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0, 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3, 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x8, 0xcf, 0x30, 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94, 0xf8, 0x7, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7, 0xfb, 0x4, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf, 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, 0xfd, 0x2, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9, 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda, 0xc6, 0x39, 0xfe, 0x1, 0x1d, 0xe2}} + +var mulTableLow = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}, + {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e}, + {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11}, + {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c}, + {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33}, + {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22}, + {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d}, + {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78}, + {0x0, 0x9, 
0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77}, + {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66}, + {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69}, + {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44}, + {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b}, + {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a}, + {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55}, + {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0}, + {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}, + {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee}, + {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1}, + {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc}, + {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3}, + {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2}, + {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd}, + {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88}, + {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87}, + {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96}, + {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99}, + {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4}, + {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 
0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb}, + {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa}, + {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5}, + {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd}, + {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2}, + {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3}, + {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec}, + {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1}, + {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce}, + {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf}, + {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0}, + {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85}, + {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a}, + {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b}, + {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94}, + {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9}, + {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6}, + {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7}, + {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8}, + {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd}, + {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 
0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2}, + {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13}, + {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c}, + {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31}, + {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e}, + {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f}, + {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20}, + {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75}, + {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a}, + {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b}, + {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64}, + {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49}, + {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46}, + {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57}, + {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58}, + {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7}, + {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8}, + {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9}, + {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6}, + {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb}, + {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 
0x91, 0xd4}, + {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5}, + {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca}, + {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f}, + {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90}, + {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81}, + {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e}, + {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3}, + {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac}, + {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd}, + {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2}, + {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17}, + {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18}, + {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9}, + {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6}, + {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b}, + {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24}, + {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35}, + {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a}, + {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f}, + {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60}, + {0x0, 0x5a, 0xb4, 
0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71}, + {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e}, + {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53}, + {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c}, + {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d}, + {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42}, + {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a}, + {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15}, + {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4}, + {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb}, + {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26}, + {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29}, + {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38}, + {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37}, + {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62}, + {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d}, + {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c}, + {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73}, + {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e}, + {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51}, + {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 
0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40}, + {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f}, + {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea}, + {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5}, + {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4}, + {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb}, + {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6}, + {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9}, + {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8}, + {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7}, + {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92}, + {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d}, + {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c}, + {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83}, + {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae}, + {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1}, + {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0}, + {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf}, + {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3}, + {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc}, + {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 
0x4f, 0xcd}, + {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2}, + {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef}, + {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0}, + {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1}, + {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe}, + {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab}, + {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4}, + {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5}, + {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba}, + {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97}, + {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98}, + {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89}, + {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86}, + {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23}, + {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c}, + {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d}, + {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32}, + {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f}, + {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10}, + {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1}, + {0x0, 0x97, 0x33, 0xa4, 0x66, 
0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe}, + {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b}, + {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54}, + {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45}, + {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a}, + {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67}, + {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68}, + {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79}, + {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76}, + {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e}, + {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21}, + {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30}, + {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f}, + {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12}, + {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d}, + {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc}, + {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3}, + {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56}, + {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59}, + {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48}, + {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 
0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47}, + {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a}, + {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65}, + {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74}, + {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b}, + {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde}, + {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1}, + {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0}, + {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf}, + {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2}, + {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed}, + {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc}, + {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3}, + {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6}, + {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9}, + {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8}, + {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7}, + {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a}, + {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95}, + {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84}, + {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 
0x8b}, + {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34}, + {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b}, + {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a}, + {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25}, + {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8}, + {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7}, + {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16}, + {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19}, + {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c}, + {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43}, + {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52}, + {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d}, + {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70}, + {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f}, + {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e}, + {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61}, + {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4}, + {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb}, + {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda}, + {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5}, + {0x0, 0xd4, 0xb5, 0x61, 0x77, 
0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8}, + {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7}, + {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6}, + {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9}, + {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc}, + {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3}, + {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2}, + {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad}, + {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80}, + {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f}, + {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e}, + {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91}, + {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9}, + {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6}, + {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7}, + {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8}, + {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5}, + {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa}, + {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb}, + {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4}, + {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 
0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1}, + {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe}, + {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf}, + {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0}, + {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d}, + {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82}, + {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93}, + {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c}, + {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39}, + {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36}, + {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27}, + {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28}, + {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5}, + {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa}, + {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b}, + {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14}, + {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41}, + {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e}, + {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f}, + {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50}, + {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d}, + 
{0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72}, + {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63}, + {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c}} +var mulTableHigh = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0}, + {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd}, + {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd}, + {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7}, + {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17}, + {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a}, + {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea}, + {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3}, + {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23}, + {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e}, + {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde}, + {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34}, + {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4}, + {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9}, + {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39}, + {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb}, + {0x0, 
0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b}, + {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46}, + {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6}, + {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c}, + {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac}, + {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1}, + {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51}, + {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68}, + {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98}, + {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95}, + {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65}, + {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f}, + {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f}, + {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72}, + {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82}, + {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b}, + {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b}, + {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96}, + {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66}, + {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c}, + {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 
0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c}, + {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71}, + {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81}, + {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8}, + {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48}, + {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45}, + {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5}, + {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f}, + {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf}, + {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2}, + {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52}, + {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0}, + {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20}, + {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d}, + {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd}, + {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37}, + {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7}, + {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca}, + {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a}, + {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3}, + {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 
0x37, 0x80, 0x44, 0xf3}, + {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe}, + {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe}, + {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4}, + {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14}, + {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19}, + {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9}, + {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6}, + {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26}, + {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b}, + {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb}, + {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31}, + {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1}, + {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc}, + {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c}, + {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5}, + {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5}, + {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8}, + {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8}, + {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2}, + {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12}, + {0x0, 0x94, 
0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f}, + {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef}, + {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d}, + {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d}, + {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90}, + {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60}, + {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a}, + {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a}, + {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77}, + {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87}, + {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe}, + {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e}, + {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43}, + {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3}, + {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59}, + {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9}, + {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4}, + {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54}, + {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd}, + {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d}, + {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 
0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40}, + {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0}, + {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a}, + {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa}, + {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7}, + {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57}, + {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e}, + {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e}, + {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93}, + {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63}, + {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89}, + {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79}, + {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74}, + {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84}, + {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6}, + {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6}, + {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb}, + {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb}, + {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1}, + {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11}, + {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 
0x7a, 0x2f, 0x1c}, + {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec}, + {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5}, + {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25}, + {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28}, + {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8}, + {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32}, + {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2}, + {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf}, + {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f}, + {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1}, + {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41}, + {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c}, + {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc}, + {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56}, + {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6}, + {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab}, + {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b}, + {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62}, + {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92}, + {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f}, + {0x0, 0x58, 0xb0, 
0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f}, + {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85}, + {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75}, + {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78}, + {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88}, + {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa}, + {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa}, + {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7}, + {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7}, + {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed}, + {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d}, + {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10}, + {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0}, + {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9}, + {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29}, + {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24}, + {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4}, + {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e}, + {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce}, + {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3}, + {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 
0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33}, + {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda}, + {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a}, + {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27}, + {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7}, + {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d}, + {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd}, + {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0}, + {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30}, + {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9}, + {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9}, + {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4}, + {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4}, + {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee}, + {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e}, + {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13}, + {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3}, + {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61}, + {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91}, + {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c}, + {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 
0x6c}, + {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86}, + {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76}, + {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b}, + {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b}, + {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2}, + {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42}, + {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f}, + {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf}, + {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55}, + {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5}, + {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8}, + {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58}, + {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67}, + {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97}, + {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a}, + {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a}, + {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80}, + {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70}, + {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d}, + {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d}, + {0x0, 0x1c, 0x38, 0x24, 
0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4}, + {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44}, + {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49}, + {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9}, + {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53}, + {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3}, + {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae}, + {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e}, + {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc}, + {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c}, + {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21}, + {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1}, + {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b}, + {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb}, + {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6}, + {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36}, + {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf}, + {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff}, + {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2}, + {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2}, + {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 
0xf1, 0x2b, 0x6a, 0xa9, 0xe8}, + {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18}, + {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15}, + {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5}, + {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc}, + {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc}, + {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1}, + {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1}, + {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb}, + {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b}, + {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16}, + {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6}, + {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf}, + {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f}, + {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22}, + {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2}, + {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38}, + {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8}, + {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5}, + {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35}, + {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7}, + {0x0, 
0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47}, + {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a}, + {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba}, + {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50}, + {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0}, + {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad}, + {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d}, + {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64}, + {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94}, + {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99}, + {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69}, + {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83}, + {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73}, + {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e}, + {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e}} + +// galMultiply multiplies to elements of the field. +// Uses lookup table ~40% faster +func galMultiply(a, b byte) byte { + return mulTable[a][b] +} + +// Original function: +/* +// galMultiply multiplies to elements of the field. +func galMultiply(a, b byte) byte { + if a == 0 || b == 0 { + return 0 + } + logA := int(logTable[a]) + logB := int(logTable[b]) + return expTable[logA+logB] +} +*/ + +// galDivide is inverse of galMultiply. 
+func galDivide(a, b byte) byte { + if a == 0 { + return 0 + } + if b == 0 { + panic("Argument 'divisor' is 0") + } + logA := int(logTable[a]) + logB := int(logTable[b]) + logResult := logA - logB + if logResult < 0 { + logResult += 255 + } + return expTable[logResult] +} + +// galExp computes a**n. +// +// The result will be the same as multiplying a times itself n times. +// n == 0 returns 1 (checked first, so also when a == 0); otherwise a == 0 returns 0. +// For all other inputs the log of a is multiplied by n and brought below 255 by +// repeated subtraction before it is used as an index into expTable. +// NOTE(review): a negative n can produce a negative table index; callers +// presumably pass n >= 0 - confirm. +func galExp(a byte, n int) byte { + if n == 0 { + return 1 + } + if a == 0 { + return 0 + } + + logA := logTable[a] + logResult := int(logA) * n + for logResult >= 255 { + logResult -= 255 + } + return expTable[logResult] +} + +// genAvx2Matrix writes the lookup tables for the coefficients found in the +// first outputs rows and first inputs columns of matrixRows into dst and returns it. +// Each coefficient takes 64 bytes: its mulTableLow row twice, followed by its +// mulTableHigh row twice. Entries are stored input-major (all outputs for +// input 0, then all outputs for input 1, and so on). +// dst is resliced to inputs*outputs*64 bytes when its capacity allows; +// otherwise a new slice of that length is allocated. +// It panics with "codegen not enabled" when avx2CodeGen is false. +func genAvx2Matrix(matrixRows [][]byte, inputs, outputs int, dst []byte) []byte { + if !avx2CodeGen { + panic("codegen not enabled") + } + total := inputs * outputs + + // Duplicated in+out + wantBytes := total * 32 * 2 + if cap(dst) < wantBytes { + dst = make([]byte, wantBytes) + } else { + dst = dst[:wantBytes] + } + for i, row := range matrixRows[:outputs] { + for j, idx := range row[:inputs] { + dstIdx := (j*outputs + i) * 64 + lo := mulTableLow[idx][:] + hi := mulTableHigh[idx][:] + copy(dst[dstIdx:], lo) + copy(dst[dstIdx+16:], lo) + copy(dst[dstIdx+32:], hi) + copy(dst[dstIdx+48:], hi) + } + } + return dst +} diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go new file mode 100644 index 0000000..720196f --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go @@ -0,0 +1,338 @@ +//+build !noasm +//+build !appengine +//+build !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2019, Minio, Inc. 
+ +package reedsolomon + +import ( + "sync" +) + +//go:noescape +func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool) + +//go:noescape +func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool) + +//go:noescape +func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool) + +const ( + dimIn = 8 // Number of input rows processed simultaneously + dimOut81 = 1 // Number of output rows processed simultaneously for x1 routine + dimOut82 = 2 // Number of output rows processed simultaneously for x2 routine + dimOut84 = 4 // Number of output rows processed simultaneously for x4 routine + matrixSize81 = (16 + 16) * dimIn * dimOut81 // Dimension of slice of matrix coefficient passed into x1 routine + matrixSize82 = (16 + 16) * dimIn * dimOut82 // Dimension of slice of matrix coefficient passed into x2 routine + matrixSize84 = (16 + 16) * dimIn * dimOut84 // Dimension of slice of matrix coefficient passed into x4 routine +) + +// Construct block of matrix coefficients for single output row in parallel +func setupMatrix81(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize81]byte) { + offset := 0 + for c := inputOffset; c < inputOffset+dimIn; c++ { + for iRow := outputOffset; iRow < outputOffset+dimOut81; iRow++ { + if c < len(matrixRows[iRow]) { + coeff := matrixRows[iRow][c] + copy(matrix[offset*32:], mulTableLow[coeff][:]) + copy(matrix[offset*32+16:], mulTableHigh[coeff][:]) + } else { + // coefficients not used for this input shard (so null out) + v := matrix[offset*32 : offset*32+32] + for i := range v { + v[i] = 0 + } + } + offset += dimIn + if offset >= dimIn*dimOut81 { + offset -= dimIn*dimOut81 - 1 + } + } + } +} + +// Construct block of matrix coefficients for 2 output rows in parallel +func setupMatrix82(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize82]byte) { + offset := 0 + for c := inputOffset; c < inputOffset+dimIn; c++ { + for 
iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ { + if c < len(matrixRows[iRow]) { + coeff := matrixRows[iRow][c] + copy(matrix[offset*32:], mulTableLow[coeff][:]) + copy(matrix[offset*32+16:], mulTableHigh[coeff][:]) + } else { + // coefficients not used for this input shard (so null out) + v := matrix[offset*32 : offset*32+32] + for i := range v { + v[i] = 0 + } + } + offset += dimIn + if offset >= dimIn*dimOut82 { + offset -= dimIn*dimOut82 - 1 + } + } + } +} + +// Construct block of matrix coefficients for 4 output rows in parallel +func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize84]byte) { + offset := 0 + for c := inputOffset; c < inputOffset+dimIn; c++ { + for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ { + if c < len(matrixRows[iRow]) { + coeff := matrixRows[iRow][c] + copy(matrix[offset*32:], mulTableLow[coeff][:]) + copy(matrix[offset*32+16:], mulTableHigh[coeff][:]) + } else { + // coefficients not used for this input shard (so null out) + v := matrix[offset*32 : offset*32+32] + for i := range v { + v[i] = 0 + } + } + offset += dimIn + if offset >= dimIn*dimOut84 { + offset -= dimIn*dimOut84 - 1 + } + } + } +} + +// Invoke AVX512 routine for single output row in parallel +func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) { + done := stop - start + if done <= 0 { + return + } + + inputEnd := inputOffset + dimIn + if inputEnd > len(in) { + inputEnd = len(in) + } + outputEnd := outputOffset + dimOut81 + if outputEnd > len(out) { + outputEnd = len(out) + } + + // We know the max size, alloc temp array. 
+ var inTmp [dimIn][]byte + for i, v := range in[inputOffset:inputEnd] { + inTmp[i] = v[start:stop] + } + var outTmp [dimOut81][]byte + for i, v := range out[outputOffset:outputEnd] { + outTmp[i] = v[start:stop] + } + + addTo := inputOffset != 0 // Except for the first input column, add to previous results + _galMulAVX512Parallel81(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix81, addTo) + + done = start + ((done >> 6) << 6) + if done < stop { + galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in) + } +} + +// Invoke AVX512 routine for 2 output rows in parallel +func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) { + done := stop - start + if done <= 0 { + return + } + + inputEnd := inputOffset + dimIn + if inputEnd > len(in) { + inputEnd = len(in) + } + outputEnd := outputOffset + dimOut82 + if outputEnd > len(out) { + outputEnd = len(out) + } + + // We know the max size, alloc temp array. 
+ var inTmp [dimIn][]byte + for i, v := range in[inputOffset:inputEnd] { + inTmp[i] = v[start:stop] + } + var outTmp [dimOut82][]byte + for i, v := range out[outputOffset:outputEnd] { + outTmp[i] = v[start:stop] + } + + addTo := inputOffset != 0 // Except for the first input column, add to previous results + _galMulAVX512Parallel82(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix82, addTo) + + done = start + ((done >> 6) << 6) + if done < stop { + galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in) + } +} + +// Invoke AVX512 routine for 4 output rows in parallel +func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) { + done := stop - start + if done <= 0 { + return + } + + inputEnd := inputOffset + dimIn + if inputEnd > len(in) { + inputEnd = len(in) + } + outputEnd := outputOffset + dimOut84 + if outputEnd > len(out) { + outputEnd = len(out) + } + + // We know the max size, alloc temp array. 
+ var inTmp [dimIn][]byte + for i, v := range in[inputOffset:inputEnd] { + inTmp[i] = v[start:stop] + } + var outTmp [dimOut84][]byte + for i, v := range out[outputOffset:outputEnd] { + outTmp[i] = v[start:stop] + } + + addTo := inputOffset != 0 // Except for the first input column, add to previous results + _galMulAVX512Parallel84(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix84, addTo) + + done = start + ((done >> 6) << 6) + if done < stop { + galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in) + } +} + +func galMulAVX512LastInput(inputOffset int, inputEnd int, outputOffset int, outputEnd int, matrixRows [][]byte, done int, stop int, out [][]byte, in [][]byte) { + for c := inputOffset; c < inputEnd; c++ { + for iRow := outputOffset; iRow < outputEnd; iRow++ { + if c < len(matrixRows[iRow]) { + mt := mulTable[matrixRows[iRow][c]][:256] + for i := done; i < stop; i++ { + if c == 0 { // only set value for first input column + out[iRow][i] = mt[in[c][i]] + } else { // and add for all others + out[iRow][i] ^= mt[in[c][i]] + } + } + } + } + } +} + +// Perform the same as codeSomeShards, but taking advantage of +// AVX512 parallelism for up to 4x faster execution as compared to AVX2 +func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) { + // Process using no goroutines + start, end := 0, r.o.perRound + if end > byteCount { + end = byteCount + } + for start < byteCount { + matrix84 := [matrixSize84]byte{} + matrix82 := [matrixSize82]byte{} + matrix81 := [matrixSize81]byte{} + + outputRow := 0 + // First process (multiple) batches of 4 output rows in parallel + if outputRow+dimOut84 <= outputCount { + for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 { + for inputRow := 0; inputRow < len(inputs); inputRow += dimIn { + setupMatrix84(matrixRows, inputRow, outputRow, &matrix84) + galMulAVX512Parallel84(inputs, outputs, matrixRows, 
inputRow, outputRow, start, end, &matrix84) + } + } + } + // Then process a (single) batch of 2 output rows in parallel + if outputRow+dimOut82 <= outputCount { + for inputRow := 0; inputRow < len(inputs); inputRow += dimIn { + setupMatrix82(matrixRows, inputRow, outputRow, &matrix82) + galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix82) + } + outputRow += dimOut82 + } + // Lastly, we may have a single output row left (for uneven parity) + if outputRow < outputCount { + for inputRow := 0; inputRow < len(inputs); inputRow += dimIn { + setupMatrix81(matrixRows, inputRow, outputRow, &matrix81) + galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix81) + } + } + + start = end + end += r.o.perRound + if end > byteCount { + end = byteCount + } + } +} + +// Perform the same as codeSomeShards, but taking advantage of +// AVX512 parallelism for up to 4x faster execution as compared to AVX2 +func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) { + var wg sync.WaitGroup + do := byteCount / r.o.maxGoroutines + if do < r.o.minSplitSize { + do = r.o.minSplitSize + } + // Make sizes divisible by 64 + do = (do + 63) & (^63) + start := 0 + for start < byteCount { + if start+do > byteCount { + do = byteCount - start + } + wg.Add(1) + go func(grStart, grStop int) { + start, stop := grStart, grStart+r.o.perRound + if stop > grStop { + stop = grStop + } + // Loop for each round. 
+ matrix84 := [matrixSize84]byte{} + matrix82 := [matrixSize82]byte{} + matrix81 := [matrixSize81]byte{} + for start < grStop { + outputRow := 0 + // First process (multiple) batches of 4 output rows in parallel + if outputRow+dimOut84 <= outputCount { + // 1K matrix buffer + for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 { + for inputRow := 0; inputRow < len(inputs); inputRow += dimIn { + setupMatrix84(matrixRows, inputRow, outputRow, &matrix84) + galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix84) + } + } + } + // Then process a (single) batch of 2 output rows in parallel + if outputRow+dimOut82 <= outputCount { + // 512B matrix buffer + for inputRow := 0; inputRow < len(inputs); inputRow += dimIn { + setupMatrix82(matrixRows, inputRow, outputRow, &matrix82) + galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix82) + } + outputRow += dimOut82 + } + // Lastly, we may have a single output row left (for uneven parity) + if outputRow < outputCount { + for inputRow := 0; inputRow < len(inputs); inputRow += dimIn { + setupMatrix81(matrixRows, inputRow, outputRow, &matrix81) + galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix81) + } + } + start = stop + stop += r.o.perRound + if stop > grStop { + stop = grStop + } + } + wg.Done() + }(start, start+do) + start += do + } + wg.Wait() +} diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s new file mode 100644 index 0000000..97ad420 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s @@ -0,0 +1,400 @@ +//+build !noasm !appengine !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2019, Minio, Inc. 
// LOAD reads the slice data pointer stored at OFFSET(SI) (one entry of the
// input slice-header array), loads 64 bytes from it at byte offset R11 into
// Z0, and splits every byte into its low nibble (Z0) and high nibble (Z1)
// using the 0x0f mask held in Z2. Clobbers BX.
#define LOAD(OFFSET) \
	MOVQ OFFSET(SI), BX \
	VMOVDQU64 (BX)(R11*1), Z0 \
	VPSRLQ $4, Z0, Z1 \ // high input
	VPANDQ Z2, Z0, Z0 \ // low input
	VPANDQ Z2, Z1, Z1 // high input

// GALOIS_MUL looks up the low/high nibbles from LOAD in the tables MUL_LO and
// MUL_HI and folds both products into OUT (ternary-logic immediate 0x96 is a
// three-way XOR, i.e. OUT ^= LO ^ HI). LO and HI are scratch.
#define GALOIS_MUL(MUL_LO, MUL_HI, LO, HI, OUT) \
	VPSHUFB Z0, MUL_LO, LO \ // mul low part
	VPSHUFB Z1, MUL_HI, HI \ // mul high part
	VPTERNLOGD $0x96, LO, HI, OUT

// GALOIS first broadcasts two 128-bit lanes of IN (selected by the shuffle
// immediates C1 and C2) into LO and HI, then multiplies with them as
// GALOIS_MUL does. Used when there are not enough registers to keep the
// tables pre-broadcast.
#define GALOIS(C1, C2, IN, LO, HI, OUT) \
	VSHUFI64X2 $C1, IN, IN, LO \
	VSHUFI64X2 $C2, IN, IN, HI \
	GALOIS_MUL(LO, HI, LO, HI, OUT)

//
// Process single output row from a total of 8 input rows
//
// Register use inside the loop:
//   SI  - base of the `in` slice-header array (24 bytes per entry)
//   AX  - number of input slices actually supplied
//   R9  - remaining 64-byte blocks
//   R11 - byte offset into every input slice
//   DX  - write pointer into out[0]
//   K1  - all ones when addTo is set, zero otherwise (zeroing-masked load of out)
//   Z2  - 0x0f nibble mask, Z16-Z31 - pre-broadcast low/high tables
//
// func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
TEXT ·_galMulAVX512Parallel81(SB), 7, $0
	MOVQ in+0(FP), SI
	MOVQ 8(SI), R9 // R9: len(in[0])
	SHRQ $6, R9 // len(in[0]) / 64
	TESTQ R9, R9
	JZ done_avx512_parallel81

	MOVQ matrix+48(FP), SI
	VMOVDQU64 0x000(SI), Z16
	VMOVDQU64 0x040(SI), Z17
	VMOVDQU64 0x080(SI), Z18
	VMOVDQU64 0x0c0(SI), Z19

	// Initialize multiplication constants
	VSHUFI64X2 $0x55, Z16, Z16, Z20
	VSHUFI64X2 $0xaa, Z16, Z16, Z24
	VSHUFI64X2 $0xff, Z16, Z16, Z28
	VSHUFI64X2 $0x00, Z16, Z16, Z16

	VSHUFI64X2 $0x55, Z17, Z17, Z21
	VSHUFI64X2 $0xaa, Z17, Z17, Z25
	VSHUFI64X2 $0xff, Z17, Z17, Z29
	VSHUFI64X2 $0x00, Z17, Z17, Z17

	VSHUFI64X2 $0x55, Z18, Z18, Z22
	VSHUFI64X2 $0xaa, Z18, Z18, Z26
	VSHUFI64X2 $0xff, Z18, Z18, Z30
	VSHUFI64X2 $0x00, Z18, Z18, Z18

	VSHUFI64X2 $0x55, Z19, Z19, Z23
	VSHUFI64X2 $0xaa, Z19, Z19, Z27
	VSHUFI64X2 $0xff, Z19, Z19, Z31
	VSHUFI64X2 $0x00, Z19, Z19, Z19

	MOVQ $15, BX
	VPBROADCASTB BX, Z2

	MOVB addTo+56(FP), AX
	IMULQ $-0x1, AX
	KMOVQ AX, K1
	MOVQ in+0(FP), SI // SI: &in
	MOVQ in_len+8(FP), AX // number of inputs
	XORQ R11, R11
	MOVQ out+24(FP), DX
	MOVQ (DX), DX // DX: &out[0][0]

loopback_avx512_parallel81:
	VMOVDQU64.Z (DX), K1, Z4

	LOAD(0x00) // &in[0][0]
	GALOIS_MUL(Z16, Z20, Z14, Z15, Z4)

	CMPQ AX, $1
	JE skip_avx512_parallel81

	LOAD(0x18) // &in[1][0]
	GALOIS_MUL(Z24, Z28, Z14, Z15, Z4)

	CMPQ AX, $2
	JE skip_avx512_parallel81

	LOAD(0x30) // &in[2][0]
	GALOIS_MUL(Z17, Z21, Z14, Z15, Z4)

	CMPQ AX, $3
	JE skip_avx512_parallel81

	LOAD(0x48) // &in[3][0]
	GALOIS_MUL(Z25, Z29, Z14, Z15, Z4)

	CMPQ AX, $4
	JE skip_avx512_parallel81

	LOAD(0x60) // &in[4][0]
	GALOIS_MUL(Z18, Z22, Z14, Z15, Z4)

	CMPQ AX, $5
	JE skip_avx512_parallel81

	LOAD(0x78) // &in[5][0]
	GALOIS_MUL(Z26, Z30, Z14, Z15, Z4)

	CMPQ AX, $6
	JE skip_avx512_parallel81

	LOAD(0x90) // &in[6][0]
	GALOIS_MUL(Z19, Z23, Z14, Z15, Z4)

	CMPQ AX, $7
	JE skip_avx512_parallel81

	LOAD(0xa8) // &in[7][0]
	GALOIS_MUL(Z27, Z31, Z14, Z15, Z4)

skip_avx512_parallel81:
	VMOVDQU64 Z4, (DX)

	ADDQ $64, R11 // shared input offset += 64

	ADDQ $64, DX // out+=64

	SUBQ $1, R9
	JNZ loopback_avx512_parallel81

done_avx512_parallel81:
	VZEROUPPER
	RET

//
// Process 2 output rows in parallel from a total of 8 input rows
//
// Same register roles as the single-row routine, plus CX as the write pointer
// into out[1]. Tables for inputs 0-3 are pre-broadcast; inputs 4-7 are
// broadcast on the fly from Z18/Z19 (row 0) and Z22/Z23 (row 1) via GALOIS.
//
// func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
TEXT ·_galMulAVX512Parallel82(SB), 7, $0
	MOVQ in+0(FP), SI
	MOVQ 8(SI), R9 // R9: len(in[0])
	SHRQ $6, R9 // len(in[0]) / 64
	TESTQ R9, R9
	JZ done_avx512_parallel82

	MOVQ matrix+48(FP), SI
	VMOVDQU64 0x000(SI), Z16
	VMOVDQU64 0x040(SI), Z17
	VMOVDQU64 0x080(SI), Z18
	VMOVDQU64 0x0c0(SI), Z19
	VMOVDQU64 0x100(SI), Z20
	VMOVDQU64 0x140(SI), Z21
	VMOVDQU64 0x180(SI), Z22
	VMOVDQU64 0x1c0(SI), Z23

	// Initialize multiplication constants
	VSHUFI64X2 $0x55, Z16, Z16, Z24
	VSHUFI64X2 $0xaa, Z16, Z16, Z25
	VSHUFI64X2 $0xff, Z16, Z16, Z26
	VSHUFI64X2 $0x00, Z16, Z16, Z16

	VSHUFI64X2 $0x55, Z20, Z20, Z27
	VSHUFI64X2 $0xaa, Z20, Z20, Z28
	VSHUFI64X2 $0xff, Z20, Z20, Z29
	VSHUFI64X2 $0x00, Z20, Z20, Z20

	VSHUFI64X2 $0x55, Z17, Z17, Z30
	VSHUFI64X2 $0xaa, Z17, Z17, Z31
	VSHUFI64X2 $0xff, Z17, Z17, Z11
	VSHUFI64X2 $0x00, Z17, Z17, Z17

	VSHUFI64X2 $0x55, Z21, Z21, Z8
	VSHUFI64X2 $0xaa, Z21, Z21, Z9
	VSHUFI64X2 $0xff, Z21, Z21, Z10
	VSHUFI64X2 $0x00, Z21, Z21, Z21

	MOVQ $15, BX
	VPBROADCASTB BX, Z2

	MOVB addTo+56(FP), AX
	IMULQ $-0x1, AX
	KMOVQ AX, K1
	MOVQ in+0(FP), SI // SI: &in
	MOVQ in_len+8(FP), AX // number of inputs
	XORQ R11, R11
	MOVQ out+24(FP), DX
	MOVQ 24(DX), CX // CX: &out[1][0]
	MOVQ (DX), DX // DX: &out[0][0]

loopback_avx512_parallel82:
	VMOVDQU64.Z (DX), K1, Z4
	VMOVDQU64.Z (CX), K1, Z5

	LOAD(0x00) // &in[0][0]
	GALOIS_MUL(Z16, Z24, Z14, Z15, Z4)
	GALOIS_MUL(Z20, Z27, Z12, Z13, Z5)

	CMPQ AX, $1
	JE skip_avx512_parallel82

	LOAD(0x18) // &in[1][0]
	GALOIS_MUL(Z25, Z26, Z14, Z15, Z4)
	GALOIS_MUL(Z28, Z29, Z12, Z13, Z5)

	CMPQ AX, $2
	JE skip_avx512_parallel82

	LOAD(0x30) // &in[2][0]
	GALOIS_MUL(Z17, Z30, Z14, Z15, Z4)
	GALOIS_MUL(Z21, Z8, Z12, Z13, Z5)

	CMPQ AX, $3
	JE skip_avx512_parallel82

	LOAD(0x48) // &in[3][0]
	GALOIS_MUL(Z31, Z11, Z14, Z15, Z4)
	GALOIS_MUL(Z9, Z10, Z12, Z13, Z5)

	CMPQ AX, $4
	JE skip_avx512_parallel82

	LOAD(0x60) // &in[4][0]
	GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
	GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)

	CMPQ AX, $5
	JE skip_avx512_parallel82

	LOAD(0x78) // &in[5][0]
	GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
	GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)

	CMPQ AX, $6
	JE skip_avx512_parallel82

	LOAD(0x90) // &in[6][0]
	GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
	GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)

	CMPQ AX, $7
	JE skip_avx512_parallel82

	LOAD(0xa8) // &in[7][0]
	GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
	GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)

skip_avx512_parallel82:
	VMOVDQU64 Z4, (DX)
	VMOVDQU64 Z5, (CX)

	ADDQ $64, R11 // shared input offset += 64

	ADDQ $64, DX // out+=64
	ADDQ $64, CX // out2+=64

	SUBQ $1, R9
	JNZ loopback_avx512_parallel82

done_avx512_parallel82:
	VZEROUPPER
	RET

//
// Process 4 output rows in parallel from a total of 8 input rows
//
// Same register roles as the single-row routine, plus CX, R10 and R12 as the
// write pointers into out[1], out[2] and out[3]. All sixteen table registers
// hold packed tables (two inputs per register), so every multiply broadcasts
// its lanes on the fly via GALOIS.
//
// func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
TEXT ·_galMulAVX512Parallel84(SB), 7, $0
	MOVQ in+0(FP), SI
	MOVQ 8(SI), R9 // R9: len(in[0])
	SHRQ $6, R9 // len(in[0]) / 64
	TESTQ R9, R9
	JZ done_avx512_parallel84

	MOVQ matrix+48(FP), SI
	VMOVDQU64 0x000(SI), Z16
	VMOVDQU64 0x040(SI), Z17
	VMOVDQU64 0x080(SI), Z18
	VMOVDQU64 0x0c0(SI), Z19
	VMOVDQU64 0x100(SI), Z20
	VMOVDQU64 0x140(SI), Z21
	VMOVDQU64 0x180(SI), Z22
	VMOVDQU64 0x1c0(SI), Z23
	VMOVDQU64 0x200(SI), Z24
	VMOVDQU64 0x240(SI), Z25
	VMOVDQU64 0x280(SI), Z26
	VMOVDQU64 0x2c0(SI), Z27
	VMOVDQU64 0x300(SI), Z28
	VMOVDQU64 0x340(SI), Z29
	VMOVDQU64 0x380(SI), Z30
	VMOVDQU64 0x3c0(SI), Z31

	MOVQ $15, BX
	VPBROADCASTB BX, Z2

	MOVB addTo+56(FP), AX
	IMULQ $-0x1, AX
	KMOVQ AX, K1
	MOVQ in+0(FP), SI // SI: &in
	MOVQ in_len+8(FP), AX // number of inputs
	XORQ R11, R11
	MOVQ out+24(FP), DX
	MOVQ 24(DX), CX // CX: &out[1][0]
	MOVQ 48(DX), R10 // R10: &out[2][0]
	MOVQ 72(DX), R12 // R12: &out[3][0]
	MOVQ (DX), DX // DX: &out[0][0]

loopback_avx512_parallel84:
	VMOVDQU64.Z (DX), K1, Z4
	VMOVDQU64.Z (CX), K1, Z5
	VMOVDQU64.Z (R10), K1, Z6
	VMOVDQU64.Z (R12), K1, Z7

	LOAD(0x00) // &in[0][0]
	GALOIS(0x00, 0x55, Z16, Z14, Z15, Z4)
	GALOIS(0x00, 0x55, Z20, Z12, Z13, Z5)
	GALOIS(0x00, 0x55, Z24, Z10, Z11, Z6)
	GALOIS(0x00, 0x55, Z28, Z8, Z9, Z7)

	CMPQ AX, $1
	JE skip_avx512_parallel84

	LOAD(0x18) // &in[1][0]
	GALOIS(0xaa, 0xff, Z16, Z14, Z15, Z4)
	GALOIS(0xaa, 0xff, Z20, Z12, Z13, Z5)
	GALOIS(0xaa, 0xff, Z24, Z10, Z11, Z6)
	GALOIS(0xaa, 0xff, Z28, Z8, Z9, Z7)

	CMPQ AX, $2
	JE skip_avx512_parallel84

	LOAD(0x30) // &in[2][0]
	GALOIS(0x00, 0x55, Z17, Z14, Z15, Z4)
	GALOIS(0x00, 0x55, Z21, Z12, Z13, Z5)
	GALOIS(0x00, 0x55, Z25, Z10, Z11, Z6)
	GALOIS(0x00, 0x55, Z29, Z8, Z9, Z7)

	CMPQ AX, $3
	JE skip_avx512_parallel84

	LOAD(0x48) // &in[3][0]
	GALOIS(0xaa, 0xff, Z17, Z14, Z15, Z4)
	GALOIS(0xaa, 0xff, Z21, Z12, Z13, Z5)
	GALOIS(0xaa, 0xff, Z25, Z10, Z11, Z6)
	GALOIS(0xaa, 0xff, Z29, Z8, Z9, Z7)

	CMPQ AX, $4
	JE skip_avx512_parallel84

	LOAD(0x60) // &in[4][0]
	GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
	GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
	GALOIS(0x00, 0x55, Z26, Z10, Z11, Z6)
	GALOIS(0x00, 0x55, Z30, Z8, Z9, Z7)

	CMPQ AX, $5
	JE skip_avx512_parallel84

	LOAD(0x78) // &in[5][0]
	GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
	GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
	GALOIS(0xaa, 0xff, Z26, Z10, Z11, Z6)
	GALOIS(0xaa, 0xff, Z30, Z8, Z9, Z7)

	CMPQ AX, $6
	JE skip_avx512_parallel84

	LOAD(0x90) // &in[6][0]
	GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
	GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
	GALOIS(0x00, 0x55, Z27, Z10, Z11, Z6)
	GALOIS(0x00, 0x55, Z31, Z8, Z9, Z7)

	CMPQ AX, $7
	JE skip_avx512_parallel84

	LOAD(0xa8) // &in[7][0]
	GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
	GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
	GALOIS(0xaa, 0xff, Z27, Z10, Z11, Z6)
	GALOIS(0xaa, 0xff, Z31, Z8, Z9, Z7)

skip_avx512_parallel84:
	VMOVDQU64 Z4, (DX)
	VMOVDQU64 Z5, (CX)
	VMOVDQU64 Z6, (R10)
	VMOVDQU64 Z7, (R12)

	ADDQ $64, R11 // shared input offset += 64

	ADDQ $64, DX // out+=64
	ADDQ $64, CX // out2+=64
	ADDQ $64, R10 // out3+=64
	ADDQ $64, R12 // out4+=64

	SUBQ $1, R9
	JNZ loopback_avx512_parallel84

done_avx512_parallel84:
	VZEROUPPER
	RET
+ +package reedsolomon + +//go:noescape +func galMulSSSE3(low, high, in, out []byte) + +//go:noescape +func galMulSSSE3Xor(low, high, in, out []byte) + +//go:noescape +func galMulAVX2Xor(low, high, in, out []byte) + +//go:noescape +func galMulAVX2(low, high, in, out []byte) + +//go:noescape +func sSE2XorSlice(in, out []byte) + +//go:noescape +func galMulAVX2Xor_64(low, high, in, out []byte) + +//go:noescape +func galMulAVX2_64(low, high, in, out []byte) + +//go:noescape +func sSE2XorSlice_64(in, out []byte) + +// This is what the assembler routines do in blocks of 16 bytes: +/* +func galMulSSSE3(low, high, in, out []byte) { + for n, input := range in { + l := input & 0xf + h := input >> 4 + out[n] = low[l] ^ high[h] + } +} + +func galMulSSSE3Xor(low, high, in, out []byte) { + for n, input := range in { + l := input & 0xf + h := input >> 4 + out[n] ^= low[l] ^ high[h] + } +} +*/ + +// bigSwitchover is the size where 64 bytes are processed per loop. +const bigSwitchover = 128 + +func galMulSlice(c byte, in, out []byte, o *options) { + if c == 1 { + copy(out, in) + return + } + if o.useAVX2 { + if len(in) >= bigSwitchover { + galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done := (len(in) >> 6) << 6 + in = in[done:] + out = out[done:] + } + if len(in) > 32 { + galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done := (len(in) >> 5) << 5 + in = in[done:] + out = out[done:] + } + } else if o.useSSSE3 { + galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done := (len(in) >> 4) << 4 + in = in[done:] + out = out[done:] + } + out = out[:len(in)] + mt := mulTable[c][:256] + for i := range in { + out[i] = mt[in[i]] + } +} + +func galMulSliceXor(c byte, in, out []byte, o *options) { + if c == 1 { + sliceXor(in, out, o) + return + } + + if o.useAVX2 { + if len(in) >= bigSwitchover { + galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done := (len(in) >> 6) << 6 + in = in[done:] + out = out[done:] + } + if len(in) >= 32 { 
+ galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done := (len(in) >> 5) << 5 + in = in[done:] + out = out[done:] + } + } else if o.useSSSE3 { + galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done := (len(in) >> 4) << 4 + in = in[done:] + out = out[done:] + } + out = out[:len(in)] + mt := mulTable[c][:256] + for i := range in { + out[i] ^= mt[in[i]] + } +} + +// slice galois add +func sliceXor(in, out []byte, o *options) { + if o.useSSE2 { + if len(in) >= bigSwitchover { + sSE2XorSlice_64(in, out) + done := (len(in) >> 6) << 6 + in = in[done:] + out = out[done:] + } + if len(in) >= 16 { + sSE2XorSlice(in, out) + done := (len(in) >> 4) << 4 + in = in[done:] + out = out[done:] + } + } + out = out[:len(in)] + for i := range in { + out[i] ^= in[i] + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s new file mode 100644 index 0000000..3501110 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s @@ -0,0 +1,368 @@ +//+build !noasm !appengine !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. 
// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
// and http://jerasure.org/jerasure/gf-complete/tree/master

// galMulSSSE3Xor XORs the GF(2^8) product of every input byte into out,
// 16 bytes per iteration. The aligned loop is used only when both the in and
// out pointers are 16-byte aligned; bytes beyond the last whole block are not
// touched.
//
// func galMulSSSE3Xor(low, high, in, out []byte)
TEXT ·galMulSSSE3Xor(SB), 7, $0
	MOVQ low+0(FP), SI // SI: &low
	MOVQ high+24(FP), DX // DX: &high
	MOVOU (SI), X6 // X6 low
	MOVOU (DX), X7 // X7: high
	MOVQ $15, BX // BX: low mask
	MOVQ BX, X8
	PXOR X5, X5
	MOVQ in+48(FP), SI // SI: &in
	MOVQ in_len+56(FP), R9 // R9: len(in)
	MOVQ out+72(FP), DX // DX: &out
	PSHUFB X5, X8 // X8: lomask (unpacked)
	SHRQ $4, R9 // len(in) / 16
	MOVQ SI, AX
	MOVQ DX, BX
	ANDQ $15, AX
	ANDQ $15, BX
	CMPQ R9, $0
	JEQ done_xor
	ORQ AX, BX // non-zero if either pointer is unaligned
	CMPQ BX, $0
	JNZ loopback_xor

loopback_xor_aligned:
	MOVOA (SI), X0 // in[x]
	MOVOA (DX), X4 // out[x]
	MOVOA X0, X1 // in[x]
	MOVOA X6, X2 // low copy
	MOVOA X7, X3 // high copy
	PSRLQ $4, X1 // X1: high input
	PAND X8, X0 // X0: low input
	PAND X8, X1 // X1: high input
	PSHUFB X0, X2 // X2: mul low part
	PSHUFB X1, X3 // X3: mul high part
	PXOR X2, X3 // X3: Result
	PXOR X4, X3 // X3: Result xor existing out
	MOVOA X3, (DX) // Store
	ADDQ $16, SI // in+=16
	ADDQ $16, DX // out+=16
	SUBQ $1, R9
	JNZ loopback_xor_aligned
	JMP done_xor

loopback_xor:
	MOVOU (SI), X0 // in[x]
	MOVOU (DX), X4 // out[x]
	MOVOU X0, X1 // in[x]
	MOVOU X6, X2 // low copy
	MOVOU X7, X3 // high copy
	PSRLQ $4, X1 // X1: high input
	PAND X8, X0 // X0: low input
	PAND X8, X1 // X1: high input
	PSHUFB X0, X2 // X2: mul low part
	PSHUFB X1, X3 // X3: mul high part
	PXOR X2, X3 // X3: Result
	PXOR X4, X3 // X3: Result xor existing out
	MOVOU X3, (DX) // Store
	ADDQ $16, SI // in+=16
	ADDQ $16, DX // out+=16
	SUBQ $1, R9
	JNZ loopback_xor

done_xor:
	RET

// galMulSSSE3 writes the GF(2^8) product of every input byte to out,
// 16 bytes per iteration, overwriting the previous contents. Alignment
// handling is the same as in galMulSSSE3Xor.
//
// func galMulSSSE3(low, high, in, out []byte)
TEXT ·galMulSSSE3(SB), 7, $0
	MOVQ low+0(FP), SI // SI: &low
	MOVQ high+24(FP), DX // DX: &high
	MOVOU (SI), X6 // X6 low
	MOVOU (DX), X7 // X7: high
	MOVQ $15, BX // BX: low mask
	MOVQ BX, X8
	PXOR X5, X5
	MOVQ in+48(FP), SI // SI: &in
	MOVQ in_len+56(FP), R9 // R9: len(in)
	MOVQ out+72(FP), DX // DX: &out
	PSHUFB X5, X8 // X8: lomask (unpacked)
	MOVQ SI, AX
	MOVQ DX, BX
	SHRQ $4, R9 // len(in) / 16
	ANDQ $15, AX
	ANDQ $15, BX
	CMPQ R9, $0
	JEQ done
	ORQ AX, BX // non-zero if either pointer is unaligned
	CMPQ BX, $0
	JNZ loopback

loopback_aligned:
	MOVOA (SI), X0 // in[x]
	MOVOA X0, X1 // in[x]
	MOVOA X6, X2 // low copy
	MOVOA X7, X3 // high copy
	PSRLQ $4, X1 // X1: high input
	PAND X8, X0 // X0: low input
	PAND X8, X1 // X1: high input
	PSHUFB X0, X2 // X2: mul low part
	PSHUFB X1, X3 // X3: mul high part
	PXOR X2, X3 // X3: Result
	MOVOA X3, (DX) // Store
	ADDQ $16, SI // in+=16
	ADDQ $16, DX // out+=16
	SUBQ $1, R9
	JNZ loopback_aligned
	JMP done

loopback:
	MOVOU (SI), X0 // in[x]
	MOVOU X0, X1 // in[x]
	MOVOA X6, X2 // low copy
	MOVOA X7, X3 // high copy
	PSRLQ $4, X1 // X1: high input
	PAND X8, X0 // X0: low input
	PAND X8, X1 // X1: high input
	PSHUFB X0, X2 // X2: mul low part
	PSHUFB X1, X3 // X3: mul high part
	PXOR X2, X3 // X3: Result
	MOVOU X3, (DX) // Store
	ADDQ $16, SI // in+=16
	ADDQ $16, DX // out+=16
	SUBQ $1, R9
	JNZ loopback

done:
	RET

// galMulAVX2Xor XORs the GF(2^8) product of every input byte into out,
// 32 bytes per iteration, using unaligned loads and stores.
//
// func galMulAVX2Xor(low, high, in, out []byte)
TEXT ·galMulAVX2Xor(SB), 7, $0
	MOVQ low+0(FP), SI // SI: &low
	MOVQ high+24(FP), DX // DX: &high
	MOVQ $15, BX // BX: low mask
	MOVQ BX, X5
	MOVOU (SI), X6 // X6: low
	MOVOU (DX), X7 // X7: high
	MOVQ in_len+56(FP), R9 // R9: len(in)

	VINSERTI128 $1, X6, Y6, Y6 // low
	VINSERTI128 $1, X7, Y7, Y7 // high
	VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)

	SHRQ $5, R9 // len(in) / 32
	MOVQ out+72(FP), DX // DX: &out
	MOVQ in+48(FP), SI // SI: &in
	TESTQ R9, R9
	JZ done_xor_avx2

loopback_xor_avx2:
	VMOVDQU (SI), Y0
	VMOVDQU (DX), Y4
	VPSRLQ $4, Y0, Y1 // Y1: high input
	VPAND Y8, Y0, Y0 // Y0: low input
	VPAND Y8, Y1, Y1 // Y1: high input
	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
	VPXOR Y3, Y2, Y3 // Y3: Result
	VPXOR Y4, Y3, Y4 // Y4: Result xor existing out
	VMOVDQU Y4, (DX)

	ADDQ $32, SI // in+=32
	ADDQ $32, DX // out+=32
	SUBQ $1, R9
	JNZ loopback_xor_avx2

done_xor_avx2:
	VZEROUPPER
	RET

// galMulAVX2 writes the GF(2^8) product of every input byte to out,
// 32 bytes per iteration, overwriting the previous contents.
//
// func galMulAVX2(low, high, in, out []byte)
TEXT ·galMulAVX2(SB), 7, $0
	MOVQ low+0(FP), SI // SI: &low
	MOVQ high+24(FP), DX // DX: &high
	MOVQ $15, BX // BX: low mask
	MOVQ BX, X5
	MOVOU (SI), X6 // X6: low
	MOVOU (DX), X7 // X7: high
	MOVQ in_len+56(FP), R9 // R9: len(in)

	VINSERTI128 $1, X6, Y6, Y6 // low
	VINSERTI128 $1, X7, Y7, Y7 // high
	VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)

	SHRQ $5, R9 // len(in) / 32
	MOVQ out+72(FP), DX // DX: &out
	MOVQ in+48(FP), SI // SI: &in
	TESTQ R9, R9
	JZ done_avx2

loopback_avx2:
	VMOVDQU (SI), Y0
	VPSRLQ $4, Y0, Y1 // Y1: high input
	VPAND Y8, Y0, Y0 // Y0: low input
	VPAND Y8, Y1, Y1 // Y1: high input
	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
	VPXOR Y3, Y2, Y4 // Y4: Result
	VMOVDQU Y4, (DX)

	ADDQ $32, SI // in+=32
	ADDQ $32, DX // out+=32
	SUBQ $1, R9
	JNZ loopback_avx2

done_avx2:
	VZEROUPPER
	RET

// sSE2XorSlice XORs in into out, 16 bytes per iteration.
//
// func sSE2XorSlice(in, out []byte)
TEXT ·sSE2XorSlice(SB), 7, $0
	MOVQ in+0(FP), SI // SI: &in
	MOVQ in_len+8(FP), R9 // R9: len(in)
	MOVQ out+24(FP), DX // DX: &out
	SHRQ $4, R9 // len(in) / 16
	CMPQ R9, $0
	JEQ done_xor_sse2

loopback_xor_sse2:
	MOVOU (SI), X0 // in[x]
	MOVOU (DX), X1 // out[x]
	PXOR X0, X1
	MOVOU X1, (DX)
	ADDQ $16, SI // in+=16
	ADDQ $16, DX // out+=16
	SUBQ $1, R9
	JNZ loopback_xor_sse2

done_xor_sse2:
	RET

// galMulAVX2Xor_64 is galMulAVX2Xor unrolled to two 32-byte vectors
// (64 bytes) per iteration.
//
// func galMulAVX2Xor_64(low, high, in, out []byte)
TEXT ·galMulAVX2Xor_64(SB), 7, $0
	MOVQ low+0(FP), SI // SI: &low
	MOVQ high+24(FP), DX // DX: &high
	MOVQ $15, BX // BX: low mask
	MOVQ BX, X5
	MOVOU (SI), X6 // X6: low
	MOVOU (DX), X7 // X7: high
	MOVQ in_len+56(FP), R9 // R9: len(in)

	VINSERTI128 $1, X6, Y6, Y6 // low
	VINSERTI128 $1, X7, Y7, Y7 // high
	VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)

	SHRQ $6, R9 // len(in) / 64
	MOVQ out+72(FP), DX // DX: &out
	MOVQ in+48(FP), SI // SI: &in
	TESTQ R9, R9
	JZ done_xor_avx2_64

loopback_xor_avx2_64:
	VMOVDQU (SI), Y0
	VMOVDQU 32(SI), Y10
	VMOVDQU (DX), Y4
	VMOVDQU 32(DX), Y14
	VPSRLQ $4, Y0, Y1 // Y1: high input
	VPSRLQ $4, Y10, Y11 // Y11: high input 2
	VPAND Y8, Y0, Y0 // Y0: low input
	VPAND Y8, Y10, Y10 // Y10: low input 2
	VPAND Y8, Y1, Y1 // Y1: high input
	VPAND Y8, Y11, Y11 // Y11: high input 2
	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
	VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
	VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
	VPXOR Y3, Y2, Y3 // Y3: Result
	VPXOR Y13, Y12, Y13 // Y13: Result 2
	VPXOR Y4, Y3, Y4 // Y4: Result xor existing out
	VPXOR Y14, Y13, Y14 // Y14: Result 2 xor existing out
	VMOVDQU Y4, (DX)
	VMOVDQU Y14, 32(DX)

	ADDQ $64, SI // in+=64
	ADDQ $64, DX // out+=64
	SUBQ $1, R9
	JNZ loopback_xor_avx2_64

done_xor_avx2_64:
	VZEROUPPER
	RET

// galMulAVX2_64 is galMulAVX2 unrolled to two 32-byte vectors (64 bytes) per
// iteration.
//
// func galMulAVX2_64(low, high, in, out []byte)
TEXT ·galMulAVX2_64(SB), 7, $0
	MOVQ low+0(FP), SI // SI: &low
	MOVQ high+24(FP), DX // DX: &high
	MOVQ $15, BX // BX: low mask
	MOVQ BX, X5
	MOVOU (SI), X6 // X6: low
	MOVOU (DX), X7 // X7: high
	MOVQ in_len+56(FP), R9 // R9: len(in)

	VINSERTI128 $1, X6, Y6, Y6 // low
	VINSERTI128 $1, X7, Y7, Y7 // high
	VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)

	SHRQ $6, R9 // len(in) / 64
	MOVQ out+72(FP), DX // DX: &out
	MOVQ in+48(FP), SI // SI: &in
	TESTQ R9, R9
	JZ done_avx2_64

loopback_avx2_64:
	VMOVDQU (SI), Y0
	VMOVDQU 32(SI), Y10
	VPSRLQ $4, Y0, Y1 // Y1: high input
	VPSRLQ $4, Y10, Y11 // Y11: high input 2
	VPAND Y8, Y0, Y0 // Y0: low input
	VPAND Y8, Y10, Y10 // Y10: low input 2
	VPAND Y8, Y1, Y1 // Y1: high input
	VPAND Y8, Y11, Y11 // Y11: high input 2
	VPSHUFB Y0, Y6, Y2 // Y2: mul low part
	VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
	VPSHUFB Y1, Y7, Y3 // Y3: mul high part
	VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
	VPXOR Y3, Y2, Y4 // Y4: Result
	VPXOR Y13, Y12, Y14 // Y14: Result 2
	VMOVDQU Y4, (DX)
	VMOVDQU Y14, 32(DX)

	ADDQ $64, SI // in+=64
	ADDQ $64, DX // out+=64
	SUBQ $1, R9
	JNZ loopback_avx2_64

done_avx2_64:
	VZEROUPPER
	RET

// sSE2XorSlice_64 XORs in into out, 64 bytes (four 16-byte vectors) per
// iteration.
//
// func sSE2XorSlice_64(in, out []byte)
TEXT ·sSE2XorSlice_64(SB), 7, $0
	MOVQ in+0(FP), SI // SI: &in
	MOVQ in_len+8(FP), R9 // R9: len(in)
	MOVQ out+24(FP), DX // DX: &out
	SHRQ $6, R9 // len(in) / 64
	CMPQ R9, $0
	JEQ done_xor_sse2_64

loopback_xor_sse2_64:
	MOVOU (SI), X0 // in[x]
	MOVOU 16(SI), X2 // in[x]
	MOVOU 32(SI), X4 // in[x]
	MOVOU 48(SI), X6 // in[x]
	MOVOU (DX), X1 // out[x]
	MOVOU 16(DX), X3 // out[x]
	MOVOU 32(DX), X5 // out[x]
	MOVOU 48(DX), X7 // out[x]
	PXOR X0, X1
	PXOR X2, X3
	PXOR X4, X5
	PXOR X6, X7
	MOVOU X1, (DX)
	MOVOU X3, 16(DX)
	MOVOU X5, 32(DX)
	MOVOU X7, 48(DX)
	ADDQ $64, SI // in+=64
	ADDQ $64, DX // out+=64
	SUBQ $1, R9
	JNZ loopback_xor_sse2_64

done_xor_sse2_64:
	RET
+ +package reedsolomon + +//go:noescape +func galMulNEON(low, high, in, out []byte) + +//go:noescape +func galMulXorNEON(low, high, in, out []byte) + +//go:noescape +func galXorNEON(in, out []byte) + +func galMulSlice(c byte, in, out []byte, o *options) { + if c == 1 { + copy(out, in) + return + } + var done int + galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done = (len(in) >> 5) << 5 + + remain := len(in) - done + if remain > 0 { + mt := mulTable[c][:256] + for i := done; i < len(in); i++ { + out[i] = mt[in[i]] + } + } +} + +func galMulSliceXor(c byte, in, out []byte, o *options) { + if c == 1 { + sliceXor(in, out, o) + return + } + var done int + galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out) + done = (len(in) >> 5) << 5 + + remain := len(in) - done + if remain > 0 { + mt := mulTable[c][:256] + for i := done; i < len(in); i++ { + out[i] ^= mt[in[i]] + } + } +} + +// slice galois add +func sliceXor(in, out []byte, o *options) { + + galXorNEON(in, out) + done := (len(in) >> 5) << 5 + + remain := len(in) - done + if remain > 0 { + for i := done; i < len(in); i++ { + out[i] ^= in[i] + } + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s new file mode 100644 index 0000000..d2cac2c --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s @@ -0,0 +1,125 @@ +//+build !noasm !appengine !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2017, Minio, Inc. 
+ +#define LOAD(LO1, LO2, HI1, HI2) \ + VLD1.P 32(R1), [LO1.B16, LO2.B16] \ + \ + \ // Get low input and high input + VUSHR $4, LO1.B16, HI1.B16 \ + VUSHR $4, LO2.B16, HI2.B16 \ + VAND V8.B16, LO1.B16, LO1.B16 \ + VAND V8.B16, LO2.B16, LO2.B16 + +#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \ + \ // Mul low part and mul high part + VTBL V0.B16, [MUL_LO.B16], OUT1.B16 \ + VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \ + VTBL V1.B16, [MUL_LO.B16], TMP1.B16 \ + VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \ + \ + \ // Combine results + VEOR OUT2.B16, OUT1.B16, OUT1.B16 \ + VEOR TMP2.B16, TMP1.B16, OUT2.B16 + +// func galMulNEON(low, high, in, out []byte) +TEXT ·galMulNEON(SB), 7, $0 + MOVD in_base+48(FP), R1 + MOVD in_len+56(FP), R2 // length of message + MOVD out_base+72(FP), R5 + SUBS $32, R2 + BMI complete + + MOVD low+0(FP), R10 // R10: &low + MOVD high+24(FP), R11 // R11: &high + VLD1 (R10), [V6.B16] + VLD1 (R11), [V7.B16] + + // + // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error + // WORD $0x4e010c68 // dup v8.16b, w3 + // + MOVD $0x0f, R3 + VMOV R3, V8.B[0] + VDUP V8.B[0], V8.B16 + +loop: + // Main loop + LOAD(V0, V1, V10, V11) + GALOIS_MUL(V6, V7, V4, V5, V14, V15) + + // Store result + VST1.P [V4.D2, V5.D2], 32(R5) + + SUBS $32, R2 + BPL loop + +complete: + RET + +// func galMulXorNEON(low, high, in, out []byte) +TEXT ·galMulXorNEON(SB), 7, $0 + MOVD in_base+48(FP), R1 + MOVD in_len+56(FP), R2 // length of message + MOVD out_base+72(FP), R5 + SUBS $32, R2 + BMI completeXor + + MOVD low+0(FP), R10 // R10: &low + MOVD high+24(FP), R11 // R11: &high + VLD1 (R10), [V6.B16] + VLD1 (R11), [V7.B16] + + // + // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error + // WORD $0x4e010c68 // dup v8.16b, w3 + // + MOVD $0x0f, R3 + VMOV R3, V8.B[0] + VDUP V8.B[0], V8.B16 + +loopXor: + // Main loop + VLD1 (R5), [V20.B16, V21.B16] + + LOAD(V0, V1, V10, V11) + GALOIS_MUL(V6, V7, V4, V5, V14, V15) + + VEOR V20.B16, 
V4.B16, V4.B16 + VEOR V21.B16, V5.B16, V5.B16 + + // Store result + VST1.P [V4.D2, V5.D2], 32(R5) + + SUBS $32, R2 + BPL loopXor + +completeXor: + RET + +// func galXorNEON(in, out []byte) +TEXT ·galXorNEON(SB), 7, $0 + MOVD in_base+0(FP), R1 + MOVD in_len+8(FP), R2 // length of message + MOVD out_base+24(FP), R5 + SUBS $32, R2 + BMI completeXor + +loopXor: + // Main loop + VLD1.P 32(R1), [V0.B16, V1.B16] + VLD1 (R5), [V20.B16, V21.B16] + + VEOR V20.B16, V0.B16, V4.B16 + VEOR V21.B16, V1.B16, V5.B16 + + // Store result + VST1.P [V4.D2, V5.D2], 32(R5) + + SUBS $32, R2 + BPL loopXor + +completeXor: + RET + diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go new file mode 100644 index 0000000..edd6376 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go @@ -0,0 +1,408 @@ +// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT. + +// +build !appengine +// +build !noasm +// +build !nogen +// +build gc + +package reedsolomon + +// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs. +// The output is initialized to 0. 
+//go:noescape +func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) + +// mulAvxTwo_10x8 takes 10 inputs and produces 8 outputs. +// The output is initialized to 0. +//go:noescape +func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s new file mode 100644 index 0000000..c76db3c --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s @@ -0,0 +1,18526 @@ +// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT. 
+ +// +build !appengine +// +build !noasm +// +build !nogen +// +build gc + +// func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x1(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 6 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y2 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ $0x0000000f, BX + MOVQ BX, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), BX + +mulAvxTwo_1x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (CX)(BX*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y3, Y4, Y4 + VPAND Y3, Y5, Y5 + VPSHUFB Y4, Y1, Y4 + VPSHUFB Y5, Y2, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(BX*1) + + // Prepare for next loop + ADDQ $0x20, BX + DECQ AX + JNZ mulAvxTwo_1x1_loop + VZEROUPPER + +mulAvxTwo_1x1_end: + RET + +// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x2(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 11 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + VMOVDQU (CX), Y2 + VMOVDQU 32(CX), Y3 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ $0x0000000f, BP + MOVQ BP, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), BP + +mulAvxTwo_1x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (CX)(BP*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VPSHUFB Y9, Y2, Y7 + VPSHUFB Y10, Y3, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VPSHUFB Y9, Y4, Y7 
+ VPSHUFB Y10, Y5, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(BP*1) + VMOVDQU Y1, (DX)(BP*1) + + // Prepare for next loop + ADDQ $0x20, BP + DECQ AX + JNZ mulAvxTwo_1x2_loop + VZEROUPPER + +mulAvxTwo_1x2_end: + RET + +// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x3(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), CX + MOVQ $0x0000000f, SI + MOVQ SI, X9 + VPBROADCASTB X9, Y9 + MOVQ start+72(FP), SI + +mulAvxTwo_1x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (CX)(SI*1), Y12 + VPSRLQ $0x04, Y12, Y13 + VPAND Y9, Y12, Y12 + VPAND Y9, Y13, Y13 + VPSHUFB Y12, Y3, Y10 + VPSHUFB Y13, Y4, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y0, Y0 + VPSHUFB Y12, Y5, Y10 + VPSHUFB Y13, Y6, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y1, Y1 + VPSHUFB Y12, Y7, Y10 + VPSHUFB Y13, Y8, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(SI*1) + VMOVDQU Y1, (BP)(SI*1) + VMOVDQU Y2, (DX)(SI*1) + + // Prepare for next loop + ADDQ $0x20, SI + DECQ AX + JNZ mulAvxTwo_1x3_loop + VZEROUPPER + +mulAvxTwo_1x3_end: + RET + +// func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 17 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x4_end + MOVQ 
out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), DI + MOVQ $0x0000000f, R8 + MOVQ R8, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R8 + +mulAvxTwo_1x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (DI)(R8*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R8*1) + VMOVDQU Y1, (BP)(R8*1) + VMOVDQU Y2, (SI)(R8*1) + VMOVDQU Y3, (DX)(R8*1) + + // Prepare for next loop + ADDQ $0x20, R8 + DECQ AX + JNZ mulAvxTwo_1x4_loop + VZEROUPPER + +mulAvxTwo_1x4_end: + RET + +// func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x5_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), DX + MOVQ in_base+24(FP), R8 + MOVQ (R8), R8 + MOVQ $0x0000000f, R9 + MOVQ R9, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R9 + +mulAvxTwo_1x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU 
(R8)(R9*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R9*1) + VMOVDQU Y1, (BP)(R9*1) + VMOVDQU Y2, (SI)(R9*1) + VMOVDQU Y3, (DI)(R9*1) + VMOVDQU Y4, (DX)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxTwo_1x5_loop + VZEROUPPER + +mulAvxTwo_1x5_end: + RET + +// func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x6_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), DX + MOVQ in_base+24(FP), R9 + MOVQ (R9), R9 + MOVQ $0x0000000f, R10 + MOVQ R10, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R10 + +mulAvxTwo_1x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (R9)(R10*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 
+ VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + VMOVDQU Y0, (BX)(R10*1) + VMOVDQU Y1, (BP)(R10*1) + VMOVDQU Y2, (SI)(R10*1) + VMOVDQU Y3, (DI)(R10*1) + VMOVDQU Y4, (R8)(R10*1) + VMOVDQU Y5, (DX)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_1x6_loop + VZEROUPPER + +mulAvxTwo_1x6_end: + RET + +// func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x7_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), DX + MOVQ in_base+24(FP), R10 + MOVQ (R10), R10 + MOVQ $0x0000000f, R11 + MOVQ R11, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R11 + +mulAvxTwo_1x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (R10)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + 
VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + VMOVDQU Y0, (BX)(R11*1) + VMOVDQU Y1, (BP)(R11*1) + VMOVDQU Y2, (SI)(R11*1) + VMOVDQU Y3, (DI)(R11*1) + VMOVDQU Y4, (R8)(R11*1) + VMOVDQU Y5, (R9)(R11*1) + VMOVDQU Y6, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_1x7_loop + VZEROUPPER + +mulAvxTwo_1x7_end: + RET + +// func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_1x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 29 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_1x8_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), R10 + MOVQ 168(DX), DX + MOVQ in_base+24(FP), R11 + MOVQ (R11), R11 + MOVQ $0x0000000f, R12 + MOVQ R12, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R12 + +mulAvxTwo_1x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (R11)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 
+ VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (BP)(R12*1) + VMOVDQU Y2, (SI)(R12*1) + VMOVDQU Y3, (DI)(R12*1) + VMOVDQU Y4, (R8)(R12*1) + VMOVDQU Y5, (R9)(R12*1) + VMOVDQU Y6, (R10)(R12*1) + VMOVDQU Y7, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_1x8_loop + VZEROUPPER + +mulAvxTwo_1x8_end: + RET + +// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x1(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 8 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + MOVQ in_base+24(FP), CX + MOVQ (CX), BX + MOVQ 24(CX), CX + MOVQ $0x0000000f, BP + MOVQ BP, X5 + VPBROADCASTB 
X5, Y5 + MOVQ start+72(FP), BP + +mulAvxTwo_2x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BX)(BP*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y5, Y6, Y6 + VPAND Y5, Y7, Y7 + VPSHUFB Y6, Y1, Y6 + VPSHUFB Y7, Y2, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (CX)(BP*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y5, Y6, Y6 + VPAND Y5, Y7, Y7 + VPSHUFB Y6, Y3, Y6 + VPSHUFB Y7, Y4, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(BP*1) + + // Prepare for next loop + ADDQ $0x20, BP + DECQ AX + JNZ mulAvxTwo_2x1_loop + VZEROUPPER + +mulAvxTwo_2x1_end: + RET + +// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x2(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 15 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + VMOVDQU (CX), Y2 + VMOVDQU 32(CX), Y3 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + MOVQ in_base+24(FP), CX + MOVQ (CX), BP + MOVQ 24(CX), CX + MOVQ $0x0000000f, SI + MOVQ SI, X10 + VPBROADCASTB X10, Y10 + MOVQ start+72(FP), SI + +mulAvxTwo_2x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (BP)(SI*1), Y13 + VPSRLQ $0x04, Y13, Y14 + VPAND Y10, Y13, Y13 + VPAND Y10, Y14, Y14 + VPSHUFB Y13, Y2, Y11 + VPSHUFB Y14, Y3, Y12 + VPXOR Y11, Y12, Y11 + VPXOR Y11, Y0, Y0 + VPSHUFB Y13, Y4, Y11 + VPSHUFB Y14, Y5, Y12 + VPXOR Y11, Y12, Y11 + VPXOR Y11, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (CX)(SI*1), Y13 + VPSRLQ $0x04, Y13, Y14 + VPAND Y10, Y13, Y13 + VPAND Y10, Y14, Y14 + VPSHUFB 
Y13, Y6, Y11 + VPSHUFB Y14, Y7, Y12 + VPXOR Y11, Y12, Y11 + VPXOR Y11, Y0, Y0 + VPSHUFB Y13, Y8, Y11 + VPSHUFB Y14, Y9, Y12 + VPXOR Y11, Y12, Y11 + VPXOR Y11, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(SI*1) + VMOVDQU Y1, (DX)(SI*1) + + // Prepare for next loop + ADDQ $0x20, SI + DECQ AX + JNZ mulAvxTwo_2x2_loop + VZEROUPPER + +mulAvxTwo_2x2_end: + RET + +// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), SI + MOVQ $0x0000000f, R8 + MOVQ R8, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R8 + +mulAvxTwo_2x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R8*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI)(R8*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, 
Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R8*1) + VMOVDQU Y1, (BP)(R8*1) + VMOVDQU Y2, (DX)(R8*1) + + // Prepare for next loop + ADDQ $0x20, R8 + DECQ AX + JNZ mulAvxTwo_2x3_loop + VZEROUPPER + +mulAvxTwo_2x3_end: + RET + +// func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 25 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), DI + MOVQ $0x0000000f, R9 + MOVQ R9, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R9 + +mulAvxTwo_2x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R9*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (DI)(R9*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 
+ VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R9*1) + VMOVDQU Y1, (BP)(R9*1) + VMOVDQU Y2, (SI)(R9*1) + VMOVDQU Y3, (DX)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxTwo_2x4_loop + VZEROUPPER + +mulAvxTwo_2x4_end: + RET + +// func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 30 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x5_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), DX + MOVQ in_base+24(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R8 + MOVQ $0x0000000f, R10 + MOVQ R10, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R10 + +mulAvxTwo_2x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (R9)(R10*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from 
input 1 to 5 outputs + VMOVDQU (R8)(R10*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R10*1) + VMOVDQU Y1, (BP)(R10*1) + VMOVDQU Y2, (SI)(R10*1) + VMOVDQU Y3, (DI)(R10*1) + VMOVDQU Y4, (DX)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_2x5_loop + VZEROUPPER + +mulAvxTwo_2x5_end: + RET + +// func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x6_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), DX + MOVQ in_base+24(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R9 + MOVQ $0x0000000f, R11 + MOVQ R11, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R11 + +mulAvxTwo_2x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (R10)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, 
Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (R9)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + VMOVDQU Y0, (BX)(R11*1) + VMOVDQU Y1, (BP)(R11*1) + VMOVDQU Y2, (SI)(R11*1) + VMOVDQU Y3, (DI)(R11*1) + VMOVDQU Y4, (R8)(R11*1) + VMOVDQU Y5, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_2x6_loop + VZEROUPPER + +mulAvxTwo_2x6_end: + RET + +// func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT 
·mulAvxTwo_2x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 40 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x7_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), DX + MOVQ in_base+24(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R10 + MOVQ $0x0000000f, R12 + MOVQ R12, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R12 + +mulAvxTwo_2x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (R11)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (R10)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 
+ VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (BP)(R12*1) + VMOVDQU Y2, (SI)(R12*1) + VMOVDQU Y3, (DI)(R12*1) + VMOVDQU Y4, (R8)(R12*1) + VMOVDQU Y5, (R9)(R12*1) + VMOVDQU Y6, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_2x7_loop + VZEROUPPER + +mulAvxTwo_2x7_end: + RET + +// func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_2x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 45 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_2x8_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), R10 + MOVQ 168(DX), DX + MOVQ in_base+24(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R11 + MOVQ $0x0000000f, R13 + MOVQ R13, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R13 + +mulAvxTwo_2x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + 
VMOVDQU (R12)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (R11)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + 
VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (BP)(R13*1) + VMOVDQU Y2, (SI)(R13*1) + VMOVDQU Y3, (DI)(R13*1) + VMOVDQU Y4, (R8)(R13*1) + VMOVDQU Y5, (R9)(R13*1) + VMOVDQU Y6, (R10)(R13*1) + VMOVDQU Y7, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_2x8_loop + VZEROUPPER + +mulAvxTwo_2x8_end: + RET + +// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x1(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 10 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + MOVQ in_base+24(FP), CX + MOVQ (CX), BX + MOVQ 24(CX), BP + MOVQ 48(CX), CX + MOVQ $0x0000000f, SI + MOVQ SI, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), SI + +mulAvxTwo_3x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BX)(SI*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y7, Y8, Y8 + VPAND Y7, Y9, Y9 + VPSHUFB Y8, Y1, Y8 + VPSHUFB Y9, Y2, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BP)(SI*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y7, Y8, Y8 + VPAND Y7, Y9, Y9 + VPSHUFB Y8, Y3, Y8 + VPSHUFB Y9, Y4, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (CX)(SI*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y7, Y8, Y8 + VPAND Y7, Y9, Y9 + 
VPSHUFB Y8, Y5, Y8 + VPSHUFB Y9, Y6, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(SI*1) + + // Prepare for next loop + ADDQ $0x20, SI + DECQ AX + JNZ mulAvxTwo_3x1_loop + VZEROUPPER + +mulAvxTwo_3x1_end: + RET + +// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 19 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), BP + MOVQ $0x0000000f, R8 + MOVQ R8, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R8 + +mulAvxTwo_3x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R8*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R8*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (BP)(R8*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + 
VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R8*1) + VMOVDQU Y1, (DX)(R8*1) + + // Prepare for next loop + ADDQ $0x20, R8 + DECQ AX + JNZ mulAvxTwo_3x2_loop + VZEROUPPER + +mulAvxTwo_3x2_end: + RET + +// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 26 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), SI + MOVQ $0x0000000f, R9 + MOVQ R9, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R9 + +mulAvxTwo_3x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R9*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R9*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 
3 outputs + VMOVDQU (SI)(R9*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R9*1) + VMOVDQU Y1, (BP)(R9*1) + VMOVDQU Y2, (DX)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxTwo_3x3_loop + VZEROUPPER + +mulAvxTwo_3x3_end: + RET + +// func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 33 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), DI + MOVQ $0x0000000f, R10 + MOVQ R10, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R10 + +mulAvxTwo_3x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R10*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + 
VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (R9)(R10*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI)(R10*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R10*1) + VMOVDQU Y1, (BP)(R10*1) + VMOVDQU Y2, (SI)(R10*1) + VMOVDQU Y3, (DX)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_3x4_loop + VZEROUPPER + +mulAvxTwo_3x4_end: + RET + +// func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 40 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x5_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + 
MOVQ 96(DX), DX + MOVQ in_base+24(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R8 + MOVQ $0x0000000f, R11 + MOVQ R11, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R11 + +mulAvxTwo_3x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (R9)(R11*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (R10)(R11*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (R8)(R11*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + 
VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R11*1) + VMOVDQU Y1, (BP)(R11*1) + VMOVDQU Y2, (SI)(R11*1) + VMOVDQU Y3, (DI)(R11*1) + VMOVDQU Y4, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_3x5_loop + VZEROUPPER + +mulAvxTwo_3x5_end: + RET + +// func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x6_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), DX + MOVQ in_base+24(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R9 + MOVQ $0x0000000f, R12 + MOVQ R12, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R12 + +mulAvxTwo_3x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (R10)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + 
VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (R11)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (R9)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, 
Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (BP)(R12*1) + VMOVDQU Y2, (SI)(R12*1) + VMOVDQU Y3, (DI)(R12*1) + VMOVDQU Y4, (R8)(R12*1) + VMOVDQU Y5, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_3x6_loop + VZEROUPPER + +mulAvxTwo_3x6_end: + RET + +// func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 54 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x7_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), DX + MOVQ in_base+24(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R10 + MOVQ $0x0000000f, R13 + MOVQ R13, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R13 + +mulAvxTwo_3x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (R11)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, 
Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (R12)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (R10)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 
1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (BP)(R13*1) + VMOVDQU Y2, (SI)(R13*1) + VMOVDQU Y3, (DI)(R13*1) + VMOVDQU Y4, (R8)(R13*1) + VMOVDQU Y5, (R9)(R13*1) + VMOVDQU Y6, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_3x7_loop + VZEROUPPER + +mulAvxTwo_3x7_end: + RET + +// func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_3x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 61 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_3x8_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), R10 + MOVQ 168(DX), DX + MOVQ in_base+24(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R11 + MOVQ $0x0000000f, R14 + MOVQ R14, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R14 + +mulAvxTwo_3x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (R12)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 
+ VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (R13)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + 
VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (R11)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + VMOVDQU Y0, (BX)(R14*1) + VMOVDQU Y1, (BP)(R14*1) + VMOVDQU Y2, (SI)(R14*1) + VMOVDQU Y3, (DI)(R14*1) + VMOVDQU Y4, (R8)(R14*1) + VMOVDQU Y5, (R9)(R14*1) + VMOVDQU Y6, (R10)(R14*1) + VMOVDQU Y7, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_3x8_loop + VZEROUPPER + +mulAvxTwo_3x8_end: + RET + +// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x1(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 12 YMM 
used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + MOVQ in_base+24(FP), CX + MOVQ (CX), BX + MOVQ 24(CX), BP + MOVQ 48(CX), SI + MOVQ 72(CX), CX + MOVQ $0x0000000f, DI + MOVQ DI, X9 + VPBROADCASTB X9, Y9 + MOVQ start+72(FP), DI + +mulAvxTwo_4x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BX)(DI*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y9, Y10, Y10 + VPAND Y9, Y11, Y11 + VPSHUFB Y10, Y1, Y10 + VPSHUFB Y11, Y2, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BP)(DI*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y9, Y10, Y10 + VPAND Y9, Y11, Y11 + VPSHUFB Y10, Y3, Y10 + VPSHUFB Y11, Y4, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI)(DI*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y9, Y10, Y10 + VPAND Y9, Y11, Y11 + VPSHUFB Y10, Y5, Y10 + VPSHUFB Y11, Y6, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (CX)(DI*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y9, Y10, Y10 + VPAND Y9, Y11, Y11 + VPSHUFB Y10, Y7, Y10 + VPSHUFB Y11, Y8, Y11 + VPXOR Y10, Y11, Y10 + VPXOR Y10, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(DI*1) + + // Prepare for next loop + ADDQ $0x20, DI + DECQ AX + JNZ mulAvxTwo_4x1_loop + VZEROUPPER + +mulAvxTwo_4x1_end: + RET + +// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 23 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX 
+ JZ mulAvxTwo_4x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), BP + MOVQ $0x0000000f, R9 + MOVQ R9, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R9 + +mulAvxTwo_4x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R9*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R9*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (R8)(R9*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (BP)(R9*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R9*1) + VMOVDQU Y1, (DX)(R9*1) + + // Prepare 
for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxTwo_4x2_loop + VZEROUPPER + +mulAvxTwo_4x2_end: + RET + +// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 32 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), SI + MOVQ $0x0000000f, R10 + MOVQ R10, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R10 + +mulAvxTwo_4x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R10*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R10*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (R9)(R10*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + 
VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (SI)(R10*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R10*1) + VMOVDQU Y1, (BP)(R10*1) + VMOVDQU Y2, (DX)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_4x3_loop + VZEROUPPER + +mulAvxTwo_4x3_end: + RET + +// func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 41 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), DI + MOVQ $0x0000000f, R11 + MOVQ R11, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R11 + +mulAvxTwo_4x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R11*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 
32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (R9)(R11*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (R10)(R11*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (DI)(R11*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + 
VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R11*1) + VMOVDQU Y1, (BP)(R11*1) + VMOVDQU Y2, (SI)(R11*1) + VMOVDQU Y3, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_4x4_loop + VZEROUPPER + +mulAvxTwo_4x4_end: + RET + +// func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x5_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), DX + MOVQ in_base+24(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R8 + MOVQ $0x0000000f, R12 + MOVQ R12, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R12 + +mulAvxTwo_4x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (R9)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, 
Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (R10)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (R11)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + 
VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (BP)(R12*1) + VMOVDQU Y2, (SI)(R12*1) + VMOVDQU Y3, (DI)(R12*1) + VMOVDQU Y4, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_4x5_loop + VZEROUPPER + +mulAvxTwo_4x5_end: + RET + +// func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 59 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x6_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), DX + MOVQ in_base+24(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R9 + MOVQ $0x0000000f, R13 + MOVQ R13, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R13 + +mulAvxTwo_4x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (R10)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 
+ VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (R11)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (R12)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, 
Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R9)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (BP)(R13*1) + VMOVDQU Y2, (SI)(R13*1) + VMOVDQU Y3, (DI)(R13*1) + VMOVDQU Y4, (R8)(R13*1) + VMOVDQU Y5, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_4x6_loop + VZEROUPPER + +mulAvxTwo_4x6_end: + RET + +// func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x7_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), DX + MOVQ in_base+24(FP), R10 + MOVQ (R10), 
R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R10 + MOVQ $0x0000000f, R14 + MOVQ R14, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R14 + +mulAvxTwo_4x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (R11)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (R12)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + 
VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (R13)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R10)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1536(CX), 
Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + VMOVDQU Y0, (BX)(R14*1) + VMOVDQU Y1, (BP)(R14*1) + VMOVDQU Y2, (SI)(R14*1) + VMOVDQU Y3, (DI)(R14*1) + VMOVDQU Y4, (R8)(R14*1) + VMOVDQU Y5, (R9)(R14*1) + VMOVDQU Y6, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_4x7_loop + VZEROUPPER + +mulAvxTwo_4x7_end: + RET + +// func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_4x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 77 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_4x8_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), R10 + MOVQ 168(DX), DX + MOVQ in_base+24(FP), R11 + MOVQ (R11), R12 + MOVQ 24(R11), R13 + MOVQ 48(R11), R14 + MOVQ 72(R11), R11 + MOVQ $0x0000000f, R15 + MOVQ R15, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R15 + +mulAvxTwo_4x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (R12)(R15*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 
64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (R13)(R15*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 
992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (R14)(R15*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R11)(R15*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 
1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (BP)(R15*1) + VMOVDQU Y2, (SI)(R15*1) + VMOVDQU Y3, (DI)(R15*1) + VMOVDQU Y4, (R8)(R15*1) + VMOVDQU Y5, (R9)(R15*1) + VMOVDQU Y6, (R10)(R15*1) + VMOVDQU Y7, (DX)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxTwo_4x8_loop + VZEROUPPER + +mulAvxTwo_4x8_end: + RET + +// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x1(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 14 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + MOVQ in_base+24(FP), CX + MOVQ (CX), BX + MOVQ 24(CX), BP + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), CX + MOVQ $0x0000000f, R8 + MOVQ R8, X11 + VPBROADCASTB X11, Y11 + MOVQ start+72(FP), R8 + +mulAvxTwo_5x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BX)(R8*1), Y12 + VPSRLQ $0x04, Y12, Y13 + VPAND Y11, Y12, Y12 + VPAND Y11, Y13, Y13 + VPSHUFB Y12, Y1, Y12 + VPSHUFB Y13, Y2, Y13 + VPXOR Y12, Y13, Y12 + VPXOR Y12, Y0, Y0 + + // Load and process 32 bytes 
from input 1 to 1 outputs + VMOVDQU (BP)(R8*1), Y12 + VPSRLQ $0x04, Y12, Y13 + VPAND Y11, Y12, Y12 + VPAND Y11, Y13, Y13 + VPSHUFB Y12, Y3, Y12 + VPSHUFB Y13, Y4, Y13 + VPXOR Y12, Y13, Y12 + VPXOR Y12, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI)(R8*1), Y12 + VPSRLQ $0x04, Y12, Y13 + VPAND Y11, Y12, Y12 + VPAND Y11, Y13, Y13 + VPSHUFB Y12, Y5, Y12 + VPSHUFB Y13, Y6, Y13 + VPXOR Y12, Y13, Y12 + VPXOR Y12, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI)(R8*1), Y12 + VPSRLQ $0x04, Y12, Y13 + VPAND Y11, Y12, Y12 + VPAND Y11, Y13, Y13 + VPSHUFB Y12, Y7, Y12 + VPSHUFB Y13, Y8, Y13 + VPXOR Y12, Y13, Y12 + VPXOR Y12, Y0, Y0 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (CX)(R8*1), Y12 + VPSRLQ $0x04, Y12, Y13 + VPAND Y11, Y12, Y12 + VPAND Y11, Y13, Y13 + VPSHUFB Y12, Y9, Y12 + VPSHUFB Y13, Y10, Y13 + VPXOR Y12, Y13, Y12 + VPXOR Y12, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(R8*1) + + // Prepare for next loop + ADDQ $0x20, R8 + DECQ AX + JNZ mulAvxTwo_5x1_loop + VZEROUPPER + +mulAvxTwo_5x1_end: + RET + +// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 27 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), R9 + MOVQ 96(BP), BP + MOVQ $0x0000000f, R10 + MOVQ R10, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R10 + +mulAvxTwo_5x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R10*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, 
Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R10*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (R8)(R10*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R9)(R10*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (BP)(R10*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R10*1) + VMOVDQU Y1, (DX)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_5x2_loop + VZEROUPPER + +mulAvxTwo_5x2_end: + RET + +// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out 
[][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 38 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), SI + MOVQ $0x0000000f, R11 + MOVQ R11, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R11 + +mulAvxTwo_5x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R11*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R11*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (R9)(R11*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + 
VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R10)(R11*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (SI)(R11*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 768(CX), Y4 + VMOVDQU 800(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 832(CX), Y4 + VMOVDQU 864(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 896(CX), Y4 + VMOVDQU 928(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R11*1) + VMOVDQU Y1, (BP)(R11*1) + VMOVDQU Y2, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_5x3_loop + VZEROUPPER + +mulAvxTwo_5x3_end: + RET + +// func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 49 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 
96(DI), DI + MOVQ $0x0000000f, R12 + MOVQ R12, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R12 + +mulAvxTwo_5x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R12*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (R9)(R12*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (R10)(R12*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + 
VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R11)(R12*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (DI)(R12*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1024(CX), Y5 + VMOVDQU 1056(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1088(CX), Y5 + VMOVDQU 1120(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1152(CX), Y5 + VMOVDQU 1184(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1216(CX), Y5 + VMOVDQU 1248(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (BP)(R12*1) + VMOVDQU Y2, (SI)(R12*1) + VMOVDQU Y3, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_5x4_loop + VZEROUPPER + +mulAvxTwo_5x4_end: + RET + +// func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 60 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ 
mulAvxTwo_5x5_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), DX + MOVQ in_base+24(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R8 + MOVQ $0x0000000f, R13 + MOVQ R13, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R13 + +mulAvxTwo_5x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (R9)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (R10)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, 
Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (R11)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R12)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R8)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1280(CX), Y6 + VMOVDQU 1312(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1344(CX), Y6 + VMOVDQU 1376(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1408(CX), Y6 + VMOVDQU 1440(CX), Y7 + VPSHUFB 
Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1472(CX), Y6 + VMOVDQU 1504(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1536(CX), Y6 + VMOVDQU 1568(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (BP)(R13*1) + VMOVDQU Y2, (SI)(R13*1) + VMOVDQU Y3, (DI)(R13*1) + VMOVDQU Y4, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_5x5_loop + VZEROUPPER + +mulAvxTwo_5x5_end: + RET + +// func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 71 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x6_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), DX + MOVQ in_base+24(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R9 + MOVQ $0x0000000f, R14 + MOVQ R14, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R14 + +mulAvxTwo_5x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (R10)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, 
Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (R11)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (R12)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + 
VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R13)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1536(CX), Y7 + VMOVDQU 1568(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1600(CX), Y7 + VMOVDQU 1632(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1664(CX), Y7 + VMOVDQU 1696(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1728(CX), Y7 + VMOVDQU 1760(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1792(CX), Y7 + VMOVDQU 1824(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1856(CX), Y7 + VMOVDQU 1888(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + VMOVDQU Y0, (BX)(R14*1) + 
VMOVDQU Y1, (BP)(R14*1) + VMOVDQU Y2, (SI)(R14*1) + VMOVDQU Y3, (DI)(R14*1) + VMOVDQU Y4, (R8)(R14*1) + VMOVDQU Y5, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_5x6_loop + VZEROUPPER + +mulAvxTwo_5x6_end: + RET + +// func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 82 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x7_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), R9 + MOVQ 144(DX), DX + MOVQ in_base+24(FP), R10 + MOVQ (R10), R11 + MOVQ 24(R10), R12 + MOVQ 48(R10), R13 + MOVQ 72(R10), R14 + MOVQ 96(R10), R10 + MOVQ $0x0000000f, R15 + MOVQ R15, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R15 + +mulAvxTwo_5x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (R11)(R15*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, 
Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (R12)(R15*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (R13)(R15*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), 
Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R14)(R15*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1536(CX), Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R10)(R15*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1792(CX), Y8 + VMOVDQU 1824(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1856(CX), Y8 + VMOVDQU 1888(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1920(CX), Y8 + VMOVDQU 1952(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1984(CX), Y8 + VMOVDQU 2016(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2048(CX), Y8 + VMOVDQU 2080(CX), Y9 + 
VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2112(CX), Y8 + VMOVDQU 2144(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2176(CX), Y8 + VMOVDQU 2208(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (BP)(R15*1) + VMOVDQU Y2, (SI)(R15*1) + VMOVDQU Y3, (DI)(R15*1) + VMOVDQU Y4, (R8)(R15*1) + VMOVDQU Y5, (R9)(R15*1) + VMOVDQU Y6, (DX)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxTwo_5x7_loop + VZEROUPPER + +mulAvxTwo_5x7_end: + RET + +// func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_5x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 93 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_5x8_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), BX + MOVQ $0x0000000f, R9 + MOVQ R9, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R9 + +mulAvxTwo_5x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BP)(R9*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + 
VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI)(R9*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI)(R9*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + 
VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8)(R9*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + 
VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (BX)(R9*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2048(CX), Y9 + VMOVDQU 2080(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2112(CX), Y9 + VMOVDQU 2144(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2176(CX), Y9 + VMOVDQU 2208(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2240(CX), Y9 + VMOVDQU 2272(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2304(CX), Y9 + VMOVDQU 2336(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2368(CX), Y9 + VMOVDQU 2400(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2432(CX), Y9 + VMOVDQU 2464(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 2496(CX), Y9 + VMOVDQU 2528(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + MOVQ (DX), R10 + VMOVDQU Y0, (R10)(R9*1) + MOVQ 24(DX), R10 + VMOVDQU Y1, (R10)(R9*1) + MOVQ 48(DX), R10 + VMOVDQU Y2, (R10)(R9*1) + MOVQ 72(DX), R10 + VMOVDQU Y3, (R10)(R9*1) + MOVQ 96(DX), R10 + VMOVDQU Y4, (R10)(R9*1) + MOVQ 120(DX), R10 + VMOVDQU Y5, (R10)(R9*1) + MOVQ 144(DX), R10 + VMOVDQU Y6, (R10)(R9*1) + MOVQ 168(DX), R10 + VMOVDQU Y7, (R10)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxTwo_5x8_loop + VZEROUPPER + +mulAvxTwo_5x8_end: + RET + +// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, 
start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x1(SB), $0-88 + // Loading all tables to registers + // Full registers estimated 16 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + VMOVDQU (CX), Y1 + VMOVDQU 32(CX), Y2 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VMOVDQU 320(CX), Y11 + VMOVDQU 352(CX), Y12 + MOVQ in_base+24(FP), CX + MOVQ (CX), BX + MOVQ 24(CX), BP + MOVQ 48(CX), SI + MOVQ 72(CX), DI + MOVQ 96(CX), R8 + MOVQ 120(CX), CX + MOVQ $0x0000000f, R9 + MOVQ R9, X13 + VPBROADCASTB X13, Y13 + MOVQ start+72(FP), R9 + +mulAvxTwo_6x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BX)(R9*1), Y14 + VPSRLQ $0x04, Y14, Y15 + VPAND Y13, Y14, Y14 + VPAND Y13, Y15, Y15 + VPSHUFB Y14, Y1, Y14 + VPSHUFB Y15, Y2, Y15 + VPXOR Y14, Y15, Y14 + VPXOR Y14, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (BP)(R9*1), Y14 + VPSRLQ $0x04, Y14, Y15 + VPAND Y13, Y14, Y14 + VPAND Y13, Y15, Y15 + VPSHUFB Y14, Y3, Y14 + VPSHUFB Y15, Y4, Y15 + VPXOR Y14, Y15, Y14 + VPXOR Y14, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (SI)(R9*1), Y14 + VPSRLQ $0x04, Y14, Y15 + VPAND Y13, Y14, Y14 + VPAND Y13, Y15, Y15 + VPSHUFB Y14, Y5, Y14 + VPSHUFB Y15, Y6, Y15 + VPXOR Y14, Y15, Y14 + VPXOR Y14, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (DI)(R9*1), Y14 + VPSRLQ $0x04, Y14, Y15 + VPAND Y13, Y14, Y14 + VPAND Y13, Y15, Y15 + VPSHUFB Y14, Y7, Y14 + VPSHUFB Y15, Y8, Y15 + VPXOR Y14, Y15, Y14 + VPXOR Y14, Y0, Y0 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R8)(R9*1), Y14 + VPSRLQ $0x04, Y14, Y15 + VPAND Y13, Y14, Y14 + VPAND Y13, Y15, Y15 + VPSHUFB Y14, Y9, Y14 + 
VPSHUFB Y15, Y10, Y15 + VPXOR Y14, Y15, Y14 + VPXOR Y14, Y0, Y0 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (CX)(R9*1), Y14 + VPSRLQ $0x04, Y14, Y15 + VPAND Y13, Y14, Y14 + VPAND Y13, Y15, Y15 + VPSHUFB Y14, Y11, Y14 + VPSHUFB Y15, Y12, Y15 + VPXOR Y14, Y15, Y14 + VPXOR Y14, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(R9*1) + + // Prepare for next loop + ADDQ $0x20, R9 + DECQ AX + JNZ mulAvxTwo_6x1_loop + VZEROUPPER + +mulAvxTwo_6x1_end: + RET + +// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 31 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), R9 + MOVQ 96(BP), R10 + MOVQ 120(BP), BP + MOVQ $0x0000000f, R11 + MOVQ R11, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R11 + +mulAvxTwo_6x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R11*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R11*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 
to 2 outputs + VMOVDQU (R8)(R11*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R9)(R11*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R10)(R11*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (BP)(R11*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R11*1) + VMOVDQU Y1, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_6x2_loop + VZEROUPPER + +mulAvxTwo_6x2_end: + RET + +// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 44 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), 
CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), SI + MOVQ $0x0000000f, R12 + MOVQ R12, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R12 + +mulAvxTwo_6x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R12*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R12*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (R9)(R12*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, 
Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R10)(R12*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R11)(R12*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 768(CX), Y4 + VMOVDQU 800(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 832(CX), Y4 + VMOVDQU 864(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 896(CX), Y4 + VMOVDQU 928(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (SI)(R12*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 960(CX), Y4 + VMOVDQU 992(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1024(CX), Y4 + VMOVDQU 1056(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1088(CX), Y4 + VMOVDQU 1120(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (BP)(R12*1) + VMOVDQU Y2, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_6x3_loop + VZEROUPPER + +mulAvxTwo_6x3_end: + RET + +// func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x4(SB), $0-88 + // Loading no tables to registers + // Full registers 
estimated 57 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), DI + MOVQ $0x0000000f, R13 + MOVQ R13, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R13 + +mulAvxTwo_6x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (R9)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (R10)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 
512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R11)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R12)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1024(CX), Y5 + VMOVDQU 1056(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1088(CX), Y5 + VMOVDQU 1120(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1152(CX), Y5 + VMOVDQU 1184(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1216(CX), Y5 + VMOVDQU 1248(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (DI)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1280(CX), Y5 + VMOVDQU 1312(CX), Y6 + VPSHUFB Y7, Y5, Y5 + 
VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1344(CX), Y5 + VMOVDQU 1376(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1408(CX), Y5 + VMOVDQU 1440(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1472(CX), Y5 + VMOVDQU 1504(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (BP)(R13*1) + VMOVDQU Y2, (SI)(R13*1) + VMOVDQU Y3, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_6x4_loop + VZEROUPPER + +mulAvxTwo_6x4_end: + RET + +// func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 70 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x5_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), DX + MOVQ in_base+24(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R8 + MOVQ $0x0000000f, R14 + MOVQ R14, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R14 + +mulAvxTwo_6x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (R9)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, 
Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (R10)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (R11)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R12)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + 
VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R13)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1280(CX), Y6 + VMOVDQU 1312(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1344(CX), Y6 + VMOVDQU 1376(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1408(CX), Y6 + VMOVDQU 1440(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1472(CX), Y6 + VMOVDQU 1504(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1536(CX), Y6 + VMOVDQU 1568(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R8)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1600(CX), Y6 + VMOVDQU 1632(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1664(CX), Y6 + VMOVDQU 1696(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1728(CX), Y6 + VMOVDQU 1760(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1792(CX), Y6 + VMOVDQU 1824(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 
1856(CX), Y6 + VMOVDQU 1888(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R14*1) + VMOVDQU Y1, (BP)(R14*1) + VMOVDQU Y2, (SI)(R14*1) + VMOVDQU Y3, (DI)(R14*1) + VMOVDQU Y4, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_6x5_loop + VZEROUPPER + +mulAvxTwo_6x5_end: + RET + +// func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 83 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x6_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), R8 + MOVQ 120(DX), DX + MOVQ in_base+24(FP), R9 + MOVQ (R9), R10 + MOVQ 24(R9), R11 + MOVQ 48(R9), R12 + MOVQ 72(R9), R13 + MOVQ 96(R9), R14 + MOVQ 120(R9), R9 + MOVQ $0x0000000f, R15 + MOVQ R15, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R15 + +mulAvxTwo_6x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (R10)(R15*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, 
Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (R11)(R15*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (R12)(R15*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + 
VMOVDQU (R13)(R15*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R14)(R15*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1536(CX), Y7 + VMOVDQU 1568(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1600(CX), Y7 + VMOVDQU 1632(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1664(CX), Y7 + VMOVDQU 1696(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1728(CX), Y7 + VMOVDQU 1760(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1792(CX), Y7 + VMOVDQU 1824(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1856(CX), Y7 + VMOVDQU 1888(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R9)(R15*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1920(CX), Y7 + VMOVDQU 1952(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, 
Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1984(CX), Y7 + VMOVDQU 2016(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2048(CX), Y7 + VMOVDQU 2080(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2112(CX), Y7 + VMOVDQU 2144(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2176(CX), Y7 + VMOVDQU 2208(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2240(CX), Y7 + VMOVDQU 2272(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (BP)(R15*1) + VMOVDQU Y2, (SI)(R15*1) + VMOVDQU Y3, (DI)(R15*1) + VMOVDQU Y4, (R8)(R15*1) + VMOVDQU Y5, (DX)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxTwo_6x6_loop + VZEROUPPER + +mulAvxTwo_6x6_end: + RET + +// func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 96 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x7_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), BX + MOVQ $0x0000000f, R10 + MOVQ R10, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R10 + +mulAvxTwo_6x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BP)(R10*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, 
Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI)(R10*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI)(R10*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + 
VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8)(R10*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1536(CX), Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9)(R10*1), Y10 + VPSRLQ $0x04, 
Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1792(CX), Y8 + VMOVDQU 1824(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1856(CX), Y8 + VMOVDQU 1888(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1920(CX), Y8 + VMOVDQU 1952(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1984(CX), Y8 + VMOVDQU 2016(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2048(CX), Y8 + VMOVDQU 2080(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2112(CX), Y8 + VMOVDQU 2144(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2176(CX), Y8 + VMOVDQU 2208(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (BX)(R10*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2240(CX), Y8 + VMOVDQU 2272(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2304(CX), Y8 + VMOVDQU 2336(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2368(CX), Y8 + VMOVDQU 2400(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2432(CX), Y8 + VMOVDQU 2464(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2496(CX), Y8 + VMOVDQU 2528(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2560(CX), Y8 + VMOVDQU 2592(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2624(CX), Y8 + VMOVDQU 2656(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + 
// Store 7 outputs + MOVQ (DX), R11 + VMOVDQU Y0, (R11)(R10*1) + MOVQ 24(DX), R11 + VMOVDQU Y1, (R11)(R10*1) + MOVQ 48(DX), R11 + VMOVDQU Y2, (R11)(R10*1) + MOVQ 72(DX), R11 + VMOVDQU Y3, (R11)(R10*1) + MOVQ 96(DX), R11 + VMOVDQU Y4, (R11)(R10*1) + MOVQ 120(DX), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 144(DX), R11 + VMOVDQU Y6, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_6x7_loop + VZEROUPPER + +mulAvxTwo_6x7_end: + RET + +// func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_6x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 109 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_6x8_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), BX + MOVQ $0x0000000f, R10 + MOVQ R10, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R10 + +mulAvxTwo_6x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BP)(R10*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + 
VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI)(R10*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI)(R10*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + 
VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8)(R10*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 
+ VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9)(R10*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2048(CX), Y9 + VMOVDQU 2080(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2112(CX), Y9 + VMOVDQU 2144(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2176(CX), Y9 + VMOVDQU 2208(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2240(CX), Y9 + VMOVDQU 2272(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2304(CX), Y9 + VMOVDQU 2336(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2368(CX), Y9 + VMOVDQU 2400(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2432(CX), Y9 + VMOVDQU 2464(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 2496(CX), Y9 + VMOVDQU 2528(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (BX)(R10*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2560(CX), Y9 + VMOVDQU 2592(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2624(CX), Y9 + VMOVDQU 2656(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2688(CX), Y9 + VMOVDQU 2720(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2752(CX), Y9 + VMOVDQU 2784(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2816(CX), Y9 + VMOVDQU 2848(CX), Y10 + VPSHUFB Y11, Y9, Y9 + 
VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2880(CX), Y9 + VMOVDQU 2912(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2944(CX), Y9 + VMOVDQU 2976(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3008(CX), Y9 + VMOVDQU 3040(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + MOVQ (DX), R11 + VMOVDQU Y0, (R11)(R10*1) + MOVQ 24(DX), R11 + VMOVDQU Y1, (R11)(R10*1) + MOVQ 48(DX), R11 + VMOVDQU Y2, (R11)(R10*1) + MOVQ 72(DX), R11 + VMOVDQU Y3, (R11)(R10*1) + MOVQ 96(DX), R11 + VMOVDQU Y4, (R11)(R10*1) + MOVQ 120(DX), R11 + VMOVDQU Y5, (R11)(R10*1) + MOVQ 144(DX), R11 + VMOVDQU Y6, (R11)(R10*1) + MOVQ 168(DX), R11 + VMOVDQU Y7, (R11)(R10*1) + + // Prepare for next loop + ADDQ $0x20, R10 + DECQ AX + JNZ mulAvxTwo_6x8_loop + VZEROUPPER + +mulAvxTwo_6x8_end: + RET + +// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x1(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 18 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), BX + MOVQ $0x0000000f, R11 + MOVQ R11, X1 + VPBROADCASTB X1, Y1 + MOVQ start+72(FP), R11 + +mulAvxTwo_7x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BP)(R11*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU (CX), Y2 + VMOVDQU 32(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (SI)(R11*1), Y4 + 
VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 64(CX), Y2 + VMOVDQU 96(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (DI)(R11*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 128(CX), Y2 + VMOVDQU 160(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (R8)(R11*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 192(CX), Y2 + VMOVDQU 224(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R9)(R11*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 256(CX), Y2 + VMOVDQU 288(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R10)(R11*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 320(CX), Y2 + VMOVDQU 352(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (BX)(R11*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 384(CX), Y2 + VMOVDQU 416(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_7x1_loop + VZEROUPPER + +mulAvxTwo_7x1_end: + RET + +// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 35 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + 
TESTQ AX, AX + JZ mulAvxTwo_7x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), R9 + MOVQ 96(BP), R10 + MOVQ 120(BP), R11 + MOVQ 144(BP), BP + MOVQ $0x0000000f, R12 + MOVQ R12, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R12 + +mulAvxTwo_7x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (R8)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R9)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // 
Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R10)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R11)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (BP)(R12*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 768(CX), Y3 + VMOVDQU 800(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 832(CX), Y3 + VMOVDQU 864(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R12*1) + VMOVDQU Y1, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_7x2_loop + VZEROUPPER + +mulAvxTwo_7x2_end: + RET + +// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 50 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), SI + MOVQ $0x0000000f, R13 + MOVQ R13, X3 + VPBROADCASTB X3, Y3 
+ MOVQ start+72(FP), R13 + +mulAvxTwo_7x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (R9)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R10)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB 
Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R11)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 768(CX), Y4 + VMOVDQU 800(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 832(CX), Y4 + VMOVDQU 864(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 896(CX), Y4 + VMOVDQU 928(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R12)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 960(CX), Y4 + VMOVDQU 992(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1024(CX), Y4 + VMOVDQU 1056(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1088(CX), Y4 + VMOVDQU 1120(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (SI)(R13*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1152(CX), Y4 + VMOVDQU 1184(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1216(CX), Y4 + VMOVDQU 1248(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1280(CX), Y4 + VMOVDQU 1312(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (BP)(R13*1) + VMOVDQU Y2, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_7x3_loop + VZEROUPPER + +mulAvxTwo_7x3_end: + RET + +// func 
mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 65 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), DI + MOVQ $0x0000000f, R14 + MOVQ R14, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R14 + +mulAvxTwo_7x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (R9)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + 
VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (R10)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R11)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R12)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1024(CX), Y5 + VMOVDQU 1056(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1088(CX), Y5 + VMOVDQU 1120(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1152(CX), Y5 + VMOVDQU 1184(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1216(CX), Y5 + VMOVDQU 1248(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + 
// Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R13)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1280(CX), Y5 + VMOVDQU 1312(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1344(CX), Y5 + VMOVDQU 1376(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1408(CX), Y5 + VMOVDQU 1440(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1472(CX), Y5 + VMOVDQU 1504(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (DI)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1536(CX), Y5 + VMOVDQU 1568(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1600(CX), Y5 + VMOVDQU 1632(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1664(CX), Y5 + VMOVDQU 1696(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1728(CX), Y5 + VMOVDQU 1760(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R14*1) + VMOVDQU Y1, (BP)(R14*1) + VMOVDQU Y2, (SI)(R14*1) + VMOVDQU Y3, (DX)(R14*1) + + // Prepare for next loop + // Advance the byte offset by 32 and repeat while the block counter in AX is non-zero + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_7x4_loop + VZEROUPPER + +mulAvxTwo_7x4_end: + RET + +// func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 80 YMM used + // AX = n >> 5 is the number of 32-byte blocks to process; a zero count returns immediately + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x5_end + // Load the 5 output data pointers, read at a 24-byte stride from out_base + // NOTE(review): BP is overwritten below in a $0-frame function and is not restored before RET; confirm this is safe for frame-pointer unwinding + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DI + MOVQ 96(DX), DX + // The following loads fetch the 7 input data pointers; then the nibble mask 0x0f is broadcast to Y5 and R15 is set to start + MOVQ 
in_base+24(FP), R8 + MOVQ (R8), R9 + MOVQ 24(R8), R10 + MOVQ 48(R8), R11 + MOVQ 72(R8), R12 + MOVQ 96(R8), R13 + MOVQ 120(R8), R14 + MOVQ 144(R8), R8 + MOVQ $0x0000000f, R15 + MOVQ R15, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R15 + +mulAvxTwo_7x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + // Y8 = low nibble and Y9 = high nibble of each input byte; each output XORs the VPSHUFB lookups of two 32-byte tables read from CX + VMOVDQU (R9)(R15*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (R10)(R15*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (R11)(R15*1), Y8 + VPSRLQ 
$0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R12)(R15*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R13)(R15*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1280(CX), Y6 + VMOVDQU 1312(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1344(CX), Y6 + VMOVDQU 1376(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1408(CX), Y6 + VMOVDQU 1440(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1472(CX), Y6 + 
VMOVDQU 1504(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1536(CX), Y6 + VMOVDQU 1568(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R14)(R15*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1600(CX), Y6 + VMOVDQU 1632(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1664(CX), Y6 + VMOVDQU 1696(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1728(CX), Y6 + VMOVDQU 1760(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1792(CX), Y6 + VMOVDQU 1824(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1856(CX), Y6 + VMOVDQU 1888(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R8)(R15*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1920(CX), Y6 + VMOVDQU 1952(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1984(CX), Y6 + VMOVDQU 2016(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2048(CX), Y6 + VMOVDQU 2080(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2112(CX), Y6 + VMOVDQU 2144(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2176(CX), Y6 + VMOVDQU 2208(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (BP)(R15*1) + VMOVDQU Y2, (SI)(R15*1) + VMOVDQU Y3, (DI)(R15*1) + VMOVDQU Y4, (DX)(R15*1) + + // Prepare for next loop + // Advance the byte offset by 32 and repeat while the block counter in AX is non-zero + ADDQ $0x20, R15 + DECQ AX + JNZ 
mulAvxTwo_7x5_loop + VZEROUPPER + +mulAvxTwo_7x5_end: + RET + +// func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 95 YMM used + // AX = n >> 5 is the number of 32-byte blocks to process; a zero count returns immediately + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x6_end + // DX keeps out_base itself; no output data pointers are loaded in this prologue + MOVQ out_base+48(FP), DX + // Load the 7 input data pointers, read at a 24-byte stride from in_base + // NOTE(review): BP is overwritten below in a $0-frame function and no restore is visible in this prologue; confirm this is safe for frame-pointer unwinding + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), BX + // Broadcast the nibble mask 0x0f to every byte of Y6 + MOVQ $0x0000000f, R11 + MOVQ R11, X6 + VPBROADCASTB X6, Y6 + // R11 = start, the byte offset added to every input pointer + MOVQ start+72(FP), R11 + +mulAvxTwo_7x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + // Y9 = low nibble and Y10 = high nibble of each input byte; each output XORs the VPSHUFB lookups of two 32-byte tables read from CX + VMOVDQU (BP)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), 
Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, 
Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1536(CX), Y7 + VMOVDQU 1568(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1600(CX), Y7 + VMOVDQU 1632(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1664(CX), Y7 + VMOVDQU 1696(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1728(CX), Y7 + VMOVDQU 1760(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1792(CX), Y7 + VMOVDQU 1824(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1856(CX), Y7 + VMOVDQU 1888(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1920(CX), Y7 + VMOVDQU 1952(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1984(CX), Y7 + VMOVDQU 2016(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2048(CX), Y7 + VMOVDQU 2080(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2112(CX), Y7 + VMOVDQU 2144(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + 
VMOVDQU 2176(CX), Y7 + VMOVDQU 2208(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2240(CX), Y7 + VMOVDQU 2272(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (BX)(R11*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2304(CX), Y7 + VMOVDQU 2336(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2368(CX), Y7 + VMOVDQU 2400(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2432(CX), Y7 + VMOVDQU 2464(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2496(CX), Y7 + VMOVDQU 2528(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2560(CX), Y7 + VMOVDQU 2592(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2624(CX), Y7 + VMOVDQU 2656(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + // Each output data pointer is reloaded from DX into R12 immediately before its store + MOVQ (DX), R12 + VMOVDQU Y0, (R12)(R11*1) + MOVQ 24(DX), R12 + VMOVDQU Y1, (R12)(R11*1) + MOVQ 48(DX), R12 + VMOVDQU Y2, (R12)(R11*1) + MOVQ 72(DX), R12 + VMOVDQU Y3, (R12)(R11*1) + MOVQ 96(DX), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 120(DX), R12 + VMOVDQU Y5, (R12)(R11*1) + + // Prepare for next loop + // Advance the byte offset by 32 and repeat while the block counter in AX is non-zero + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_7x6_loop + VZEROUPPER + +mulAvxTwo_7x6_end: + RET + +// func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 110 YMM used + // AX = n >> 5 is the number of 32-byte blocks to process; a zero count returns immediately + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x7_end + // DX keeps out_base itself; the 7 output data pointers are reloaded from it in the store step of every iteration + // The loads that follow fetch the 7 input data pointers at a 24-byte stride from in_base; then the nibble mask 0x0f is broadcast to Y7 and R11 is set to start + // NOTE(review): BP is overwritten below in a $0-frame function and is not restored before RET; confirm this is safe for frame-pointer unwinding + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), 
SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), BX + MOVQ $0x0000000f, R11 + MOVQ R11, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R11 + +mulAvxTwo_7x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + // Y10 = low nibble and Y11 = high nibble of each input byte; each output XORs the VPSHUFB lookups of two 32-byte tables read from CX + VMOVDQU (BP)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, 
Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, 
Y2, Y2 + VMOVDQU 1536(CX), Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1792(CX), Y8 + VMOVDQU 1824(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1856(CX), Y8 + VMOVDQU 1888(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1920(CX), Y8 + VMOVDQU 1952(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1984(CX), Y8 + VMOVDQU 2016(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2048(CX), Y8 + VMOVDQU 2080(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2112(CX), Y8 + VMOVDQU 2144(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2176(CX), Y8 + VMOVDQU 2208(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2240(CX), Y8 + VMOVDQU 2272(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2304(CX), Y8 + VMOVDQU 2336(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2368(CX), 
Y8 + VMOVDQU 2400(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2432(CX), Y8 + VMOVDQU 2464(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2496(CX), Y8 + VMOVDQU 2528(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2560(CX), Y8 + VMOVDQU 2592(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2624(CX), Y8 + VMOVDQU 2656(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (BX)(R11*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2688(CX), Y8 + VMOVDQU 2720(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2752(CX), Y8 + VMOVDQU 2784(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2816(CX), Y8 + VMOVDQU 2848(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2880(CX), Y8 + VMOVDQU 2912(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2944(CX), Y8 + VMOVDQU 2976(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3008(CX), Y8 + VMOVDQU 3040(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3072(CX), Y8 + VMOVDQU 3104(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + // Each output data pointer is reloaded from DX into R12 immediately before its store + MOVQ (DX), R12 + VMOVDQU Y0, (R12)(R11*1) + MOVQ 24(DX), R12 + VMOVDQU Y1, (R12)(R11*1) + MOVQ 48(DX), R12 + VMOVDQU Y2, (R12)(R11*1) + MOVQ 72(DX), R12 + VMOVDQU Y3, (R12)(R11*1) + MOVQ 96(DX), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 120(DX), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 144(DX), R12 + VMOVDQU 
Y6, (R12)(R11*1) + + // Prepare for next loop + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_7x7_loop + VZEROUPPER + +mulAvxTwo_7x7_end: + RET + +// func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_7x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 125 YMM used + // AX = n >> 5 is the number of 32-byte blocks to process; a zero count returns immediately + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_7x8_end + // DX keeps out_base itself; the 8 output data pointers are reloaded from it in the store step of every iteration + MOVQ out_base+48(FP), DX + // Load the 7 input data pointers, read at a 24-byte stride from in_base + // NOTE(review): BP is overwritten below in a $0-frame function and is not restored before RET; confirm this is safe for frame-pointer unwinding + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), BX + // Broadcast the nibble mask 0x0f to every byte of Y8 + MOVQ $0x0000000f, R11 + MOVQ R11, X8 + VPBROADCASTB X8, Y8 + // R11 = start, the byte offset added to every input and output pointer + MOVQ start+72(FP), R11 + +mulAvxTwo_7x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + // Y11 = low nibble and Y12 = high nibble of each input byte; each output XORs the VPSHUFB lookups of two 32-byte tables read from CX + VMOVDQU (BP)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 
448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + 
VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2048(CX), Y9 + VMOVDQU 2080(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, 
Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2112(CX), Y9 + VMOVDQU 2144(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2176(CX), Y9 + VMOVDQU 2208(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2240(CX), Y9 + VMOVDQU 2272(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2304(CX), Y9 + VMOVDQU 2336(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2368(CX), Y9 + VMOVDQU 2400(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2432(CX), Y9 + VMOVDQU 2464(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 2496(CX), Y9 + VMOVDQU 2528(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2560(CX), Y9 + VMOVDQU 2592(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2624(CX), Y9 + VMOVDQU 2656(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2688(CX), Y9 + VMOVDQU 2720(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2752(CX), Y9 + VMOVDQU 2784(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2816(CX), Y9 + VMOVDQU 2848(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2880(CX), Y9 + VMOVDQU 2912(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2944(CX), Y9 + VMOVDQU 2976(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, 
Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3008(CX), Y9 + VMOVDQU 3040(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (BX)(R11*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3072(CX), Y9 + VMOVDQU 3104(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3136(CX), Y9 + VMOVDQU 3168(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3200(CX), Y9 + VMOVDQU 3232(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3264(CX), Y9 + VMOVDQU 3296(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3328(CX), Y9 + VMOVDQU 3360(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3392(CX), Y9 + VMOVDQU 3424(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 3456(CX), Y9 + VMOVDQU 3488(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3520(CX), Y9 + VMOVDQU 3552(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + // Each output data pointer is reloaded from DX into R12 immediately before its store + MOVQ (DX), R12 + VMOVDQU Y0, (R12)(R11*1) + MOVQ 24(DX), R12 + VMOVDQU Y1, (R12)(R11*1) + MOVQ 48(DX), R12 + VMOVDQU Y2, (R12)(R11*1) + MOVQ 72(DX), R12 + VMOVDQU Y3, (R12)(R11*1) + MOVQ 96(DX), R12 + VMOVDQU Y4, (R12)(R11*1) + MOVQ 120(DX), R12 + VMOVDQU Y5, (R12)(R11*1) + MOVQ 144(DX), R12 + VMOVDQU Y6, (R12)(R11*1) + MOVQ 168(DX), R12 + VMOVDQU Y7, (R12)(R11*1) + + // Prepare for next loop + // Advance the byte offset by 32 and repeat while the block counter in AX is non-zero + ADDQ $0x20, R11 + DECQ AX + JNZ mulAvxTwo_7x8_loop + VZEROUPPER + +mulAvxTwo_7x8_end: + RET + +// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 
+TEXT ·mulAvxTwo_8x1(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 20 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), BX + MOVQ $0x0000000f, R12 + MOVQ R12, X1 + VPBROADCASTB X1, Y1 + MOVQ start+72(FP), R12 + +mulAvxTwo_8x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BP)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU (CX), Y2 + VMOVDQU 32(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (SI)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 64(CX), Y2 + VMOVDQU 96(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (DI)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 128(CX), Y2 + VMOVDQU 160(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (R8)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 192(CX), Y2 + VMOVDQU 224(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R9)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 256(CX), Y2 + VMOVDQU 288(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU 
(R10)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 320(CX), Y2 + VMOVDQU 352(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R11)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 384(CX), Y2 + VMOVDQU 416(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (BX)(R12*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 448(CX), Y2 + VMOVDQU 480(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_8x1_loop + VZEROUPPER + +mulAvxTwo_8x1_end: + RET + +// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 39 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), R9 + MOVQ 96(BP), R10 + MOVQ 120(BP), R11 + MOVQ 144(BP), R12 + MOVQ 168(BP), BP + MOVQ $0x0000000f, R13 + MOVQ R13, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R13 + +mulAvxTwo_8x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + 
VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (R8)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R9)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R10)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R11)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR 
Y3, Y1, Y1 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R12)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 768(CX), Y3 + VMOVDQU 800(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 832(CX), Y3 + VMOVDQU 864(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (BP)(R13*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 896(CX), Y3 + VMOVDQU 928(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 960(CX), Y3 + VMOVDQU 992(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R13*1) + VMOVDQU Y1, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_8x2_loop + VZEROUPPER + +mulAvxTwo_8x2_end: + RET + +// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 56 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), SI + MOVQ $0x0000000f, R14 + MOVQ R14, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R14 + +mulAvxTwo_8x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + 
VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (R9)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R10)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R11)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 
768(CX), Y4 + VMOVDQU 800(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 832(CX), Y4 + VMOVDQU 864(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 896(CX), Y4 + VMOVDQU 928(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R12)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 960(CX), Y4 + VMOVDQU 992(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1024(CX), Y4 + VMOVDQU 1056(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1088(CX), Y4 + VMOVDQU 1120(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R13)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1152(CX), Y4 + VMOVDQU 1184(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1216(CX), Y4 + VMOVDQU 1248(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1280(CX), Y4 + VMOVDQU 1312(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (SI)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1344(CX), Y4 + VMOVDQU 1376(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1408(CX), Y4 + VMOVDQU 1440(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1472(CX), Y4 + VMOVDQU 1504(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R14*1) + VMOVDQU Y1, 
(BP)(R14*1) + VMOVDQU Y2, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_8x3_loop + VZEROUPPER + +mulAvxTwo_8x3_end: + RET + +// func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 73 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x4_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), SI + MOVQ 72(DX), DX + MOVQ in_base+24(FP), DI + MOVQ (DI), R8 + MOVQ 24(DI), R9 + MOVQ 48(DI), R10 + MOVQ 72(DI), R11 + MOVQ 96(DI), R12 + MOVQ 120(DI), R13 + MOVQ 144(DI), R14 + MOVQ 168(DI), DI + MOVQ $0x0000000f, R15 + MOVQ R15, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R15 + +mulAvxTwo_8x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (R8)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (R9)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 
+ VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (R10)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R11)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R12)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1024(CX), Y5 + VMOVDQU 1056(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1088(CX), Y5 + VMOVDQU 1120(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1152(CX), Y5 + VMOVDQU 1184(CX), Y6 + VPSHUFB Y7, Y5, Y5 
+ VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1216(CX), Y5 + VMOVDQU 1248(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R13)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1280(CX), Y5 + VMOVDQU 1312(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1344(CX), Y5 + VMOVDQU 1376(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1408(CX), Y5 + VMOVDQU 1440(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1472(CX), Y5 + VMOVDQU 1504(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R14)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1536(CX), Y5 + VMOVDQU 1568(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1600(CX), Y5 + VMOVDQU 1632(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1664(CX), Y5 + VMOVDQU 1696(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1728(CX), Y5 + VMOVDQU 1760(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (DI)(R15*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1792(CX), Y5 + VMOVDQU 1824(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1856(CX), Y5 + VMOVDQU 1888(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1920(CX), Y5 + VMOVDQU 1952(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR 
Y5, Y2, Y2 + VMOVDQU 1984(CX), Y5 + VMOVDQU 2016(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (BP)(R15*1) + VMOVDQU Y2, (SI)(R15*1) + VMOVDQU Y3, (DX)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxTwo_8x4_loop + VZEROUPPER + +mulAvxTwo_8x4_end: + RET + +// func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 90 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x5_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), BX + MOVQ $0x0000000f, R12 + MOVQ R12, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R12 + +mulAvxTwo_8x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BP)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI)(R12*1), Y8 + VPSRLQ 
$0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 
1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1280(CX), Y6 + VMOVDQU 1312(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1344(CX), Y6 + VMOVDQU 1376(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1408(CX), Y6 + VMOVDQU 1440(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1472(CX), Y6 + VMOVDQU 1504(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1536(CX), Y6 + VMOVDQU 1568(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1600(CX), Y6 + VMOVDQU 1632(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1664(CX), Y6 + VMOVDQU 1696(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1728(CX), Y6 + VMOVDQU 1760(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1792(CX), Y6 + VMOVDQU 1824(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1856(CX), Y6 + VMOVDQU 1888(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1920(CX), Y6 + VMOVDQU 1952(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, 
Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1984(CX), Y6 + VMOVDQU 2016(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2048(CX), Y6 + VMOVDQU 2080(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2112(CX), Y6 + VMOVDQU 2144(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2176(CX), Y6 + VMOVDQU 2208(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (BX)(R12*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 2240(CX), Y6 + VMOVDQU 2272(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 2304(CX), Y6 + VMOVDQU 2336(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2368(CX), Y6 + VMOVDQU 2400(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2432(CX), Y6 + VMOVDQU 2464(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2496(CX), Y6 + VMOVDQU 2528(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + MOVQ (DX), R13 + VMOVDQU Y0, (R13)(R12*1) + MOVQ 24(DX), R13 + VMOVDQU Y1, (R13)(R12*1) + MOVQ 48(DX), R13 + VMOVDQU Y2, (R13)(R12*1) + MOVQ 72(DX), R13 + VMOVDQU Y3, (R13)(R12*1) + MOVQ 96(DX), R13 + VMOVDQU Y4, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_8x5_loop + VZEROUPPER + +mulAvxTwo_8x5_end: + RET + +// func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 107 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ 
AX, AX + JZ mulAvxTwo_8x6_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), BX + MOVQ $0x0000000f, R12 + MOVQ R12, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R12 + +mulAvxTwo_8x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BP)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + 
VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 
+ VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1536(CX), Y7 + VMOVDQU 1568(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1600(CX), Y7 + VMOVDQU 1632(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1664(CX), Y7 + VMOVDQU 1696(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1728(CX), Y7 + VMOVDQU 1760(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1792(CX), Y7 + VMOVDQU 1824(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1856(CX), Y7 + VMOVDQU 1888(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1920(CX), Y7 + VMOVDQU 1952(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1984(CX), Y7 + VMOVDQU 2016(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2048(CX), Y7 + VMOVDQU 2080(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2112(CX), Y7 + VMOVDQU 2144(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2176(CX), Y7 + VMOVDQU 2208(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2240(CX), Y7 + VMOVDQU 2272(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11)(R12*1), Y9 + VPSRLQ 
$0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2304(CX), Y7 + VMOVDQU 2336(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2368(CX), Y7 + VMOVDQU 2400(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2432(CX), Y7 + VMOVDQU 2464(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2496(CX), Y7 + VMOVDQU 2528(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2560(CX), Y7 + VMOVDQU 2592(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2624(CX), Y7 + VMOVDQU 2656(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (BX)(R12*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2688(CX), Y7 + VMOVDQU 2720(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2752(CX), Y7 + VMOVDQU 2784(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2816(CX), Y7 + VMOVDQU 2848(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2880(CX), Y7 + VMOVDQU 2912(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2944(CX), Y7 + VMOVDQU 2976(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 3008(CX), Y7 + VMOVDQU 3040(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + MOVQ (DX), R13 + VMOVDQU Y0, (R13)(R12*1) + MOVQ 24(DX), R13 + VMOVDQU Y1, (R13)(R12*1) + MOVQ 48(DX), R13 + VMOVDQU Y2, (R13)(R12*1) + MOVQ 72(DX), R13 + VMOVDQU Y3, (R13)(R12*1) + MOVQ 96(DX), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 120(DX), R13 
+ VMOVDQU Y5, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_8x6_loop + VZEROUPPER + +mulAvxTwo_8x6_end: + RET + +// func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 124 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x7_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), BX + MOVQ $0x0000000f, R12 + MOVQ R12, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R12 + +mulAvxTwo_8x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BP)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 
bytes from input 1 to 7 outputs + VMOVDQU (SI)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + 
VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1536(CX), Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1792(CX), Y8 + VMOVDQU 1824(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1856(CX), Y8 + VMOVDQU 1888(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1920(CX), Y8 + VMOVDQU 1952(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1984(CX), Y8 + VMOVDQU 2016(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2048(CX), Y8 + VMOVDQU 2080(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2112(CX), Y8 + VMOVDQU 2144(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR 
Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2176(CX), Y8 + VMOVDQU 2208(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2240(CX), Y8 + VMOVDQU 2272(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2304(CX), Y8 + VMOVDQU 2336(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2368(CX), Y8 + VMOVDQU 2400(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2432(CX), Y8 + VMOVDQU 2464(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2496(CX), Y8 + VMOVDQU 2528(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2560(CX), Y8 + VMOVDQU 2592(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2624(CX), Y8 + VMOVDQU 2656(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2688(CX), Y8 + VMOVDQU 2720(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2752(CX), Y8 + VMOVDQU 2784(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2816(CX), Y8 + VMOVDQU 2848(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2880(CX), Y8 + VMOVDQU 2912(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2944(CX), Y8 + VMOVDQU 2976(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, 
Y4 + VMOVDQU 3008(CX), Y8 + VMOVDQU 3040(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3072(CX), Y8 + VMOVDQU 3104(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (BX)(R12*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 3136(CX), Y8 + VMOVDQU 3168(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 3200(CX), Y8 + VMOVDQU 3232(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 3264(CX), Y8 + VMOVDQU 3296(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 3328(CX), Y8 + VMOVDQU 3360(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 3392(CX), Y8 + VMOVDQU 3424(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3456(CX), Y8 + VMOVDQU 3488(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3520(CX), Y8 + VMOVDQU 3552(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + MOVQ (DX), R13 + VMOVDQU Y0, (R13)(R12*1) + MOVQ 24(DX), R13 + VMOVDQU Y1, (R13)(R12*1) + MOVQ 48(DX), R13 + VMOVDQU Y2, (R13)(R12*1) + MOVQ 72(DX), R13 + VMOVDQU Y3, (R13)(R12*1) + MOVQ 96(DX), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 120(DX), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 144(DX), R13 + VMOVDQU Y6, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_8x7_loop + VZEROUPPER + +mulAvxTwo_8x7_end: + RET + +// func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_8x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 141 YMM 
used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_8x8_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), BX + MOVQ $0x0000000f, R12 + MOVQ R12, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R12 + +mulAvxTwo_8x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BP)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + 
VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, 
Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2048(CX), Y9 + VMOVDQU 2080(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2112(CX), Y9 + VMOVDQU 2144(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2176(CX), Y9 + VMOVDQU 2208(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2240(CX), Y9 + VMOVDQU 2272(CX), Y10 + VPSHUFB Y11, 
Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2304(CX), Y9 + VMOVDQU 2336(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2368(CX), Y9 + VMOVDQU 2400(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2432(CX), Y9 + VMOVDQU 2464(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 2496(CX), Y9 + VMOVDQU 2528(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2560(CX), Y9 + VMOVDQU 2592(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2624(CX), Y9 + VMOVDQU 2656(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2688(CX), Y9 + VMOVDQU 2720(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2752(CX), Y9 + VMOVDQU 2784(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2816(CX), Y9 + VMOVDQU 2848(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2880(CX), Y9 + VMOVDQU 2912(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2944(CX), Y9 + VMOVDQU 2976(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3008(CX), Y9 + VMOVDQU 3040(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3072(CX), Y9 + VMOVDQU 
3104(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3136(CX), Y9 + VMOVDQU 3168(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3200(CX), Y9 + VMOVDQU 3232(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3264(CX), Y9 + VMOVDQU 3296(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3328(CX), Y9 + VMOVDQU 3360(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3392(CX), Y9 + VMOVDQU 3424(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 3456(CX), Y9 + VMOVDQU 3488(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3520(CX), Y9 + VMOVDQU 3552(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (BX)(R12*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3584(CX), Y9 + VMOVDQU 3616(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3648(CX), Y9 + VMOVDQU 3680(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3712(CX), Y9 + VMOVDQU 3744(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3776(CX), Y9 + VMOVDQU 3808(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3840(CX), Y9 + VMOVDQU 3872(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3904(CX), Y9 + VMOVDQU 3936(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 3968(CX), Y9 + 
VMOVDQU 4000(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 4032(CX), Y9 + VMOVDQU 4064(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + MOVQ (DX), R13 + VMOVDQU Y0, (R13)(R12*1) + MOVQ 24(DX), R13 + VMOVDQU Y1, (R13)(R12*1) + MOVQ 48(DX), R13 + VMOVDQU Y2, (R13)(R12*1) + MOVQ 72(DX), R13 + VMOVDQU Y3, (R13)(R12*1) + MOVQ 96(DX), R13 + VMOVDQU Y4, (R13)(R12*1) + MOVQ 120(DX), R13 + VMOVDQU Y5, (R13)(R12*1) + MOVQ 144(DX), R13 + VMOVDQU Y6, (R13)(R12*1) + MOVQ 168(DX), R13 + VMOVDQU Y7, (R13)(R12*1) + + // Prepare for next loop + ADDQ $0x20, R12 + DECQ AX + JNZ mulAvxTwo_8x8_loop + VZEROUPPER + +mulAvxTwo_8x8_end: + RET + +// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x1(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 22 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x1_end + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), BX + MOVQ $0x0000000f, R13 + MOVQ R13, X1 + VPBROADCASTB X1, Y1 + MOVQ start+72(FP), R13 + +mulAvxTwo_9x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BP)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU (CX), Y2 + VMOVDQU 32(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (SI)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 64(CX), Y2 + VMOVDQU 96(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, 
Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (DI)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 128(CX), Y2 + VMOVDQU 160(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (R8)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 192(CX), Y2 + VMOVDQU 224(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R9)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 256(CX), Y2 + VMOVDQU 288(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R10)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 320(CX), Y2 + VMOVDQU 352(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU (R11)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 384(CX), Y2 + VMOVDQU 416(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R12)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 448(CX), Y2 + VMOVDQU 480(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (BX)(R13*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 512(CX), Y2 + VMOVDQU 544(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ 
AX + JNZ mulAvxTwo_9x1_loop + VZEROUPPER + +mulAvxTwo_9x1_end: + RET + +// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 43 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), R9 + MOVQ 96(BP), R10 + MOVQ 120(BP), R11 + MOVQ 144(BP), R12 + MOVQ 168(BP), R13 + MOVQ 192(BP), BP + MOVQ $0x0000000f, R14 + MOVQ R14, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R14 + +mulAvxTwo_9x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes from input 0 to 2 outputs + VMOVDQU (SI)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (R8)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + 
+ // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R9)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R10)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 5 to 2 outputs + VMOVDQU (R11)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R12)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 768(CX), Y3 + VMOVDQU 800(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 832(CX), Y3 + VMOVDQU 864(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R13)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 896(CX), Y3 + VMOVDQU 928(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 960(CX), Y3 + VMOVDQU 992(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 
bytes from input 8 to 2 outputs + VMOVDQU (BP)(R14*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 1024(CX), Y3 + VMOVDQU 1056(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 1088(CX), Y3 + VMOVDQU 1120(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R14*1) + VMOVDQU Y1, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_9x2_loop + VZEROUPPER + +mulAvxTwo_9x2_end: + RET + +// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x3(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 62 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x3_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), BP + MOVQ 48(DX), DX + MOVQ in_base+24(FP), SI + MOVQ (SI), DI + MOVQ 24(SI), R8 + MOVQ 48(SI), R9 + MOVQ 72(SI), R10 + MOVQ 96(SI), R11 + MOVQ 120(SI), R12 + MOVQ 144(SI), R13 + MOVQ 168(SI), R14 + MOVQ 192(SI), SI + MOVQ $0x0000000f, R15 + MOVQ R15, X3 + VPBROADCASTB X3, Y3 + MOVQ start+72(FP), R15 + +mulAvxTwo_9x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (DI)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (R8)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + 
VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (R9)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R10)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R11)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 768(CX), Y4 + VMOVDQU 800(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 832(CX), Y4 + VMOVDQU 864(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 896(CX), Y4 + VMOVDQU 928(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 5 to 3 
outputs + VMOVDQU (R12)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 960(CX), Y4 + VMOVDQU 992(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1024(CX), Y4 + VMOVDQU 1056(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1088(CX), Y4 + VMOVDQU 1120(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R13)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1152(CX), Y4 + VMOVDQU 1184(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1216(CX), Y4 + VMOVDQU 1248(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1280(CX), Y4 + VMOVDQU 1312(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R14)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1344(CX), Y4 + VMOVDQU 1376(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1408(CX), Y4 + VMOVDQU 1440(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1472(CX), Y4 + VMOVDQU 1504(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (SI)(R15*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1536(CX), Y4 + VMOVDQU 1568(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1600(CX), Y4 + VMOVDQU 1632(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1664(CX), Y4 + VMOVDQU 1696(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB 
Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (BP)(R15*1) + VMOVDQU Y2, (DX)(R15*1) + + // Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxTwo_9x3_loop + VZEROUPPER + +mulAvxTwo_9x3_end: + RET + +// func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 81 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x4_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), BX + MOVQ $0x0000000f, R13 + MOVQ R13, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R13 + +mulAvxTwo_9x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BP)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 
+ VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1024(CX), Y5 + VMOVDQU 1056(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1088(CX), Y5 + VMOVDQU 1120(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 
1152(CX), Y5 + VMOVDQU 1184(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1216(CX), Y5 + VMOVDQU 1248(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1280(CX), Y5 + VMOVDQU 1312(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1344(CX), Y5 + VMOVDQU 1376(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1408(CX), Y5 + VMOVDQU 1440(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1472(CX), Y5 + VMOVDQU 1504(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1536(CX), Y5 + VMOVDQU 1568(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1600(CX), Y5 + VMOVDQU 1632(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1664(CX), Y5 + VMOVDQU 1696(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1728(CX), Y5 + VMOVDQU 1760(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R12)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1792(CX), Y5 + VMOVDQU 1824(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1856(CX), Y5 + VMOVDQU 1888(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1920(CX), Y5 + VMOVDQU 1952(CX), Y6 + VPSHUFB 
Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1984(CX), Y5 + VMOVDQU 2016(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (BX)(R13*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 2048(CX), Y5 + VMOVDQU 2080(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 2112(CX), Y5 + VMOVDQU 2144(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 2176(CX), Y5 + VMOVDQU 2208(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 2240(CX), Y5 + VMOVDQU 2272(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + MOVQ (DX), R14 + VMOVDQU Y0, (R14)(R13*1) + MOVQ 24(DX), R14 + VMOVDQU Y1, (R14)(R13*1) + MOVQ 48(DX), R14 + VMOVDQU Y2, (R14)(R13*1) + MOVQ 72(DX), R14 + VMOVDQU Y3, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_9x4_loop + VZEROUPPER + +mulAvxTwo_9x4_end: + RET + +// func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 100 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x5_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), BX + MOVQ $0x0000000f, R13 + MOVQ R13, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R13 + +mulAvxTwo_9x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 
outputs + VMOVDQU (BP)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + 
VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1280(CX), Y6 + VMOVDQU 1312(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1344(CX), Y6 + VMOVDQU 1376(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1408(CX), Y6 + VMOVDQU 1440(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1472(CX), Y6 + VMOVDQU 1504(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1536(CX), Y6 + VMOVDQU 1568(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1600(CX), Y6 + VMOVDQU 1632(CX), Y7 + VPSHUFB 
Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1664(CX), Y6 + VMOVDQU 1696(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1728(CX), Y6 + VMOVDQU 1760(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1792(CX), Y6 + VMOVDQU 1824(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1856(CX), Y6 + VMOVDQU 1888(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1920(CX), Y6 + VMOVDQU 1952(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1984(CX), Y6 + VMOVDQU 2016(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2048(CX), Y6 + VMOVDQU 2080(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2112(CX), Y6 + VMOVDQU 2144(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2176(CX), Y6 + VMOVDQU 2208(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 2240(CX), Y6 + VMOVDQU 2272(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 2304(CX), Y6 + VMOVDQU 2336(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2368(CX), Y6 + VMOVDQU 2400(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2432(CX), Y6 + VMOVDQU 2464(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, 
Y3 + VMOVDQU 2496(CX), Y6 + VMOVDQU 2528(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (BX)(R13*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 2560(CX), Y6 + VMOVDQU 2592(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 2624(CX), Y6 + VMOVDQU 2656(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2688(CX), Y6 + VMOVDQU 2720(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2752(CX), Y6 + VMOVDQU 2784(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2816(CX), Y6 + VMOVDQU 2848(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + MOVQ (DX), R14 + VMOVDQU Y0, (R14)(R13*1) + MOVQ 24(DX), R14 + VMOVDQU Y1, (R14)(R13*1) + MOVQ 48(DX), R14 + VMOVDQU Y2, (R14)(R13*1) + MOVQ 72(DX), R14 + VMOVDQU Y3, (R14)(R13*1) + MOVQ 96(DX), R14 + VMOVDQU Y4, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_9x5_loop + VZEROUPPER + +mulAvxTwo_9x5_end: + RET + +// func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 119 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x6_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), BX + MOVQ $0x0000000f, R13 + MOVQ R13, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R13 + +mulAvxTwo_9x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + 
VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BP)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + 
VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1536(CX), Y7 + VMOVDQU 1568(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1600(CX), Y7 + VMOVDQU 
1632(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1664(CX), Y7 + VMOVDQU 1696(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1728(CX), Y7 + VMOVDQU 1760(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1792(CX), Y7 + VMOVDQU 1824(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1856(CX), Y7 + VMOVDQU 1888(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1920(CX), Y7 + VMOVDQU 1952(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1984(CX), Y7 + VMOVDQU 2016(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2048(CX), Y7 + VMOVDQU 2080(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2112(CX), Y7 + VMOVDQU 2144(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2176(CX), Y7 + VMOVDQU 2208(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2240(CX), Y7 + VMOVDQU 2272(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2304(CX), Y7 + VMOVDQU 2336(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2368(CX), Y7 + VMOVDQU 2400(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2432(CX), Y7 + VMOVDQU 2464(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, 
Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2496(CX), Y7 + VMOVDQU 2528(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2560(CX), Y7 + VMOVDQU 2592(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2624(CX), Y7 + VMOVDQU 2656(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2688(CX), Y7 + VMOVDQU 2720(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2752(CX), Y7 + VMOVDQU 2784(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2816(CX), Y7 + VMOVDQU 2848(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2880(CX), Y7 + VMOVDQU 2912(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2944(CX), Y7 + VMOVDQU 2976(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 3008(CX), Y7 + VMOVDQU 3040(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (BX)(R13*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 3072(CX), Y7 + VMOVDQU 3104(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 3136(CX), Y7 + VMOVDQU 3168(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 3200(CX), Y7 + VMOVDQU 3232(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 3264(CX), Y7 + VMOVDQU 3296(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + 
VMOVDQU 3328(CX), Y7 + VMOVDQU 3360(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 3392(CX), Y7 + VMOVDQU 3424(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + MOVQ (DX), R14 + VMOVDQU Y0, (R14)(R13*1) + MOVQ 24(DX), R14 + VMOVDQU Y1, (R14)(R13*1) + MOVQ 48(DX), R14 + VMOVDQU Y2, (R14)(R13*1) + MOVQ 72(DX), R14 + VMOVDQU Y3, (R14)(R13*1) + MOVQ 96(DX), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 120(DX), R14 + VMOVDQU Y5, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_9x6_loop + VZEROUPPER + +mulAvxTwo_9x6_end: + RET + +// func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 138 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x7_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), BX + MOVQ $0x0000000f, R13 + MOVQ R13, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R13 + +mulAvxTwo_9x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BP)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR 
Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), 
Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1536(CX), Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1792(CX), Y8 + VMOVDQU 1824(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1856(CX), Y8 + VMOVDQU 1888(CX), Y9 + VPSHUFB Y10, Y8, Y8 + 
VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1920(CX), Y8 + VMOVDQU 1952(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1984(CX), Y8 + VMOVDQU 2016(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2048(CX), Y8 + VMOVDQU 2080(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2112(CX), Y8 + VMOVDQU 2144(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2176(CX), Y8 + VMOVDQU 2208(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2240(CX), Y8 + VMOVDQU 2272(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2304(CX), Y8 + VMOVDQU 2336(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2368(CX), Y8 + VMOVDQU 2400(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2432(CX), Y8 + VMOVDQU 2464(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2496(CX), Y8 + VMOVDQU 2528(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2560(CX), Y8 + VMOVDQU 2592(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2624(CX), Y8 + VMOVDQU 2656(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2688(CX), Y8 + VMOVDQU 2720(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR 
Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2752(CX), Y8 + VMOVDQU 2784(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2816(CX), Y8 + VMOVDQU 2848(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2880(CX), Y8 + VMOVDQU 2912(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2944(CX), Y8 + VMOVDQU 2976(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3008(CX), Y8 + VMOVDQU 3040(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3072(CX), Y8 + VMOVDQU 3104(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 3136(CX), Y8 + VMOVDQU 3168(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 3200(CX), Y8 + VMOVDQU 3232(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 3264(CX), Y8 + VMOVDQU 3296(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 3328(CX), Y8 + VMOVDQU 3360(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 3392(CX), Y8 + VMOVDQU 3424(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3456(CX), Y8 + VMOVDQU 3488(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3520(CX), Y8 + VMOVDQU 3552(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (BX)(R13*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, 
Y11 + VMOVDQU 3584(CX), Y8 + VMOVDQU 3616(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 3648(CX), Y8 + VMOVDQU 3680(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 3712(CX), Y8 + VMOVDQU 3744(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 3776(CX), Y8 + VMOVDQU 3808(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 3840(CX), Y8 + VMOVDQU 3872(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3904(CX), Y8 + VMOVDQU 3936(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3968(CX), Y8 + VMOVDQU 4000(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + MOVQ (DX), R14 + VMOVDQU Y0, (R14)(R13*1) + MOVQ 24(DX), R14 + VMOVDQU Y1, (R14)(R13*1) + MOVQ 48(DX), R14 + VMOVDQU Y2, (R14)(R13*1) + MOVQ 72(DX), R14 + VMOVDQU Y3, (R14)(R13*1) + MOVQ 96(DX), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 120(DX), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 144(DX), R14 + VMOVDQU Y6, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_9x7_loop + VZEROUPPER + +mulAvxTwo_9x7_end: + RET + +// func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_9x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 157 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_9x8_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), BX + MOVQ $0x0000000f, R13 + MOVQ R13, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), 
R13 + +mulAvxTwo_9x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BP)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 + VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + 
VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB 
Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2048(CX), Y9 + VMOVDQU 2080(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2112(CX), Y9 + VMOVDQU 2144(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2176(CX), Y9 + VMOVDQU 2208(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2240(CX), Y9 + VMOVDQU 2272(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2304(CX), Y9 + VMOVDQU 2336(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2368(CX), Y9 + VMOVDQU 2400(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2432(CX), Y9 + VMOVDQU 2464(CX), Y10 + 
VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 2496(CX), Y9 + VMOVDQU 2528(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2560(CX), Y9 + VMOVDQU 2592(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2624(CX), Y9 + VMOVDQU 2656(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2688(CX), Y9 + VMOVDQU 2720(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2752(CX), Y9 + VMOVDQU 2784(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2816(CX), Y9 + VMOVDQU 2848(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2880(CX), Y9 + VMOVDQU 2912(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2944(CX), Y9 + VMOVDQU 2976(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3008(CX), Y9 + VMOVDQU 3040(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3072(CX), Y9 + VMOVDQU 3104(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3136(CX), Y9 + VMOVDQU 3168(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3200(CX), Y9 + VMOVDQU 3232(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3264(CX), Y9 + 
VMOVDQU 3296(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3328(CX), Y9 + VMOVDQU 3360(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3392(CX), Y9 + VMOVDQU 3424(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 3456(CX), Y9 + VMOVDQU 3488(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3520(CX), Y9 + VMOVDQU 3552(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3584(CX), Y9 + VMOVDQU 3616(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3648(CX), Y9 + VMOVDQU 3680(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3712(CX), Y9 + VMOVDQU 3744(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3776(CX), Y9 + VMOVDQU 3808(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3840(CX), Y9 + VMOVDQU 3872(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3904(CX), Y9 + VMOVDQU 3936(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 3968(CX), Y9 + VMOVDQU 4000(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 4032(CX), Y9 + VMOVDQU 4064(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (BX)(R13*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + 
VMOVDQU 4096(CX), Y9 + VMOVDQU 4128(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 4160(CX), Y9 + VMOVDQU 4192(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 4224(CX), Y9 + VMOVDQU 4256(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 4288(CX), Y9 + VMOVDQU 4320(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 4352(CX), Y9 + VMOVDQU 4384(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 4416(CX), Y9 + VMOVDQU 4448(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 4480(CX), Y9 + VMOVDQU 4512(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 4544(CX), Y9 + VMOVDQU 4576(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + MOVQ (DX), R14 + VMOVDQU Y0, (R14)(R13*1) + MOVQ 24(DX), R14 + VMOVDQU Y1, (R14)(R13*1) + MOVQ 48(DX), R14 + VMOVDQU Y2, (R14)(R13*1) + MOVQ 72(DX), R14 + VMOVDQU Y3, (R14)(R13*1) + MOVQ 96(DX), R14 + VMOVDQU Y4, (R14)(R13*1) + MOVQ 120(DX), R14 + VMOVDQU Y5, (R14)(R13*1) + MOVQ 144(DX), R14 + VMOVDQU Y6, (R14)(R13*1) + MOVQ 168(DX), R14 + VMOVDQU Y7, (R14)(R13*1) + + // Prepare for next loop + ADDQ $0x20, R13 + DECQ AX + JNZ mulAvxTwo_9x8_loop + VZEROUPPER + +mulAvxTwo_9x8_end: + RET + +// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +// +// mulAvxTwo_10x1 combines 10 input slices into 1 output slice, 32 bytes per loop iteration. +// For every input i the 32 bytes at the current offset are split into low and high nibbles, +// each nibble vector indexes (VPSHUFB) one of the two 32-byte tables stored at matrix offsets +// 64*i and 64*i plus 32, and both lookups are XORed into the accumulator Y0, which is then +// stored to out[0] at the same offset. The offset begins at start and advances by 32 on each +// of the n/32 iterations; when n is below 32 the function returns without writing to out. +// +// The frame size is 8 rather than 0 because BP is used as a data pointer below: with a +// non-zero frame the assembler saves and restores BP around the body, so the caller's +// frame pointer survives the call. +TEXT ·mulAvxTwo_10x1(SB), $8-88 + // Loading no tables to registers + // Full registers estimated 24 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + // AX = n / 32, the number of 32-byte iterations; a remainder of n is not processed here + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x1_end + // DX = data pointer of out[0] + MOVQ out_base+48(FP), DX + MOVQ (DX), DX + // BP, SI, DI, R8..R13, BX = data pointers of in[0]..in[9], read from slice headers 24 bytes apart + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 
24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + // Y1 = 0x0f replicated into all 32 bytes, used to isolate one nibble per byte + MOVQ $0x0000000f, R14 + MOVQ R14, X1 + VPBROADCASTB X1, Y1 + // R14 = byte offset applied to every input and to the output + MOVQ start+72(FP), R14 + +mulAvxTwo_10x1_loop: + // Clear 1 outputs + VPXOR Y0, Y0, Y0 + + // Load and process 32 bytes from input 0 to 1 outputs + VMOVDQU (BP)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + // Y4 = low nibbles, Y5 = high nibbles; Y2 and Y3 receive the matching lookup tables + VMOVDQU (CX), Y2 + VMOVDQU 32(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 1 to 1 outputs + VMOVDQU (SI)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 64(CX), Y2 + VMOVDQU 96(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 2 to 1 outputs + VMOVDQU (DI)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 128(CX), Y2 + VMOVDQU 160(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 3 to 1 outputs + VMOVDQU (R8)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 192(CX), Y2 + VMOVDQU 224(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 4 to 1 outputs + VMOVDQU (R9)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 256(CX), Y2 + VMOVDQU 288(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 5 to 1 outputs + VMOVDQU (R10)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 320(CX), Y2 + VMOVDQU 352(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 6 to 1 outputs + VMOVDQU 
(R11)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 384(CX), Y2 + VMOVDQU 416(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 7 to 1 outputs + VMOVDQU (R12)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 448(CX), Y2 + VMOVDQU 480(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 8 to 1 outputs + VMOVDQU (R13)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 512(CX), Y2 + VMOVDQU 544(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Load and process 32 bytes from input 9 to 1 outputs + VMOVDQU (BX)(R14*1), Y4 + VPSRLQ $0x04, Y4, Y5 + VPAND Y1, Y4, Y4 + VPAND Y1, Y5, Y5 + VMOVDQU 576(CX), Y2 + VMOVDQU 608(CX), Y3 + VPSHUFB Y4, Y2, Y2 + VPSHUFB Y5, Y3, Y3 + VPXOR Y2, Y3, Y2 + VPXOR Y2, Y0, Y0 + + // Store 1 outputs + VMOVDQU Y0, (DX)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x1_loop + VZEROUPPER + +mulAvxTwo_10x1_end: + RET + +// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_10x2(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 47 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x2_end + MOVQ out_base+48(FP), DX + MOVQ (DX), BX + MOVQ 24(DX), DX + MOVQ in_base+24(FP), BP + MOVQ (BP), SI + MOVQ 24(BP), DI + MOVQ 48(BP), R8 + MOVQ 72(BP), R9 + MOVQ 96(BP), R10 + MOVQ 120(BP), R11 + MOVQ 144(BP), R12 + MOVQ 168(BP), R13 + MOVQ 192(BP), R14 + MOVQ 216(BP), BP + MOVQ $0x0000000f, R15 + MOVQ R15, X2 + VPBROADCASTB X2, Y2 + MOVQ start+72(FP), R15 + +mulAvxTwo_10x2_loop: + // Clear 2 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + + // Load and process 32 bytes 
from input 0 to 2 outputs + VMOVDQU (SI)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU (CX), Y3 + VMOVDQU 32(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 64(CX), Y3 + VMOVDQU 96(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 1 to 2 outputs + VMOVDQU (DI)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 128(CX), Y3 + VMOVDQU 160(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 192(CX), Y3 + VMOVDQU 224(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 2 to 2 outputs + VMOVDQU (R8)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 256(CX), Y3 + VMOVDQU 288(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 320(CX), Y3 + VMOVDQU 352(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 3 to 2 outputs + VMOVDQU (R9)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 384(CX), Y3 + VMOVDQU 416(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 448(CX), Y3 + VMOVDQU 480(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 4 to 2 outputs + VMOVDQU (R10)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 512(CX), Y3 + VMOVDQU 544(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 576(CX), Y3 + VMOVDQU 608(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 5 to 2 outputs + 
VMOVDQU (R11)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 640(CX), Y3 + VMOVDQU 672(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 704(CX), Y3 + VMOVDQU 736(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 6 to 2 outputs + VMOVDQU (R12)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 768(CX), Y3 + VMOVDQU 800(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 832(CX), Y3 + VMOVDQU 864(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 7 to 2 outputs + VMOVDQU (R13)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 896(CX), Y3 + VMOVDQU 928(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 960(CX), Y3 + VMOVDQU 992(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 8 to 2 outputs + VMOVDQU (R14)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 1024(CX), Y3 + VMOVDQU 1056(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 1088(CX), Y3 + VMOVDQU 1120(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Load and process 32 bytes from input 9 to 2 outputs + VMOVDQU (BP)(R15*1), Y5 + VPSRLQ $0x04, Y5, Y6 + VPAND Y2, Y5, Y5 + VPAND Y2, Y6, Y6 + VMOVDQU 1152(CX), Y3 + VMOVDQU 1184(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y0, Y0 + VMOVDQU 1216(CX), Y3 + VMOVDQU 1248(CX), Y4 + VPSHUFB Y5, Y3, Y3 + VPSHUFB Y6, Y4, Y4 + VPXOR Y3, Y4, Y3 + VPXOR Y3, Y1, Y1 + + // Store 2 outputs + VMOVDQU Y0, (BX)(R15*1) + VMOVDQU Y1, (DX)(R15*1) + + 
// Prepare for next loop + ADDQ $0x20, R15 + DECQ AX + JNZ mulAvxTwo_10x2_loop + VZEROUPPER + +mulAvxTwo_10x2_end: + RET + +// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +// +// mulAvxTwo_10x3 combines 10 input slices into 3 output slices, 32 bytes per loop iteration. +// For every input i the 32 bytes at the current offset are split into low and high nibbles; +// for every output j the nibble vectors index (VPSHUFB) the two 32-byte tables stored at +// matrix offsets 64*(3*i plus j) and 32 bytes after it, and both lookups are XORed into the +// accumulator Yj. After all inputs, Y0..Y2 are stored to out[0]..out[2] at the same offset. +// The offset begins at start and advances by 32 on each of the n/32 iterations; when n is +// below 32 the function returns without writing to out. +// +// The frame size is 8 rather than 0 because BP is used as a data pointer below: with a +// non-zero frame the assembler saves and restores BP around the body, so the caller's +// frame pointer survives the call. +TEXT ·mulAvxTwo_10x3(SB), $8-88 + // Loading no tables to registers + // Full registers estimated 68 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + // AX = n / 32, the number of 32-byte iterations; a remainder of n is not processed here + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x3_end + // DX = address of the out slice headers; BP, SI, DI, R8..R13, BX = data pointers of in[0]..in[9] + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + // Y3 = 0x0f replicated into all 32 bytes, used to isolate one nibble per byte + MOVQ $0x0000000f, R14 + MOVQ R14, X3 + VPBROADCASTB X3, Y3 + // R14 = byte offset applied to every input and output + MOVQ start+72(FP), R14 + +mulAvxTwo_10x3_loop: + // Clear 3 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + + // Load and process 32 bytes from input 0 to 3 outputs + VMOVDQU (BP)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + // Y6 = low nibbles, Y7 = high nibbles; Y4 and Y5 receive the matching lookup tables + VMOVDQU (CX), Y4 + VMOVDQU 32(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 64(CX), Y4 + VMOVDQU 96(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 128(CX), Y4 + VMOVDQU 160(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 1 to 3 outputs + VMOVDQU (SI)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 192(CX), Y4 + VMOVDQU 224(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 256(CX), Y4 + VMOVDQU 288(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 320(CX), Y4 + VMOVDQU 352(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 2 to 3 outputs + VMOVDQU (DI)(R14*1), Y6 + 
VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 384(CX), Y4 + VMOVDQU 416(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 448(CX), Y4 + VMOVDQU 480(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 512(CX), Y4 + VMOVDQU 544(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 3 to 3 outputs + VMOVDQU (R8)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 576(CX), Y4 + VMOVDQU 608(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 640(CX), Y4 + VMOVDQU 672(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 704(CX), Y4 + VMOVDQU 736(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 4 to 3 outputs + VMOVDQU (R9)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 768(CX), Y4 + VMOVDQU 800(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 832(CX), Y4 + VMOVDQU 864(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 896(CX), Y4 + VMOVDQU 928(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 5 to 3 outputs + VMOVDQU (R10)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 960(CX), Y4 + VMOVDQU 992(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1024(CX), Y4 + VMOVDQU 1056(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1088(CX), Y4 + VMOVDQU 1120(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // 
Load and process 32 bytes from input 6 to 3 outputs + VMOVDQU (R11)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1152(CX), Y4 + VMOVDQU 1184(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1216(CX), Y4 + VMOVDQU 1248(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1280(CX), Y4 + VMOVDQU 1312(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 7 to 3 outputs + VMOVDQU (R12)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1344(CX), Y4 + VMOVDQU 1376(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1408(CX), Y4 + VMOVDQU 1440(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1472(CX), Y4 + VMOVDQU 1504(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 8 to 3 outputs + VMOVDQU (R13)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1536(CX), Y4 + VMOVDQU 1568(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1600(CX), Y4 + VMOVDQU 1632(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1664(CX), Y4 + VMOVDQU 1696(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Load and process 32 bytes from input 9 to 3 outputs + VMOVDQU (BX)(R14*1), Y6 + VPSRLQ $0x04, Y6, Y7 + VPAND Y3, Y6, Y6 + VPAND Y3, Y7, Y7 + VMOVDQU 1728(CX), Y4 + VMOVDQU 1760(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y0, Y0 + VMOVDQU 1792(CX), Y4 + VMOVDQU 1824(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y1, Y1 + VMOVDQU 1856(CX), Y4 + VMOVDQU 
1888(CX), Y5 + VPSHUFB Y6, Y4, Y4 + VPSHUFB Y7, Y5, Y5 + VPXOR Y4, Y5, Y4 + VPXOR Y4, Y2, Y2 + + // Store 3 outputs + // The data pointer of each out[j] is re-read from its slice header at DX on every iteration + MOVQ (DX), R15 + VMOVDQU Y0, (R15)(R14*1) + MOVQ 24(DX), R15 + VMOVDQU Y1, (R15)(R14*1) + MOVQ 48(DX), R15 + VMOVDQU Y2, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x3_loop + VZEROUPPER + +mulAvxTwo_10x3_end: + RET + +// func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_10x4(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 89 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x4_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + MOVQ $0x0000000f, R14 + MOVQ R14, X4 + VPBROADCASTB X4, Y4 + MOVQ start+72(FP), R14 + +mulAvxTwo_10x4_loop: + // Clear 4 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + + // Load and process 32 bytes from input 0 to 4 outputs + VMOVDQU (BP)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU (CX), Y5 + VMOVDQU 32(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 64(CX), Y5 + VMOVDQU 96(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 128(CX), Y5 + VMOVDQU 160(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 192(CX), Y5 + VMOVDQU 224(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 1 to 4 outputs + VMOVDQU (SI)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 256(CX), Y5 + VMOVDQU 288(CX), Y6 + VPSHUFB Y7, Y5, 
Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 320(CX), Y5 + VMOVDQU 352(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 384(CX), Y5 + VMOVDQU 416(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 448(CX), Y5 + VMOVDQU 480(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 2 to 4 outputs + VMOVDQU (DI)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 512(CX), Y5 + VMOVDQU 544(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 576(CX), Y5 + VMOVDQU 608(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 640(CX), Y5 + VMOVDQU 672(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 704(CX), Y5 + VMOVDQU 736(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 3 to 4 outputs + VMOVDQU (R8)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 768(CX), Y5 + VMOVDQU 800(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 832(CX), Y5 + VMOVDQU 864(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 896(CX), Y5 + VMOVDQU 928(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 960(CX), Y5 + VMOVDQU 992(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 4 to 4 outputs + VMOVDQU (R9)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1024(CX), Y5 + VMOVDQU 1056(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 
1088(CX), Y5 + VMOVDQU 1120(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1152(CX), Y5 + VMOVDQU 1184(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1216(CX), Y5 + VMOVDQU 1248(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 5 to 4 outputs + VMOVDQU (R10)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1280(CX), Y5 + VMOVDQU 1312(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1344(CX), Y5 + VMOVDQU 1376(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1408(CX), Y5 + VMOVDQU 1440(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1472(CX), Y5 + VMOVDQU 1504(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 6 to 4 outputs + VMOVDQU (R11)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1536(CX), Y5 + VMOVDQU 1568(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1600(CX), Y5 + VMOVDQU 1632(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1664(CX), Y5 + VMOVDQU 1696(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1728(CX), Y5 + VMOVDQU 1760(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 7 to 4 outputs + VMOVDQU (R12)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 1792(CX), Y5 + VMOVDQU 1824(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 1856(CX), Y5 + VMOVDQU 1888(CX), Y6 + VPSHUFB 
Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 1920(CX), Y5 + VMOVDQU 1952(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 1984(CX), Y5 + VMOVDQU 2016(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 8 to 4 outputs + VMOVDQU (R13)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 2048(CX), Y5 + VMOVDQU 2080(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 2112(CX), Y5 + VMOVDQU 2144(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 2176(CX), Y5 + VMOVDQU 2208(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 2240(CX), Y5 + VMOVDQU 2272(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Load and process 32 bytes from input 9 to 4 outputs + VMOVDQU (BX)(R14*1), Y7 + VPSRLQ $0x04, Y7, Y8 + VPAND Y4, Y7, Y7 + VPAND Y4, Y8, Y8 + VMOVDQU 2304(CX), Y5 + VMOVDQU 2336(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y0, Y0 + VMOVDQU 2368(CX), Y5 + VMOVDQU 2400(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y1, Y1 + VMOVDQU 2432(CX), Y5 + VMOVDQU 2464(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y2, Y2 + VMOVDQU 2496(CX), Y5 + VMOVDQU 2528(CX), Y6 + VPSHUFB Y7, Y5, Y5 + VPSHUFB Y8, Y6, Y6 + VPXOR Y5, Y6, Y5 + VPXOR Y5, Y3, Y3 + + // Store 4 outputs + MOVQ (DX), R15 + VMOVDQU Y0, (R15)(R14*1) + MOVQ 24(DX), R15 + VMOVDQU Y1, (R15)(R14*1) + MOVQ 48(DX), R15 + VMOVDQU Y2, (R15)(R14*1) + MOVQ 72(DX), R15 + VMOVDQU Y3, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x4_loop + VZEROUPPER + +mulAvxTwo_10x4_end: + RET + +// func mulAvxTwo_10x5(matrix []byte, in 
[][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_10x5(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 110 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x5_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + MOVQ $0x0000000f, R14 + MOVQ R14, X5 + VPBROADCASTB X5, Y5 + MOVQ start+72(FP), R14 + +mulAvxTwo_10x5_loop: + // Clear 5 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + + // Load and process 32 bytes from input 0 to 5 outputs + VMOVDQU (BP)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU (CX), Y6 + VMOVDQU 32(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 64(CX), Y6 + VMOVDQU 96(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 128(CX), Y6 + VMOVDQU 160(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 192(CX), Y6 + VMOVDQU 224(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 256(CX), Y6 + VMOVDQU 288(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 1 to 5 outputs + VMOVDQU (SI)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 320(CX), Y6 + VMOVDQU 352(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 384(CX), Y6 + VMOVDQU 416(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 448(CX), Y6 + VMOVDQU 480(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + 
VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 512(CX), Y6 + VMOVDQU 544(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 576(CX), Y6 + VMOVDQU 608(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 2 to 5 outputs + VMOVDQU (DI)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 640(CX), Y6 + VMOVDQU 672(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 704(CX), Y6 + VMOVDQU 736(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 768(CX), Y6 + VMOVDQU 800(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 832(CX), Y6 + VMOVDQU 864(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 896(CX), Y6 + VMOVDQU 928(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 3 to 5 outputs + VMOVDQU (R8)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 960(CX), Y6 + VMOVDQU 992(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1024(CX), Y6 + VMOVDQU 1056(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1088(CX), Y6 + VMOVDQU 1120(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1152(CX), Y6 + VMOVDQU 1184(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1216(CX), Y6 + VMOVDQU 1248(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 4 to 5 outputs + VMOVDQU (R9)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1280(CX), Y6 + 
VMOVDQU 1312(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1344(CX), Y6 + VMOVDQU 1376(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1408(CX), Y6 + VMOVDQU 1440(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1472(CX), Y6 + VMOVDQU 1504(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1536(CX), Y6 + VMOVDQU 1568(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 5 to 5 outputs + VMOVDQU (R10)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1600(CX), Y6 + VMOVDQU 1632(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1664(CX), Y6 + VMOVDQU 1696(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 1728(CX), Y6 + VMOVDQU 1760(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 1792(CX), Y6 + VMOVDQU 1824(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 1856(CX), Y6 + VMOVDQU 1888(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 6 to 5 outputs + VMOVDQU (R11)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 1920(CX), Y6 + VMOVDQU 1952(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 1984(CX), Y6 + VMOVDQU 2016(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2048(CX), Y6 + VMOVDQU 2080(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2112(CX), Y6 + VMOVDQU 2144(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + 
VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2176(CX), Y6 + VMOVDQU 2208(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 7 to 5 outputs + VMOVDQU (R12)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 2240(CX), Y6 + VMOVDQU 2272(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 2304(CX), Y6 + VMOVDQU 2336(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2368(CX), Y6 + VMOVDQU 2400(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2432(CX), Y6 + VMOVDQU 2464(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2496(CX), Y6 + VMOVDQU 2528(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 8 to 5 outputs + VMOVDQU (R13)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 2560(CX), Y6 + VMOVDQU 2592(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 2624(CX), Y6 + VMOVDQU 2656(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 2688(CX), Y6 + VMOVDQU 2720(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 2752(CX), Y6 + VMOVDQU 2784(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 2816(CX), Y6 + VMOVDQU 2848(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Load and process 32 bytes from input 9 to 5 outputs + VMOVDQU (BX)(R14*1), Y8 + VPSRLQ $0x04, Y8, Y9 + VPAND Y5, Y8, Y8 + VPAND Y5, Y9, Y9 + VMOVDQU 2880(CX), Y6 + VMOVDQU 2912(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y0, Y0 + VMOVDQU 
2944(CX), Y6 + VMOVDQU 2976(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y1, Y1 + VMOVDQU 3008(CX), Y6 + VMOVDQU 3040(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y2, Y2 + VMOVDQU 3072(CX), Y6 + VMOVDQU 3104(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y3, Y3 + VMOVDQU 3136(CX), Y6 + VMOVDQU 3168(CX), Y7 + VPSHUFB Y8, Y6, Y6 + VPSHUFB Y9, Y7, Y7 + VPXOR Y6, Y7, Y6 + VPXOR Y6, Y4, Y4 + + // Store 5 outputs + MOVQ (DX), R15 + VMOVDQU Y0, (R15)(R14*1) + MOVQ 24(DX), R15 + VMOVDQU Y1, (R15)(R14*1) + MOVQ 48(DX), R15 + VMOVDQU Y2, (R15)(R14*1) + MOVQ 72(DX), R15 + VMOVDQU Y3, (R15)(R14*1) + MOVQ 96(DX), R15 + VMOVDQU Y4, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x5_loop + VZEROUPPER + +mulAvxTwo_10x5_end: + RET + +// func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_10x6(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 131 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x6_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + MOVQ $0x0000000f, R14 + MOVQ R14, X6 + VPBROADCASTB X6, Y6 + MOVQ start+72(FP), R14 + +mulAvxTwo_10x6_loop: + // Clear 6 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + + // Load and process 32 bytes from input 0 to 6 outputs + VMOVDQU (BP)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU (CX), Y7 + VMOVDQU 32(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 64(CX), Y7 + VMOVDQU 96(CX), Y8 
+ VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 128(CX), Y7 + VMOVDQU 160(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 192(CX), Y7 + VMOVDQU 224(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 256(CX), Y7 + VMOVDQU 288(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 320(CX), Y7 + VMOVDQU 352(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 1 to 6 outputs + VMOVDQU (SI)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 384(CX), Y7 + VMOVDQU 416(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 448(CX), Y7 + VMOVDQU 480(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 512(CX), Y7 + VMOVDQU 544(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 576(CX), Y7 + VMOVDQU 608(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 640(CX), Y7 + VMOVDQU 672(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 704(CX), Y7 + VMOVDQU 736(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 2 to 6 outputs + VMOVDQU (DI)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 768(CX), Y7 + VMOVDQU 800(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 832(CX), Y7 + VMOVDQU 864(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 896(CX), Y7 + VMOVDQU 928(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, 
Y2, Y2 + VMOVDQU 960(CX), Y7 + VMOVDQU 992(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1024(CX), Y7 + VMOVDQU 1056(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1088(CX), Y7 + VMOVDQU 1120(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 3 to 6 outputs + VMOVDQU (R8)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1152(CX), Y7 + VMOVDQU 1184(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1216(CX), Y7 + VMOVDQU 1248(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1280(CX), Y7 + VMOVDQU 1312(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1344(CX), Y7 + VMOVDQU 1376(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1408(CX), Y7 + VMOVDQU 1440(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1472(CX), Y7 + VMOVDQU 1504(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 4 to 6 outputs + VMOVDQU (R9)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1536(CX), Y7 + VMOVDQU 1568(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1600(CX), Y7 + VMOVDQU 1632(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 1664(CX), Y7 + VMOVDQU 1696(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 1728(CX), Y7 + VMOVDQU 1760(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 1792(CX), Y7 + VMOVDQU 1824(CX), Y8 
+ VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 1856(CX), Y7 + VMOVDQU 1888(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 5 to 6 outputs + VMOVDQU (R10)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 1920(CX), Y7 + VMOVDQU 1952(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 1984(CX), Y7 + VMOVDQU 2016(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2048(CX), Y7 + VMOVDQU 2080(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2112(CX), Y7 + VMOVDQU 2144(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2176(CX), Y7 + VMOVDQU 2208(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2240(CX), Y7 + VMOVDQU 2272(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 6 to 6 outputs + VMOVDQU (R11)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2304(CX), Y7 + VMOVDQU 2336(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2368(CX), Y7 + VMOVDQU 2400(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2432(CX), Y7 + VMOVDQU 2464(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2496(CX), Y7 + VMOVDQU 2528(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2560(CX), Y7 + VMOVDQU 2592(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 2624(CX), Y7 + VMOVDQU 2656(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + 
VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 7 to 6 outputs + VMOVDQU (R12)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 2688(CX), Y7 + VMOVDQU 2720(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 2752(CX), Y7 + VMOVDQU 2784(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 2816(CX), Y7 + VMOVDQU 2848(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 2880(CX), Y7 + VMOVDQU 2912(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 2944(CX), Y7 + VMOVDQU 2976(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 3008(CX), Y7 + VMOVDQU 3040(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 8 to 6 outputs + VMOVDQU (R13)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, Y10, Y10 + VMOVDQU 3072(CX), Y7 + VMOVDQU 3104(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 3136(CX), Y7 + VMOVDQU 3168(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 3200(CX), Y7 + VMOVDQU 3232(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 3264(CX), Y7 + VMOVDQU 3296(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 3328(CX), Y7 + VMOVDQU 3360(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 3392(CX), Y7 + VMOVDQU 3424(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Load and process 32 bytes from input 9 to 6 outputs + VMOVDQU (BX)(R14*1), Y9 + VPSRLQ $0x04, Y9, Y10 + VPAND Y6, Y9, Y9 + VPAND Y6, 
Y10, Y10 + VMOVDQU 3456(CX), Y7 + VMOVDQU 3488(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y0, Y0 + VMOVDQU 3520(CX), Y7 + VMOVDQU 3552(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y1, Y1 + VMOVDQU 3584(CX), Y7 + VMOVDQU 3616(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y2, Y2 + VMOVDQU 3648(CX), Y7 + VMOVDQU 3680(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y3, Y3 + VMOVDQU 3712(CX), Y7 + VMOVDQU 3744(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y4, Y4 + VMOVDQU 3776(CX), Y7 + VMOVDQU 3808(CX), Y8 + VPSHUFB Y9, Y7, Y7 + VPSHUFB Y10, Y8, Y8 + VPXOR Y7, Y8, Y7 + VPXOR Y7, Y5, Y5 + + // Store 6 outputs + MOVQ (DX), R15 + VMOVDQU Y0, (R15)(R14*1) + MOVQ 24(DX), R15 + VMOVDQU Y1, (R15)(R14*1) + MOVQ 48(DX), R15 + VMOVDQU Y2, (R15)(R14*1) + MOVQ 72(DX), R15 + VMOVDQU Y3, (R15)(R14*1) + MOVQ 96(DX), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 120(DX), R15 + VMOVDQU Y5, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x6_loop + VZEROUPPER + +mulAvxTwo_10x6_end: + RET + +// func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_10x7(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 152 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x7_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + MOVQ $0x0000000f, R14 + MOVQ R14, X7 + VPBROADCASTB X7, Y7 + MOVQ start+72(FP), R14 + +mulAvxTwo_10x7_loop: + // Clear 7 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + 
VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + + // Load and process 32 bytes from input 0 to 7 outputs + VMOVDQU (BP)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU (CX), Y8 + VMOVDQU 32(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 64(CX), Y8 + VMOVDQU 96(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 128(CX), Y8 + VMOVDQU 160(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 192(CX), Y8 + VMOVDQU 224(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 256(CX), Y8 + VMOVDQU 288(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 320(CX), Y8 + VMOVDQU 352(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 384(CX), Y8 + VMOVDQU 416(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 1 to 7 outputs + VMOVDQU (SI)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 448(CX), Y8 + VMOVDQU 480(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 512(CX), Y8 + VMOVDQU 544(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 576(CX), Y8 + VMOVDQU 608(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 640(CX), Y8 + VMOVDQU 672(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 704(CX), Y8 + VMOVDQU 736(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 768(CX), Y8 + VMOVDQU 800(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 832(CX), Y8 + 
VMOVDQU 864(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 2 to 7 outputs + VMOVDQU (DI)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 896(CX), Y8 + VMOVDQU 928(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 960(CX), Y8 + VMOVDQU 992(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1024(CX), Y8 + VMOVDQU 1056(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1088(CX), Y8 + VMOVDQU 1120(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1152(CX), Y8 + VMOVDQU 1184(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1216(CX), Y8 + VMOVDQU 1248(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1280(CX), Y8 + VMOVDQU 1312(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 3 to 7 outputs + VMOVDQU (R8)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1344(CX), Y8 + VMOVDQU 1376(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1408(CX), Y8 + VMOVDQU 1440(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1472(CX), Y8 + VMOVDQU 1504(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1536(CX), Y8 + VMOVDQU 1568(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 1600(CX), Y8 + VMOVDQU 1632(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 1664(CX), Y8 + VMOVDQU 1696(CX), Y9 + VPSHUFB 
Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 1728(CX), Y8 + VMOVDQU 1760(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 4 to 7 outputs + VMOVDQU (R9)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 1792(CX), Y8 + VMOVDQU 1824(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 1856(CX), Y8 + VMOVDQU 1888(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 1920(CX), Y8 + VMOVDQU 1952(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 1984(CX), Y8 + VMOVDQU 2016(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2048(CX), Y8 + VMOVDQU 2080(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2112(CX), Y8 + VMOVDQU 2144(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2176(CX), Y8 + VMOVDQU 2208(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 5 to 7 outputs + VMOVDQU (R10)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2240(CX), Y8 + VMOVDQU 2272(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2304(CX), Y8 + VMOVDQU 2336(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2368(CX), Y8 + VMOVDQU 2400(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2432(CX), Y8 + VMOVDQU 2464(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2496(CX), Y8 + VMOVDQU 2528(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, 
Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 2560(CX), Y8 + VMOVDQU 2592(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 2624(CX), Y8 + VMOVDQU 2656(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 6 to 7 outputs + VMOVDQU (R11)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 2688(CX), Y8 + VMOVDQU 2720(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 2752(CX), Y8 + VMOVDQU 2784(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 2816(CX), Y8 + VMOVDQU 2848(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 2880(CX), Y8 + VMOVDQU 2912(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 2944(CX), Y8 + VMOVDQU 2976(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3008(CX), Y8 + VMOVDQU 3040(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3072(CX), Y8 + VMOVDQU 3104(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 7 to 7 outputs + VMOVDQU (R12)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 3136(CX), Y8 + VMOVDQU 3168(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 3200(CX), Y8 + VMOVDQU 3232(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 3264(CX), Y8 + VMOVDQU 3296(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 3328(CX), Y8 + VMOVDQU 3360(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + 
VPXOR Y8, Y3, Y3 + VMOVDQU 3392(CX), Y8 + VMOVDQU 3424(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3456(CX), Y8 + VMOVDQU 3488(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3520(CX), Y8 + VMOVDQU 3552(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 8 to 7 outputs + VMOVDQU (R13)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 3584(CX), Y8 + VMOVDQU 3616(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 3648(CX), Y8 + VMOVDQU 3680(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 3712(CX), Y8 + VMOVDQU 3744(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 3776(CX), Y8 + VMOVDQU 3808(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 3840(CX), Y8 + VMOVDQU 3872(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 3904(CX), Y8 + VMOVDQU 3936(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 3968(CX), Y8 + VMOVDQU 4000(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Load and process 32 bytes from input 9 to 7 outputs + VMOVDQU (BX)(R14*1), Y10 + VPSRLQ $0x04, Y10, Y11 + VPAND Y7, Y10, Y10 + VPAND Y7, Y11, Y11 + VMOVDQU 4032(CX), Y8 + VMOVDQU 4064(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y0, Y0 + VMOVDQU 4096(CX), Y8 + VMOVDQU 4128(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y1, Y1 + VMOVDQU 4160(CX), Y8 + VMOVDQU 4192(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y2, Y2 + VMOVDQU 
4224(CX), Y8 + VMOVDQU 4256(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y3, Y3 + VMOVDQU 4288(CX), Y8 + VMOVDQU 4320(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y4, Y4 + VMOVDQU 4352(CX), Y8 + VMOVDQU 4384(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y5, Y5 + VMOVDQU 4416(CX), Y8 + VMOVDQU 4448(CX), Y9 + VPSHUFB Y10, Y8, Y8 + VPSHUFB Y11, Y9, Y9 + VPXOR Y8, Y9, Y8 + VPXOR Y8, Y6, Y6 + + // Store 7 outputs + MOVQ (DX), R15 + VMOVDQU Y0, (R15)(R14*1) + MOVQ 24(DX), R15 + VMOVDQU Y1, (R15)(R14*1) + MOVQ 48(DX), R15 + VMOVDQU Y2, (R15)(R14*1) + MOVQ 72(DX), R15 + VMOVDQU Y3, (R15)(R14*1) + MOVQ 96(DX), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 120(DX), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 144(DX), R15 + VMOVDQU Y6, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x7_loop + VZEROUPPER + +mulAvxTwo_10x7_end: + RET + +// func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int) +// Requires: AVX, AVX2, SSE2 +TEXT ·mulAvxTwo_10x8(SB), $0-88 + // Loading no tables to registers + // Full registers estimated 173 YMM used + MOVQ n+80(FP), AX + MOVQ matrix_base+0(FP), CX + SHRQ $0x05, AX + TESTQ AX, AX + JZ mulAvxTwo_10x8_end + MOVQ out_base+48(FP), DX + MOVQ in_base+24(FP), BX + MOVQ (BX), BP + MOVQ 24(BX), SI + MOVQ 48(BX), DI + MOVQ 72(BX), R8 + MOVQ 96(BX), R9 + MOVQ 120(BX), R10 + MOVQ 144(BX), R11 + MOVQ 168(BX), R12 + MOVQ 192(BX), R13 + MOVQ 216(BX), BX + MOVQ $0x0000000f, R14 + MOVQ R14, X8 + VPBROADCASTB X8, Y8 + MOVQ start+72(FP), R14 + +mulAvxTwo_10x8_loop: + // Clear 8 outputs + VPXOR Y0, Y0, Y0 + VPXOR Y1, Y1, Y1 + VPXOR Y2, Y2, Y2 + VPXOR Y3, Y3, Y3 + VPXOR Y4, Y4, Y4 + VPXOR Y5, Y5, Y5 + VPXOR Y6, Y6, Y6 + VPXOR Y7, Y7, Y7 + + // Load and process 32 bytes from input 0 to 8 outputs + VMOVDQU (BP)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU (CX), Y9 
+ VMOVDQU 32(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 64(CX), Y9 + VMOVDQU 96(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 128(CX), Y9 + VMOVDQU 160(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 192(CX), Y9 + VMOVDQU 224(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 256(CX), Y9 + VMOVDQU 288(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 320(CX), Y9 + VMOVDQU 352(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 384(CX), Y9 + VMOVDQU 416(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 448(CX), Y9 + VMOVDQU 480(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 1 to 8 outputs + VMOVDQU (SI)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 512(CX), Y9 + VMOVDQU 544(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 576(CX), Y9 + VMOVDQU 608(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 640(CX), Y9 + VMOVDQU 672(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 704(CX), Y9 + VMOVDQU 736(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 768(CX), Y9 + VMOVDQU 800(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 832(CX), Y9 + VMOVDQU 864(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 896(CX), Y9 + VMOVDQU 928(CX), Y10 + 
VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 960(CX), Y9 + VMOVDQU 992(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 2 to 8 outputs + VMOVDQU (DI)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1024(CX), Y9 + VMOVDQU 1056(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1088(CX), Y9 + VMOVDQU 1120(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1152(CX), Y9 + VMOVDQU 1184(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1216(CX), Y9 + VMOVDQU 1248(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1280(CX), Y9 + VMOVDQU 1312(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1344(CX), Y9 + VMOVDQU 1376(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1408(CX), Y9 + VMOVDQU 1440(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1472(CX), Y9 + VMOVDQU 1504(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 3 to 8 outputs + VMOVDQU (R8)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 1536(CX), Y9 + VMOVDQU 1568(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 1600(CX), Y9 + VMOVDQU 1632(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 1664(CX), Y9 + VMOVDQU 1696(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 1728(CX), Y9 + 
VMOVDQU 1760(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 1792(CX), Y9 + VMOVDQU 1824(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 1856(CX), Y9 + VMOVDQU 1888(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 1920(CX), Y9 + VMOVDQU 1952(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 1984(CX), Y9 + VMOVDQU 2016(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 4 to 8 outputs + VMOVDQU (R9)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 2048(CX), Y9 + VMOVDQU 2080(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2112(CX), Y9 + VMOVDQU 2144(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2176(CX), Y9 + VMOVDQU 2208(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2240(CX), Y9 + VMOVDQU 2272(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2304(CX), Y9 + VMOVDQU 2336(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2368(CX), Y9 + VMOVDQU 2400(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2432(CX), Y9 + VMOVDQU 2464(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 2496(CX), Y9 + VMOVDQU 2528(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 5 to 8 outputs + VMOVDQU (R10)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + 
VMOVDQU 2560(CX), Y9 + VMOVDQU 2592(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 2624(CX), Y9 + VMOVDQU 2656(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 2688(CX), Y9 + VMOVDQU 2720(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 2752(CX), Y9 + VMOVDQU 2784(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 2816(CX), Y9 + VMOVDQU 2848(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 2880(CX), Y9 + VMOVDQU 2912(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 2944(CX), Y9 + VMOVDQU 2976(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3008(CX), Y9 + VMOVDQU 3040(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 6 to 8 outputs + VMOVDQU (R11)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3072(CX), Y9 + VMOVDQU 3104(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3136(CX), Y9 + VMOVDQU 3168(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3200(CX), Y9 + VMOVDQU 3232(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3264(CX), Y9 + VMOVDQU 3296(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3328(CX), Y9 + VMOVDQU 3360(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3392(CX), Y9 + VMOVDQU 3424(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 
+ VMOVDQU 3456(CX), Y9 + VMOVDQU 3488(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 3520(CX), Y9 + VMOVDQU 3552(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 7 to 8 outputs + VMOVDQU (R12)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 3584(CX), Y9 + VMOVDQU 3616(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 3648(CX), Y9 + VMOVDQU 3680(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 3712(CX), Y9 + VMOVDQU 3744(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 3776(CX), Y9 + VMOVDQU 3808(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 3840(CX), Y9 + VMOVDQU 3872(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 3904(CX), Y9 + VMOVDQU 3936(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 3968(CX), Y9 + VMOVDQU 4000(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 4032(CX), Y9 + VMOVDQU 4064(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 8 to 8 outputs + VMOVDQU (R13)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 4096(CX), Y9 + VMOVDQU 4128(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 4160(CX), Y9 + VMOVDQU 4192(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 4224(CX), Y9 + VMOVDQU 4256(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, 
Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 4288(CX), Y9 + VMOVDQU 4320(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 4352(CX), Y9 + VMOVDQU 4384(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 4416(CX), Y9 + VMOVDQU 4448(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 4480(CX), Y9 + VMOVDQU 4512(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 4544(CX), Y9 + VMOVDQU 4576(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Load and process 32 bytes from input 9 to 8 outputs + VMOVDQU (BX)(R14*1), Y11 + VPSRLQ $0x04, Y11, Y12 + VPAND Y8, Y11, Y11 + VPAND Y8, Y12, Y12 + VMOVDQU 4608(CX), Y9 + VMOVDQU 4640(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y0, Y0 + VMOVDQU 4672(CX), Y9 + VMOVDQU 4704(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y1, Y1 + VMOVDQU 4736(CX), Y9 + VMOVDQU 4768(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y2, Y2 + VMOVDQU 4800(CX), Y9 + VMOVDQU 4832(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y3, Y3 + VMOVDQU 4864(CX), Y9 + VMOVDQU 4896(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y4, Y4 + VMOVDQU 4928(CX), Y9 + VMOVDQU 4960(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y5, Y5 + VMOVDQU 4992(CX), Y9 + VMOVDQU 5024(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y6, Y6 + VMOVDQU 5056(CX), Y9 + VMOVDQU 5088(CX), Y10 + VPSHUFB Y11, Y9, Y9 + VPSHUFB Y12, Y10, Y10 + VPXOR Y9, Y10, Y9 + VPXOR Y9, Y7, Y7 + + // Store 8 outputs + MOVQ (DX), R15 + VMOVDQU Y0, (R15)(R14*1) + MOVQ 24(DX), R15 + VMOVDQU Y1, 
(R15)(R14*1) + MOVQ 48(DX), R15 + VMOVDQU Y2, (R15)(R14*1) + MOVQ 72(DX), R15 + VMOVDQU Y3, (R15)(R14*1) + MOVQ 96(DX), R15 + VMOVDQU Y4, (R15)(R14*1) + MOVQ 120(DX), R15 + VMOVDQU Y5, (R15)(R14*1) + MOVQ 144(DX), R15 + VMOVDQU Y6, (R15)(R14*1) + MOVQ 168(DX), R15 + VMOVDQU Y7, (R15)(R14*1) + + // Prepare for next loop + ADDQ $0x20, R14 + DECQ AX + JNZ mulAvxTwo_10x8_loop + VZEROUPPER + +mulAvxTwo_10x8_end: + RET diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go new file mode 100644 index 0000000..b4917bc --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go @@ -0,0 +1,11 @@ +//+build !amd64 noasm appengine gccgo nogen + +package reedsolomon + +const maxAvx2Inputs = 0 +const maxAvx2Outputs = 0 +const avx2CodeGen = false + +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { + panic("avx2 codegen not available") +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go new file mode 100644 index 0000000..0b49a1e --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go @@ -0,0 +1,293 @@ +// Code generated by command: go generate gen.go. DO NOT EDIT. 
+ +// +build !appengine +// +build !noasm +// +build gc +// +build !nogen + +package reedsolomon + +import "fmt" + +const avx2CodeGen = true +const maxAvx2Inputs = 10 +const maxAvx2Outputs = 8 + +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { + n := stop - start + n = (n >> 5) << 5 + + switch len(in) { + case 1: + switch len(out) { + case 1: + mulAvxTwo_1x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_1x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_1x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_1x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_1x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_1x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_1x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_1x8(matrix, in, out, start, n) + return n + } + case 2: + switch len(out) { + case 1: + mulAvxTwo_2x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_2x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_2x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_2x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_2x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_2x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_2x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_2x8(matrix, in, out, start, n) + return n + } + case 3: + switch len(out) { + case 1: + mulAvxTwo_3x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_3x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_3x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_3x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_3x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_3x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_3x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_3x8(matrix, in, out, start, n) + return n + } + case 4: + switch 
len(out) { + case 1: + mulAvxTwo_4x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_4x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_4x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_4x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_4x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_4x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_4x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_4x8(matrix, in, out, start, n) + return n + } + case 5: + switch len(out) { + case 1: + mulAvxTwo_5x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_5x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_5x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_5x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_5x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_5x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_5x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_5x8(matrix, in, out, start, n) + return n + } + case 6: + switch len(out) { + case 1: + mulAvxTwo_6x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_6x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_6x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_6x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_6x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_6x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_6x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_6x8(matrix, in, out, start, n) + return n + } + case 7: + switch len(out) { + case 1: + mulAvxTwo_7x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_7x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_7x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_7x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_7x5(matrix, in, out, start, n) + return n + case 6: + 
mulAvxTwo_7x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_7x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_7x8(matrix, in, out, start, n) + return n + } + case 8: + switch len(out) { + case 1: + mulAvxTwo_8x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_8x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_8x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_8x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_8x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_8x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_8x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_8x8(matrix, in, out, start, n) + return n + } + case 9: + switch len(out) { + case 1: + mulAvxTwo_9x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_9x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_9x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_9x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_9x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_9x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_9x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_9x8(matrix, in, out, start, n) + return n + } + case 10: + switch len(out) { + case 1: + mulAvxTwo_10x1(matrix, in, out, start, n) + return n + case 2: + mulAvxTwo_10x2(matrix, in, out, start, n) + return n + case 3: + mulAvxTwo_10x3(matrix, in, out, start, n) + return n + case 4: + mulAvxTwo_10x4(matrix, in, out, start, n) + return n + case 5: + mulAvxTwo_10x5(matrix, in, out, start, n) + return n + case 6: + mulAvxTwo_10x6(matrix, in, out, start, n) + return n + case 7: + mulAvxTwo_10x7(matrix, in, out, start, n) + return n + case 8: + mulAvxTwo_10x8(matrix, in, out, start, n) + return n + } + } + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go 
b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go new file mode 100644 index 0000000..1d00e06 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go @@ -0,0 +1,44 @@ +//+build !amd64 noasm appengine gccgo +//+build !arm64 noasm appengine gccgo +//+build !ppc64le noasm appengine gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. + +package reedsolomon + +func galMulSlice(c byte, in, out []byte, o *options) { + out = out[:len(in)] + if c == 1 { + copy(out, in) + return + } + mt := mulTable[c][:256] + for n, input := range in { + out[n] = mt[input] + } +} + +func galMulSliceXor(c byte, in, out []byte, o *options) { + out = out[:len(in)] + if c == 1 { + for n, input := range in { + out[n] ^= input + } + return + } + mt := mulTable[c][:256] + for n, input := range in { + out[n] ^= mt[input] + } +} + +// slice galois add +func sliceXor(in, out []byte, o *options) { + for n, input := range in { + out[n] ^= input + } +} + +func init() { + defaultOptions.useAVX512 = false +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go new file mode 100644 index 0000000..bd15e3a --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go @@ -0,0 +1,13 @@ +//+build !amd64 noasm appengine gccgo + +// Copyright 2020, Klaus Post, see LICENSE for details. 
+ +package reedsolomon + +func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) { + panic("codeSomeShardsAvx512 should not be called if built without asm") +} + +func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) { + panic("codeSomeShardsAvx512P should not be called if built without asm") +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go new file mode 100644 index 0000000..70f93d6 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go @@ -0,0 +1,75 @@ +//+build !noasm +//+build !appengine +//+build !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2018, Minio, Inc. + +package reedsolomon + +//go:noescape +func galMulPpc(low, high, in, out []byte) + +//go:noescape +func galMulPpcXor(low, high, in, out []byte) + +// This is what the assembler routines do in blocks of 16 bytes: +/* +func galMulPpc(low, high, in, out []byte) { + for n, input := range in { + l := input & 0xf + h := input >> 4 + out[n] = low[l] ^ high[h] + } +} +func galMulPpcXor(low, high, in, out []byte) { + for n, input := range in { + l := input & 0xf + h := input >> 4 + out[n] ^= low[l] ^ high[h] + } +} +*/ + +func galMulSlice(c byte, in, out []byte, o *options) { + if c == 1 { + copy(out, in) + return + } + done := (len(in) >> 4) << 4 + if done > 0 { + galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) + } + remain := len(in) - done + if remain > 0 { + mt := mulTable[c][:256] + for i := done; i < len(in); i++ { + out[i] = mt[in[i]] + } + } +} + +func galMulSliceXor(c byte, in, out []byte, o *options) { + if c == 1 { + sliceXor(in, out, o) + return + } + done := (len(in) >> 4) << 4 + if done > 0 { + galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out) + } + remain := len(in) - done + if remain > 0 { + mt := mulTable[c][:256] 
+ for i := done; i < len(in); i++ { + out[i] ^= mt[in[i]] + } + } +} + +// slice galois add +func sliceXor(in, out []byte, o *options) { + for n, input := range in { + out[n] ^= input + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s new file mode 100644 index 0000000..8838f0c --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s @@ -0,0 +1,124 @@ +//+build !noasm !appengine !gccgo + +// Copyright 2015, Klaus Post, see LICENSE for details. +// Copyright 2018, Minio, Inc. + +#include "textflag.h" + +#define LOW R3 +#define HIGH R4 +#define IN R5 +#define LEN R6 +#define OUT R7 +#define CONSTANTS R8 +#define OFFSET R9 +#define OFFSET1 R10 +#define OFFSET2 R11 + +#define X6 VS34 +#define X6_ V2 +#define X7 VS35 +#define X7_ V3 +#define MSG VS36 +#define MSG_ V4 +#define MSG_HI VS37 +#define MSG_HI_ V5 +#define RESULT VS38 +#define RESULT_ V6 +#define ROTATE VS39 +#define ROTATE_ V7 +#define MASK VS40 +#define MASK_ V8 +#define FLIP VS41 +#define FLIP_ V9 + +// func galMulPpc(low, high, in, out []byte) +TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96 + MOVD low+0(FP), LOW + MOVD high+24(FP), HIGH + MOVD in+48(FP), IN + MOVD in_len+56(FP), LEN + MOVD out+72(FP), OUT + + MOVD $16, OFFSET1 + MOVD $32, OFFSET2 + + MOVD $·constants(SB), CONSTANTS + LXVD2X (CONSTANTS)(R0), ROTATE + LXVD2X (CONSTANTS)(OFFSET1), MASK + LXVD2X (CONSTANTS)(OFFSET2), FLIP + + LXVD2X (LOW)(R0), X6 + LXVD2X (HIGH)(R0), X7 + VPERM X6_, V31, FLIP_, X6_ + VPERM X7_, V31, FLIP_, X7_ + + MOVD $0, OFFSET + +loop: + LXVD2X (IN)(OFFSET), MSG + + VSRB MSG_, ROTATE_, MSG_HI_ + VAND MSG_, MASK_, MSG_ + VPERM X6_, V31, MSG_, MSG_ + VPERM X7_, V31, MSG_HI_, MSG_HI_ + + VXOR MSG_, MSG_HI_, MSG_ + + STXVD2X MSG, (OUT)(OFFSET) + + ADD $16, OFFSET, OFFSET + CMP LEN, OFFSET + BGT loop + RET + +// func galMulPpcXorlow, high, in, out []byte) +TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96 + MOVD low+0(FP), LOW + 
MOVD high+24(FP), HIGH + MOVD in+48(FP), IN + MOVD in_len+56(FP), LEN + MOVD out+72(FP), OUT + + MOVD $16, OFFSET1 + MOVD $32, OFFSET2 + + MOVD $·constants(SB), CONSTANTS + LXVD2X (CONSTANTS)(R0), ROTATE + LXVD2X (CONSTANTS)(OFFSET1), MASK + LXVD2X (CONSTANTS)(OFFSET2), FLIP + + LXVD2X (LOW)(R0), X6 + LXVD2X (HIGH)(R0), X7 + VPERM X6_, V31, FLIP_, X6_ + VPERM X7_, V31, FLIP_, X7_ + + MOVD $0, OFFSET + +loopXor: + LXVD2X (IN)(OFFSET), MSG + LXVD2X (OUT)(OFFSET), RESULT + + VSRB MSG_, ROTATE_, MSG_HI_ + VAND MSG_, MASK_, MSG_ + VPERM X6_, V31, MSG_, MSG_ + VPERM X7_, V31, MSG_HI_, MSG_HI_ + + VXOR MSG_, MSG_HI_, MSG_ + VXOR MSG_, RESULT_, RESULT_ + + STXVD2X RESULT, (OUT)(OFFSET) + + ADD $16, OFFSET, OFFSET + CMP LEN, OFFSET + BGT loopXor + RET + +DATA ·constants+0x0(SB)/8, $0x0404040404040404 +DATA ·constants+0x8(SB)/8, $0x0404040404040404 +DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f +DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f +DATA ·constants+0x20(SB)/8, $0x0706050403020100 +DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908 + +GLOBL ·constants(SB), 8, $48 diff --git a/vendor/github.com/klauspost/reedsolomon/gen.go b/vendor/github.com/klauspost/reedsolomon/gen.go new file mode 100644 index 0000000..6fc545c --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/gen.go @@ -0,0 +1,249 @@ +//+build generate + +//go:generate go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go +//go:generate gofmt -w galois_gen_switch_amd64.go + +package main + +import ( + "bufio" + "fmt" + "os" + + . "github.com/mmcloughlin/avo/build" + "github.com/mmcloughlin/avo/buildtags" + . "github.com/mmcloughlin/avo/operand" + "github.com/mmcloughlin/avo/reg" +) + +// Technically we can do slightly bigger, but we stay reasonable. 
+const inputMax = 10 +const outputMax = 8 + +var switchDefs [inputMax][outputMax]string +var switchDefsX [inputMax][outputMax]string + +const perLoopBits = 5 +const perLoop = 1 << perLoopBits + +func main() { + Constraint(buildtags.Not("appengine").ToConstraint()) + Constraint(buildtags.Not("noasm").ToConstraint()) + Constraint(buildtags.Not("nogen").ToConstraint()) + Constraint(buildtags.Term("gc").ToConstraint()) + + for i := 1; i <= inputMax; i++ { + for j := 1; j <= outputMax; j++ { + //genMulAvx2(fmt.Sprintf("mulAvxTwoXor_%dx%d", i, j), i, j, true) + genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false) + } + } + f, err := os.Create("galois_gen_switch_amd64.go") + if err != nil { + panic(err) + } + defer f.Close() + w := bufio.NewWriter(f) + defer w.Flush() + w.WriteString(`// Code generated by command: go generate ` + os.Getenv("GOFILE") + `. DO NOT EDIT. + +// +build !appengine +// +build !noasm +// +build gc +// +build !nogen + +package reedsolomon + +import "fmt" + +`) + + w.WriteString("const avx2CodeGen = true\n") + w.WriteString(fmt.Sprintf("const maxAvx2Inputs = %d\nconst maxAvx2Outputs = %d\n", inputMax, outputMax)) + w.WriteString(` + +func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int { + n := stop-start +`) + + w.WriteString(fmt.Sprintf("n = (n>>%d)<<%d\n\n", perLoopBits, perLoopBits)) + w.WriteString(`switch len(in) { +`) + for in, defs := range switchDefs[:] { + w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1)) + for out, def := range defs[:] { + w.WriteString(fmt.Sprintf(" case %d:\n", out+1)) + w.WriteString(def) + } + w.WriteString("}\n") + } + w.WriteString(`} + panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out))) +} +`) + Generate() +} + +func genMulAvx2(name string, inputs int, outputs int, xor bool) { + total := inputs * outputs + + doc := []string{ + fmt.Sprintf("%s takes %d inputs and produces %d outputs.", name, inputs, outputs), + } + if !xor { + doc = append(doc, "The output 
is initialized to 0.") + } + + // Load shuffle masks on every use. + var loadNone bool + // Use registers for destination registers. + var regDst = true + + // lo, hi, 1 in, 1 out, 2 tmp, 1 mask + est := total*2 + outputs + 5 + if outputs == 1 { + // We don't need to keep a copy of the input if only 1 output. + est -= 2 + } + + if est > 16 { + loadNone = true + // We run out of GP registers first, now. + if inputs+outputs > 12 { + regDst = false + } + } + + TEXT(name, 0, fmt.Sprintf("func(matrix []byte, in [][]byte, out [][]byte, start, n int)")) + + // SWITCH DEFINITION: + s := fmt.Sprintf(" mulAvxTwo_%dx%d(matrix, in, out, start, n)\n", inputs, outputs) + s += fmt.Sprintf("\t\t\t\treturn n\n") + switchDefs[inputs-1][outputs-1] = s + + if loadNone { + Comment("Loading no tables to registers") + } else { + // loadNone == false + Comment("Loading all tables to registers") + } + + Doc(doc...) + Pragma("noescape") + Commentf("Full registers estimated %d YMM used", est) + + length := Load(Param("n"), GP64()) + matrixBase := GP64() + MOVQ(Param("matrix").Base().MustAddr(), matrixBase) + SHRQ(U8(perLoopBits), length) + TESTQ(length, length) + JZ(LabelRef(name + "_end")) + + dst := make([]reg.VecVirtual, outputs) + dstPtr := make([]reg.GPVirtual, outputs) + outBase := Param("out").Base().MustAddr() + outSlicePtr := GP64() + MOVQ(outBase, outSlicePtr) + for i := range dst { + dst[i] = YMM() + if !regDst { + continue + } + ptr := GP64() + MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr) + dstPtr[i] = ptr + } + + inLo := make([]reg.VecVirtual, total) + inHi := make([]reg.VecVirtual, total) + + for i := range inLo { + if loadNone { + break + } + tableLo := YMM() + tableHi := YMM() + VMOVDQU(Mem{Base: matrixBase, Disp: i * 64}, tableLo) + VMOVDQU(Mem{Base: matrixBase, Disp: i*64 + 32}, tableHi) + inLo[i] = tableLo + inHi[i] = tableHi + } + + inPtrs := make([]reg.GPVirtual, inputs) + inSlicePtr := GP64() + MOVQ(Param("in").Base().MustAddr(), inSlicePtr) + for i := range inPtrs { 
+ ptr := GP64() + MOVQ(Mem{Base: inSlicePtr, Disp: i * 24}, ptr) + inPtrs[i] = ptr + } + + tmpMask := GP64() + MOVQ(U32(15), tmpMask) + lowMask := YMM() + MOVQ(tmpMask, lowMask.AsX()) + VPBROADCASTB(lowMask.AsX(), lowMask) + + offset := GP64() + MOVQ(Param("start").MustAddr(), offset) + Label(name + "_loop") + if xor { + Commentf("Load %d outputs", outputs) + } else { + Commentf("Clear %d outputs", outputs) + } + for i := range dst { + if xor { + if regDst { + VMOVDQU(Mem{Base: dstPtr[i], Index: offset, Scale: 1}, dst[i]) + continue + } + ptr := GP64() + MOVQ(outBase, ptr) + VMOVDQU(Mem{Base: ptr, Index: offset, Scale: 1}, dst[i]) + } else { + VPXOR(dst[i], dst[i], dst[i]) + } + } + + lookLow, lookHigh := YMM(), YMM() + inLow, inHigh := YMM(), YMM() + for i := range inPtrs { + Commentf("Load and process 32 bytes from input %d to %d outputs", i, outputs) + VMOVDQU(Mem{Base: inPtrs[i], Index: offset, Scale: 1}, inLow) + VPSRLQ(U8(4), inLow, inHigh) + VPAND(lowMask, inLow, inLow) + VPAND(lowMask, inHigh, inHigh) + for j := range dst { + if loadNone { + VMOVDQU(Mem{Base: matrixBase, Disp: 64 * (i*outputs + j)}, lookLow) + VMOVDQU(Mem{Base: matrixBase, Disp: 32 + 64*(i*outputs+j)}, lookHigh) + VPSHUFB(inLow, lookLow, lookLow) + VPSHUFB(inHigh, lookHigh, lookHigh) + } else { + VPSHUFB(inLow, inLo[i*outputs+j], lookLow) + VPSHUFB(inHigh, inHi[i*outputs+j], lookHigh) + } + VPXOR(lookLow, lookHigh, lookLow) + VPXOR(lookLow, dst[j], dst[j]) + } + } + Commentf("Store %d outputs", outputs) + for i := range dst { + if regDst { + VMOVDQU(dst[i], Mem{Base: dstPtr[i], Index: offset, Scale: 1}) + continue + } + ptr := GP64() + MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr) + VMOVDQU(dst[i], Mem{Base: ptr, Index: offset, Scale: 1}) + } + Comment("Prepare for next loop") + ADDQ(U8(perLoop), offset) + DECQ(length) + JNZ(LabelRef(name + "_loop")) + VZEROUPPER() + + Label(name + "_end") + RET() +} diff --git a/vendor/github.com/klauspost/reedsolomon/go.mod 
b/vendor/github.com/klauspost/reedsolomon/go.mod new file mode 100644 index 0000000..a059d86 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/go.mod @@ -0,0 +1,7 @@ +module github.com/klauspost/reedsolomon + +go 1.14 + +require ( + github.com/klauspost/cpuid v1.2.4 +) diff --git a/vendor/github.com/klauspost/reedsolomon/go.sum b/vendor/github.com/klauspost/reedsolomon/go.sum new file mode 100644 index 0000000..5a44d81 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/go.sum @@ -0,0 +1,2 @@ +github.com/klauspost/cpuid v1.2.4 h1:EBfaK0SWSwk+fgk6efYFWdzl8MwRWoOO1gkmiaTXPW4= +github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= diff --git a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go new file mode 100644 index 0000000..c9d8ab2 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go @@ -0,0 +1,160 @@ +/** + * A thread-safe tree which caches inverted matrices. + * + * Copyright 2016, Peter Collins + */ + +package reedsolomon + +import ( + "errors" + "sync" +) + +// The tree uses a Reader-Writer mutex to make it thread-safe +// when accessing cached matrices and inserting new ones. +type inversionTree struct { + mutex *sync.RWMutex + root inversionNode +} + +type inversionNode struct { + matrix matrix + children []*inversionNode +} + +// newInversionTree initializes a tree for storing inverted matrices. +// Note that the root node is the identity matrix as it implies +// there were no errors with the original data. 
+func newInversionTree(dataShards, parityShards int) inversionTree { + identity, _ := identityMatrix(dataShards) + root := inversionNode{ + matrix: identity, + children: make([]*inversionNode, dataShards+parityShards), + } + return inversionTree{ + mutex: &sync.RWMutex{}, + root: root, + } +} + +// GetInvertedMatrix returns the cached inverted matrix or nil if it +// is not found in the tree keyed on the indices of invalid rows. +func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix { + // Lock the tree for reading before accessing the tree. + t.mutex.RLock() + defer t.mutex.RUnlock() + + // If no invalid indices were given we should return the root + // identity matrix. + if len(invalidIndices) == 0 { + return t.root.matrix + } + + // Recursively search for the inverted matrix in the tree, passing in + // 0 as the parent index as we start at the root of the tree. + return t.root.getInvertedMatrix(invalidIndices, 0) +} + +// errAlreadySet is returned if the root node matrix is overwritten +var errAlreadySet = errors.New("the root node identity matrix is already set") + +// InsertInvertedMatrix inserts a new inverted matrix into the tree +// keyed by the indices of invalid rows. The total number of shards +// is required for creating the proper length lists of child nodes for +// each node. +func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error { + // If no invalid indices were given then we are done because the + // root node is already set with the identity matrix. + if len(invalidIndices) == 0 { + return errAlreadySet + } + + if !matrix.IsSquare() { + return errNotSquare + } + + // Lock the tree for writing and reading before accessing the tree. + t.mutex.Lock() + defer t.mutex.Unlock() + + // Recursively create nodes for the inverted matrix in the tree until + // we reach the node to insert the matrix to. We start by passing in + // 0 as the parent index as we start at the root of the tree. 
+ t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0) + + return nil +} + +func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix { + // Get the child node to search next from the list of children. The + // list of children starts relative to the parent index passed in + // because the indices of invalid rows is sorted (by default). As we + // search recursively, the first invalid index gets popped off the list, + // so when searching through the list of children, use that first invalid + // index to find the child node. + firstIndex := invalidIndices[0] + node := n.children[firstIndex-parent] + + // If the child node doesn't exist in the list yet, fail fast by + // returning, so we can construct and insert the proper inverted matrix. + if node == nil { + return nil + } + + // If there's more than one invalid index left in the list we should + // keep searching recursively. + if len(invalidIndices) > 1 { + // Search recursively on the child node by passing in the invalid indices + // with the first index popped off the front. Also the parent index to + // pass down is the first index plus one. + return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1) + } + // If there aren't any more invalid indices to search, we've found our + // node. Return it, however keep in mind that the matrix could still be + // nil because intermediary nodes in the tree are created sometimes with + // their inversion matrices uninitialized. + return node.matrix +} + +func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) { + // As above, get the child node to search next from the list of children. + // The list of children starts relative to the parent index passed in + // because the indices of invalid rows is sorted (by default). 
As we + // search recursively, the first invalid index gets popped off the list, + // so when searching through the list of children, use that first invalid + // index to find the child node. + firstIndex := invalidIndices[0] + node := n.children[firstIndex-parent] + + // If the child node doesn't exist in the list yet, create a new + // node because we have the writer lock and add it to the list + // of children. + if node == nil { + // Make the length of the list of children equal to the number + // of shards minus the first invalid index because the list of + // invalid indices is sorted, so only this length of errors + // are possible in the tree. + node = &inversionNode{ + children: make([]*inversionNode, shards-firstIndex), + } + // Insert the new node into the tree at the first index relative + // to the parent index that was given in this recursive call. + n.children[firstIndex-parent] = node + } + + // If there's more than one invalid index left in the list we should + // keep searching recursively in order to find the node to add our + // matrix. + if len(invalidIndices) > 1 { + // As above, search recursively on the child node by passing in + // the invalid indices with the first index popped off the front. + // Also the total number of shards and parent index are passed down + // which is equal to the first index plus one. + node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1) + } else { + // If there aren't any more invalid indices to search, we've found our + // node. Cache the inverted matrix in this node. + node.matrix = matrix + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/matrix.go b/vendor/github.com/klauspost/reedsolomon/matrix.go new file mode 100644 index 0000000..a6b9730 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/matrix.go @@ -0,0 +1,279 @@ +/** + * Matrix Algebra over an 8-bit Galois Field + * + * Copyright 2015, Klaus Post + * Copyright 2015, Backblaze, Inc. 
+ */ + +package reedsolomon + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +// byte[row][col] +type matrix [][]byte + +// newMatrix returns a matrix of zeros. +func newMatrix(rows, cols int) (matrix, error) { + if rows <= 0 { + return nil, errInvalidRowSize + } + if cols <= 0 { + return nil, errInvalidColSize + } + + m := matrix(make([][]byte, rows)) + for i := range m { + m[i] = make([]byte, cols) + } + return m, nil +} + +// NewMatrixData initializes a matrix with the given row-major data. +// Note that data is not copied from input. +func newMatrixData(data [][]byte) (matrix, error) { + m := matrix(data) + err := m.Check() + if err != nil { + return nil, err + } + return m, nil +} + +// IdentityMatrix returns an identity matrix of the given size. +func identityMatrix(size int) (matrix, error) { + m, err := newMatrix(size, size) + if err != nil { + return nil, err + } + for i := range m { + m[i][i] = 1 + } + return m, nil +} + +// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number. +var errInvalidRowSize = errors.New("invalid row size") + +// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number. +var errInvalidColSize = errors.New("invalid column size") + +// errColSizeMismatch is returned if the size of matrix columns mismatch. +var errColSizeMismatch = errors.New("column size is not the same for all rows") + +func (m matrix) Check() error { + rows := len(m) + if rows <= 0 { + return errInvalidRowSize + } + cols := len(m[0]) + if cols <= 0 { + return errInvalidColSize + } + + for _, col := range m { + if len(col) != cols { + return errColSizeMismatch + } + } + return nil +} + +// String returns a human-readable string of the matrix contents. 
+// +// Example: [[1, 2], [3, 4]] +func (m matrix) String() string { + rowOut := make([]string, 0, len(m)) + for _, row := range m { + colOut := make([]string, 0, len(row)) + for _, col := range row { + colOut = append(colOut, strconv.Itoa(int(col))) + } + rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]") + } + return "[" + strings.Join(rowOut, ", ") + "]" +} + +// Multiply multiplies this matrix (the one on the left) by another +// matrix (the one on the right) and returns a new matrix with the result. +func (m matrix) Multiply(right matrix) (matrix, error) { + if len(m[0]) != len(right) { + return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right)) + } + result, _ := newMatrix(len(m), len(right[0])) + for r, row := range result { + for c := range row { + var value byte + for i := range m[0] { + value ^= galMultiply(m[r][i], right[i][c]) + } + result[r][c] = value + } + } + return result, nil +} + +// Augment returns the concatenation of this matrix and the matrix on the right. +func (m matrix) Augment(right matrix) (matrix, error) { + if len(m) != len(right) { + return nil, errMatrixSize + } + + result, _ := newMatrix(len(m), len(m[0])+len(right[0])) + for r, row := range m { + for c := range row { + result[r][c] = m[r][c] + } + cols := len(m[0]) + for c := range right[0] { + result[r][cols+c] = right[r][c] + } + } + return result, nil +} + +// errMatrixSize is returned if matrix dimensions do not match. +var errMatrixSize = errors.New("matrix sizes do not match") + +func (m matrix) SameSize(n matrix) error { + if len(m) != len(n) { + return errMatrixSize + } + for i := range m { + if len(m[i]) != len(n[i]) { + return errMatrixSize + } + } + return nil +} + +// SubMatrix returns a part of this matrix. Data is copied. 
+func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) { + result, err := newMatrix(rmax-rmin, cmax-cmin) + if err != nil { + return nil, err + } + // OPTME: If used heavily, use copy function to copy slice + for r := rmin; r < rmax; r++ { + for c := cmin; c < cmax; c++ { + result[r-rmin][c-cmin] = m[r][c] + } + } + return result, nil +} + +// SwapRows exchanges two rows in the matrix. +func (m matrix) SwapRows(r1, r2 int) error { + if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 { + return errInvalidRowSize + } + m[r2], m[r1] = m[r1], m[r2] + return nil +} + +// IsSquare will return true if the matrix is square +// and false otherwise. +func (m matrix) IsSquare() bool { + return len(m) == len(m[0]) +} + +// errSingular is returned if the matrix is singular and cannot be inverted +var errSingular = errors.New("matrix is singular") + +// errNotSquare is returned if attempting to invert a non-square matrix. +var errNotSquare = errors.New("only square matrices can be inverted") + +// Invert returns the inverse of this matrix. +// Returns errSingular when the matrix is singular and doesn't have an inverse. +// The matrix must be square, otherwise errNotSquare is returned. +func (m matrix) Invert() (matrix, error) { + if !m.IsSquare() { + return nil, errNotSquare + } + + size := len(m) + work, _ := identityMatrix(size) + work, _ = m.Augment(work) + + err := work.gaussianElimination() + if err != nil { + return nil, err + } + + return work.SubMatrix(0, size, size, size*2) +} + +func (m matrix) gaussianElimination() error { + rows := len(m) + columns := len(m[0]) + // Clear out the part below the main diagonal and scale the main + // diagonal to be 1. + for r := 0; r < rows; r++ { + // If the element on the diagonal is 0, find a row below + // that has a non-zero and swap them. 
+ if m[r][r] == 0 { + for rowBelow := r + 1; rowBelow < rows; rowBelow++ { + if m[rowBelow][r] != 0 { + m.SwapRows(r, rowBelow) + break + } + } + } + // If we couldn't find one, the matrix is singular. + if m[r][r] == 0 { + return errSingular + } + // Scale to 1. + if m[r][r] != 1 { + scale := galDivide(1, m[r][r]) + for c := 0; c < columns; c++ { + m[r][c] = galMultiply(m[r][c], scale) + } + } + // Make everything below the 1 be a 0 by subtracting + // a multiple of it. (Subtraction and addition are + // both exclusive or in the Galois field.) + for rowBelow := r + 1; rowBelow < rows; rowBelow++ { + if m[rowBelow][r] != 0 { + scale := m[rowBelow][r] + for c := 0; c < columns; c++ { + m[rowBelow][c] ^= galMultiply(scale, m[r][c]) + } + } + } + } + + // Now clear the part above the main diagonal. + for d := 0; d < rows; d++ { + for rowAbove := 0; rowAbove < d; rowAbove++ { + if m[rowAbove][d] != 0 { + scale := m[rowAbove][d] + for c := 0; c < columns; c++ { + m[rowAbove][c] ^= galMultiply(scale, m[d][c]) + } + + } + } + } + return nil +} + +// Create a Vandermonde matrix, which is guaranteed to have the +// property that any subset of rows that forms a square matrix +// is invertible. +func vandermonde(rows, cols int) (matrix, error) { + result, err := newMatrix(rows, cols) + if err != nil { + return nil, err + } + for r, row := range result { + for c := range row { + result[r][c] = galExp(byte(r), c) + } + } + return result, nil +} diff --git a/vendor/github.com/klauspost/reedsolomon/options.go b/vendor/github.com/klauspost/reedsolomon/options.go new file mode 100644 index 0000000..b4adc2a --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/options.go @@ -0,0 +1,175 @@ +package reedsolomon + +import ( + "runtime" + + "github.com/klauspost/cpuid" +) + +// Option allows to override processing parameters. 
+type Option func(*options) + +type options struct { + maxGoroutines int + minSplitSize int + shardSize int + perRound int + + useAVX512, useAVX2, useSSSE3, useSSE2 bool + usePAR1Matrix bool + useCauchy bool + fastOneParity bool + + // stream options + concReads bool + concWrites bool + streamBS int +} + +var defaultOptions = options{ + maxGoroutines: 384, + minSplitSize: -1, + fastOneParity: false, + + // Detect CPU capabilities. + useSSSE3: cpuid.CPU.SSSE3(), + useSSE2: cpuid.CPU.SSE2(), + useAVX2: cpuid.CPU.AVX2(), + useAVX512: cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW(), +} + +func init() { + if runtime.GOMAXPROCS(0) <= 1 { + defaultOptions.maxGoroutines = 1 + } +} + +// WithMaxGoroutines is the maximum number of goroutines number for encoding & decoding. +// Jobs will be split into this many parts, unless each goroutine would have to process +// less than minSplitSize bytes (set with WithMinSplitSize). +// For the best speed, keep this well above the GOMAXPROCS number for more fine grained +// scheduling. +// If n <= 0, it is ignored. +func WithMaxGoroutines(n int) Option { + return func(o *options) { + if n > 0 { + o.maxGoroutines = n + } + } +} + +// WithAutoGoroutines will adjust the number of goroutines for optimal speed with a +// specific shard size. +// Send in the shard size you expect to send. Other shard sizes will work, but may not +// run at the optimal speed. +// Overwrites WithMaxGoroutines. +// If shardSize <= 0, it is ignored. +func WithAutoGoroutines(shardSize int) Option { + return func(o *options) { + o.shardSize = shardSize + } +} + +// WithMinSplitSize is the minimum encoding size in bytes per goroutine. +// By default this parameter is determined by CPU cache characteristics. +// See WithMaxGoroutines on how jobs are split. +// If n <= 0, it is ignored. 
+func WithMinSplitSize(n int) Option { + return func(o *options) { + if n > 0 { + o.minSplitSize = n + } + } +} + +// WithConcurrentStreams will enable concurrent reads and writes on the streams. +// Default: Disabled, meaning only one stream will be read/written at a time. +// Ignored if not used on a stream input. +func WithConcurrentStreams(enabled bool) Option { + return func(o *options) { + o.concReads, o.concWrites = enabled, enabled + } +} + +// WithConcurrentStreamReads will enable concurrent reads from the input streams. +// Default: Disabled, meaning only one stream will be read at a time. +// Ignored if not used on a stream input. +func WithConcurrentStreamReads(enabled bool) Option { + return func(o *options) { + o.concReads = enabled + } +} + +// WithConcurrentStreamWrites will enable concurrent writes to the output streams. +// Default: Disabled, meaning only one stream will be written at a time. +// Ignored if not used on a stream input. +func WithConcurrentStreamWrites(enabled bool) Option { + return func(o *options) { + o.concWrites = enabled + } +} + +// WithStreamBlockSize allows to set a custom block size per round of reads/writes. +// If not set, any shard size set with WithAutoGoroutines will be used. +// If WithAutoGoroutines is also unset, 4MB will be used. +// Ignored if not used on stream. +func WithStreamBlockSize(n int) Option { + return func(o *options) { + o.streamBS = n + } +} + +func withSSSE3(enabled bool) Option { + return func(o *options) { + o.useSSSE3 = enabled + } +} + +func withAVX2(enabled bool) Option { + return func(o *options) { + o.useAVX2 = enabled + } +} + +func withSSE2(enabled bool) Option { + return func(o *options) { + o.useSSE2 = enabled + } +} + +func withAVX512(enabled bool) Option { + return func(o *options) { + o.useAVX512 = enabled + } +} + +// WithPAR1Matrix causes the encoder to build the matrix how PARv1 +// does. 
Note that the method they use is buggy, and may lead to cases +// where recovery is impossible, even if there are enough parity +// shards. +func WithPAR1Matrix() Option { + return func(o *options) { + o.usePAR1Matrix = true + o.useCauchy = false + } +} + +// WithCauchyMatrix will make the encoder build a Cauchy style matrix. +// The output of this is not compatible with the standard output. +// A Cauchy matrix is faster to generate. This does not affect data throughput, +// but will result in slightly faster start-up time. +func WithCauchyMatrix() Option { + return func(o *options) { + o.useCauchy = true + o.usePAR1Matrix = false + } +} + +// WithFastOneParityMatrix will switch the matrix to a simple xor +// if there is only one parity shard. +// The PAR1 matrix already has this property so it has little effect there. +func WithFastOneParityMatrix() Option { + return func(o *options) { + o.fastOneParity = true + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go new file mode 100644 index 0000000..13a35d2 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go @@ -0,0 +1,1011 @@ +/** + * Reed-Solomon Coding over 8-bit values. + * + * Copyright 2015, Klaus Post + * Copyright 2015, Backblaze, Inc. + */ + +// Package reedsolomon enables Erasure Coding in Go +// +// For usage and examples, see https://github.com/klauspost/reedsolomon +// +package reedsolomon + +import ( + "bytes" + "errors" + "io" + "runtime" + "sync" + + "github.com/klauspost/cpuid" +) + +// Encoder is an interface to encode Reed-Salomon parity sets for your data. +type Encoder interface { + // Encode parity for a set of data shards. + // Input is 'shards' containing data shards followed by parity shards. + // The number of shards must match the number given to New(). + // Each shard is a byte array, and they must all be the same size. 
+ // The parity shards will always be overwritten and the data shards + // will remain the same, so it is safe for you to read from the + // data shards while this is running. + Encode(shards [][]byte) error + + // Verify returns true if the parity shards contain correct data. + // The data is the same format as Encode. No data is modified, so + // you are allowed to read from data while this is running. + Verify(shards [][]byte) (bool, error) + + // Reconstruct will recreate the missing shards if possible. + // + // Given a list of shards, some of which contain data, fills in the + // ones that don't have data. + // + // The length of the array must be equal to the total number of shards. + // You indicate that a shard is missing by setting it to nil or zero-length. + // If a shard is zero-length but has sufficient capacity, that memory will + // be used, otherwise a new []byte will be allocated. + // + // If there are too few shards to reconstruct the missing + // ones, ErrTooFewShards will be returned. + // + // The reconstructed shard set is complete, but integrity is not verified. + // Use the Verify function to check if data set is ok. + Reconstruct(shards [][]byte) error + + // ReconstructData will recreate any missing data shards, if possible. + // + // Given a list of shards, some of which contain data, fills in the + // data shards that don't have data. + // + // The length of the array must be equal to Shards. + // You indicate that a shard is missing by setting it to nil or zero-length. + // If a shard is zero-length but has sufficient capacity, that memory will + // be used, otherwise a new []byte will be allocated. + // + // If there are too few shards to reconstruct the missing + // ones, ErrTooFewShards will be returned. + // + // As the reconstructed shard set may contain missing parity shards, + // calling the Verify function is likely to fail. 
+ ReconstructData(shards [][]byte) error + + // Update is used to change a few data shards and update their parity. + // Input 'newDatashards' contains the changed data shards. + // Input 'shards' contains the old data shards (a data shard that has not changed can be nil) and the old parity shards. + // The new parity shards will be in shards[DataShards:]. + // Update is very useful if DataShards is much larger than ParityShards and only a few data shards have changed. It will be + // faster than Encode and does not need to read all data shards to encode. + Update(shards [][]byte, newDatashards [][]byte) error + + // Split a data slice into the number of shards given to the encoder, + // and create empty parity shards. + // + // The data will be split into equally sized shards. + // If the data size isn't divisible by the number of shards, + // the last shard will contain extra zeros. + // + // There must be at least 1 byte otherwise ErrShortData will be + // returned. + // + // The data will not be copied, except for the last shard, so you + // should not modify the data of the input slice afterwards. + Split(data []byte) ([][]byte, error) + + // Join the shards and write the data segment to dst. + // + // Only the data shards are considered. + // You must supply the exact output size you want. + // If there are too few shards given, ErrTooFewShards will be returned. + // If the total data size is less than outSize, ErrShortData will be returned. + Join(dst io.Writer, shards [][]byte, outSize int) error +} + +// reedSolomon contains a matrix for a specific +// distribution of datashards and parity shards. +// Construct it using New() +type reedSolomon struct { + DataShards int // Number of data shards, should not be modified. + ParityShards int // Number of parity shards, should not be modified. + Shards int // Total number of shards. Calculated, and should not be modified. 
+ m matrix + tree inversionTree + parity [][]byte + o options + mPool sync.Pool +} + +// ErrInvShardNum will be returned by New, if you attempt to create +// an Encoder where either data or parity shards is zero or less. +var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards") + +// ErrMaxShardNum will be returned by New, if you attempt to create an +// Encoder where data and parity shards are bigger than the order of +// GF(2^8). +var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards") + +// buildMatrix creates the matrix to use for encoding, given the +// number of data shards and the number of total shards. +// +// The top square of the matrix is guaranteed to be an identity +// matrix, which means that the data shards are unchanged after +// encoding. +func buildMatrix(dataShards, totalShards int) (matrix, error) { + // Start with a Vandermonde matrix. This matrix would work, + // in theory, but doesn't have the property that the data + // shards are unchanged after encoding. + vm, err := vandermonde(totalShards, dataShards) + if err != nil { + return nil, err + } + + // Multiply by the inverse of the top square of the matrix. + // This will make the top square be the identity matrix, but + // preserve the property that any square subset of rows is + // invertible. + top, err := vm.SubMatrix(0, 0, dataShards, dataShards) + if err != nil { + return nil, err + } + + topInv, err := top.Invert() + if err != nil { + return nil, err + } + + return vm.Multiply(topInv) +} + +// buildMatrixPAR1 creates the matrix to use for encoding according to +// the PARv1 spec, given the number of data shards and the number of +// total shards. Note that the method they use is buggy, and may lead +// to cases where recovery is impossible, even if there are enough +// parity shards. 
+// +// The top square of the matrix is guaranteed to be an identity +// matrix, which means that the data shards are unchanged after +// encoding. +func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) { + result, err := newMatrix(totalShards, dataShards) + if err != nil { + return nil, err + } + + for r, row := range result { + // The top portion of the matrix is the identity + // matrix, and the bottom is a transposed Vandermonde + // matrix starting at 1 instead of 0. + if r < dataShards { + result[r][r] = 1 + } else { + for c := range row { + result[r][c] = galExp(byte(c+1), r-dataShards) + } + } + } + return result, nil +} + +func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) { + result, err := newMatrix(totalShards, dataShards) + if err != nil { + return nil, err + } + + for r, row := range result { + // The top portion of the matrix is the identity + // matrix, and the bottom is a transposed Cauchy matrix. + if r < dataShards { + result[r][r] = 1 + } else { + for c := range row { + result[r][c] = invTable[(byte(r ^ c))] + } + } + } + return result, nil +} + +// buildXorMatrix can be used to build a matrix with pure XOR +// operations if there is only one parity shard. +func buildXorMatrix(dataShards, totalShards int) (matrix, error) { + if dataShards+1 != totalShards { + return nil, errors.New("internal error") + } + result, err := newMatrix(totalShards, dataShards) + if err != nil { + return nil, err + } + + for r, row := range result { + // The top portion of the matrix is the identity + // matrix. + if r < dataShards { + result[r][r] = 1 + } else { + // Set all values to 1 (XOR) + for c := range row { + result[r][c] = 1 + } + } + } + return result, nil +} + +// New creates a new encoder and initializes it to +// the number of data shards and parity shards that +// you want to use. You can reuse this encoder. +// Note that the maximum number of total shards is 256. +// If no options are supplied, default options are used. 
+func New(dataShards, parityShards int, opts ...Option) (Encoder, error) { + r := reedSolomon{ + DataShards: dataShards, + ParityShards: parityShards, + Shards: dataShards + parityShards, + o: defaultOptions, + } + + for _, opt := range opts { + opt(&r.o) + } + if dataShards <= 0 || parityShards <= 0 { + return nil, ErrInvShardNum + } + + if dataShards+parityShards > 256 { + return nil, ErrMaxShardNum + } + + var err error + switch { + case r.o.fastOneParity && parityShards == 1: + r.m, err = buildXorMatrix(dataShards, r.Shards) + case r.o.useCauchy: + r.m, err = buildMatrixCauchy(dataShards, r.Shards) + case r.o.usePAR1Matrix: + r.m, err = buildMatrixPAR1(dataShards, r.Shards) + default: + r.m, err = buildMatrix(dataShards, r.Shards) + } + if err != nil { + return nil, err + } + + // Calculate what we want per round + r.o.perRound = cpuid.CPU.Cache.L2 + if r.o.perRound <= 0 { + // Set to 128K if undetectable. + r.o.perRound = 128 << 10 + } + + if cpuid.CPU.ThreadsPerCore > 1 && r.o.maxGoroutines > cpuid.CPU.PhysicalCores { + // If multiple threads per core, make sure they don't contend for cache. + r.o.perRound /= cpuid.CPU.ThreadsPerCore + } + // 1 input + parity must fit in cache, and we add one more to be safer. + r.o.perRound = r.o.perRound / (1 + parityShards) + // Align to 64 bytes. + r.o.perRound = ((r.o.perRound + 63) / 64) * 64 + + if r.o.minSplitSize <= 0 { + // Set minsplit as high as we can, but still have parity in L1. + cacheSize := cpuid.CPU.Cache.L1D + if cacheSize <= 0 { + cacheSize = 32 << 10 + } + + r.o.minSplitSize = cacheSize / (parityShards + 1) + // Min 1K + if r.o.minSplitSize < 1024 { + r.o.minSplitSize = 1024 + } + } + + if r.o.perRound < r.o.minSplitSize { + r.o.perRound = r.o.minSplitSize + } + + if r.o.shardSize > 0 { + p := runtime.GOMAXPROCS(0) + if p == 1 || r.o.shardSize <= r.o.minSplitSize*2 { + // Not worth it. + r.o.maxGoroutines = 1 + } else { + g := r.o.shardSize / r.o.perRound + + // Overprovision by a factor of 2. 
+ if g < p*2 && r.o.perRound > r.o.minSplitSize*2 { + g = p * 2 + r.o.perRound /= 2 + } + + // Have g be multiple of p + g += p - 1 + g -= g % p + + r.o.maxGoroutines = g + } + } + + // Inverted matrices are cached in a tree keyed by the indices + // of the invalid rows of the data to reconstruct. + // The inversion root node will have the identity matrix as + // its inversion matrix because it implies there are no errors + // with the original data. + r.tree = newInversionTree(dataShards, parityShards) + + r.parity = make([][]byte, parityShards) + for i := range r.parity { + r.parity[i] = r.m[dataShards+i] + } + + if avx2CodeGen && r.o.useAVX2 { + r.mPool.New = func() interface{} { + return make([]byte, r.Shards*2*32) + } + } + return &r, err +} + +// ErrTooFewShards is returned if too few shards were given to +// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct +// if there were too few shards to reconstruct the missing data. +var ErrTooFewShards = errors.New("too few shards given") + +// Encode encodes parity for a set of data shards. +// The input is an array 'shards' containing data shards followed by parity shards. +// The number of shards must match the number given to New. +// Each shard is a byte array, and they must all be the same size. +// The parity shards will always be overwritten and the data shards +// will remain the same. +func (r *reedSolomon) Encode(shards [][]byte) error { + if len(shards) != r.Shards { + return ErrTooFewShards + } + + err := checkShards(shards, false) + if err != nil { + return err + } + + // Get the slice of output buffers. + output := shards[r.DataShards:] + + // Do the coding. + r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0])) + return nil +} + +// ErrInvalidInput is returned if an invalid input parameter is passed to Update. 
+var ErrInvalidInput = errors.New("invalid input") + +func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error { + if len(shards) != r.Shards { + return ErrTooFewShards + } + + if len(newDatashards) != r.DataShards { + return ErrTooFewShards + } + + err := checkShards(shards, true) + if err != nil { + return err + } + + err = checkShards(newDatashards, true) + if err != nil { + return err + } + + for i := range newDatashards { + if newDatashards[i] != nil && shards[i] == nil { + return ErrInvalidInput + } + } + for _, p := range shards[r.DataShards:] { + if p == nil { + return ErrInvalidInput + } + } + + shardSize := shardSize(shards) + + // Get the slice of output buffers. + output := shards[r.DataShards:] + + // Do the coding. + r.updateParityShards(r.parity, shards[0:r.DataShards], newDatashards[0:r.DataShards], output, r.ParityShards, shardSize) + return nil +} + +func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) { + if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize { + r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount) + return + } + + for c := 0; c < r.DataShards; c++ { + in := newinputs[c] + if in == nil { + continue + } + oldin := oldinputs[c] + // oldinputs data will be change + sliceXor(in, oldin, &r.o) + for iRow := 0; iRow < outputCount; iRow++ { + galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o) + } + } +} + +func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) { + var wg sync.WaitGroup + do := byteCount / r.o.maxGoroutines + if do < r.o.minSplitSize { + do = r.o.minSplitSize + } + start := 0 + for start < byteCount { + if start+do > byteCount { + do = byteCount - start + } + wg.Add(1) + go func(start, stop int) { + for c := 0; c < r.DataShards; c++ { + in := newinputs[c] + if in == nil { + continue + } + oldin := oldinputs[c] + // 
oldinputs data will be change + sliceXor(in[start:stop], oldin[start:stop], &r.o) + for iRow := 0; iRow < outputCount; iRow++ { + galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o) + } + } + wg.Done() + }(start, start+do) + start += do + } + wg.Wait() +} + +// Verify returns true if the parity shards contain the right data. +// The data is the same format as Encode. No data is modified. +func (r *reedSolomon) Verify(shards [][]byte) (bool, error) { + if len(shards) != r.Shards { + return false, ErrTooFewShards + } + err := checkShards(shards, false) + if err != nil { + return false, err + } + + // Slice of buffers being checked. + toCheck := shards[r.DataShards:] + + // Do the checking. + return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil +} + +// Multiplies a subset of rows from a coding matrix by a full set of +// input shards to produce some output shards. +// 'matrixRows' is The rows from the matrix to use. +// 'inputs' An array of byte arrays, each of which is one input shard. +// The number of inputs used is determined by the length of each matrix row. +// outputs Byte arrays where the computed shards are stored. +// The number of outputs computed, and the +// number of matrix rows used, is determined by +// outputCount, which is the number of outputs to compute. 
func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
	// Nothing to compute.
	if len(outputs) == 0 {
		return
	}
	// Pick a specialised implementation when its preconditions hold.
	// The cases are ordered: AVX512 with goroutines, AVX512 alone,
	// then the generic goroutine version.
	switch {
	case r.o.useAVX512 && r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize && len(inputs) >= 4 && len(outputs) >= 2:
		r.codeSomeShardsAvx512P(matrixRows, inputs, outputs, outputCount, byteCount)
		return
	case r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2:
		r.codeSomeShardsAvx512(matrixRows, inputs, outputs, outputCount, byteCount)
		return
	case r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize:
		r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
		return
	}

	// Process using no goroutines
	// The data is walked in windows of at most perRound bytes.
	// Note that the window bounds come from len(inputs[0]), not byteCount.
	start, end := 0, r.o.perRound
	if end > len(inputs[0]) {
		end = len(inputs[0])
	}
	if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
		// The scratch matrix comes from and goes back to r.mPool.
		m := genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
		// The return value advances start; presumably it is the number
		// of bytes the AVX2 routine handled.
		start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
		r.mPool.Put(m)
		// Whatever is left is done by the loop below in a single window.
		end = len(inputs[0])
	}

	for start < len(inputs[0]) {
		// The loop reads r.DataShards inputs, regardless of len(inputs).
		for c := 0; c < r.DataShards; c++ {
			in := inputs[c][start:end]
			for iRow := 0; iRow < outputCount; iRow++ {
				if c == 0 {
					// First input: galMulSlice is used instead of the Xor
					// variant, so the previous output content is presumably
					// overwritten rather than accumulated.
					galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
				} else {
					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
				}
			}
		}
		// Advance to the next window, clamped to the shard length.
		start = end
		end += r.o.perRound
		if end > len(inputs[0]) {
			end = len(inputs[0])
		}
	}
}

// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
	var wg sync.WaitGroup
	// Bytes per goroutine: an even share, but never below minSplitSize.
	do := byteCount / r.o.maxGoroutines
	if do < r.o.minSplitSize {
		do = r.o.minSplitSize
	}
	// Make sizes divisible by 64
	do = (do + 63) & (^63)
	start := 0
	// The AVX2 matrix is generated once and shared by all goroutines;
	// it is returned to the pool when this function returns, which is
	// after wg.Wait().
	var avx2Matrix []byte
	if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
		avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
		defer r.mPool.Put(avx2Matrix)
	}
	for start < byteCount {
		// The last range takes whatever is left.
		if start+do > byteCount {
			do = byteCount - start
		}

		wg.Add(1)
		go func(start, stop int) {
			// Ranges shorter than 32 bytes skip the AVX2 routine.
			if avx2CodeGen && r.o.useAVX2 && stop-start >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
				// The return value advances start; presumably it is the
				// number of bytes the AVX2 routine handled.
				start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
			}

			// Remaining bytes of this range, in windows of perRound bytes.
			lstart, lstop := start, start+r.o.perRound
			if lstop > stop {
				lstop = stop
			}
			for lstart < stop {
				for c := 0; c < r.DataShards; c++ {
					in := inputs[c][lstart:lstop]
					for iRow := 0; iRow < outputCount; iRow++ {
						if c == 0 {
							galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
						} else {
							galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
						}
					}
				}
				lstart = lstop
				lstop += r.o.perRound
				if lstop > stop {
					lstop = stop
				}
			}
			wg.Done()
		}(start, start+do)
		start += do
	}
	wg.Wait()
}

// checkSomeShards is mostly the same as codeSomeShards,
// except that the computed values are compared with the
// supplied shards instead of being stored in them.
func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
		return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
	}
	// Freshly allocated buffers start out zeroed, so the Xor variant can
	// be used for every input, including the first one.
	outputs := make([][]byte, len(toCheck))
	for i := range outputs {
		outputs[i] = make([]byte, byteCount)
	}
	for c := 0; c < r.DataShards; c++ {
		in := inputs[c]
		for iRow := 0; iRow < outputCount; iRow++ {
			galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
		}
	}

	// All outputs are computed before any comparison takes place.
	for i, calc := range outputs {
		if !bytes.Equal(calc, toCheck[i]) {
			return false
		}
	}
	return true
}

// checkSomeShardsP is the goroutine version of checkSomeShards.
// The byte range is cut into pieces of byteCount/maxGoroutines bytes
// (at least minSplitSize, rounded up to a multiple of 64) and each piece
// is computed and compared in its own goroutine. It returns true only
// if every piece matched.
func (r *reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
	same := true
	var mu sync.RWMutex // For above

	var wg sync.WaitGroup
	do := byteCount / r.o.maxGoroutines
	if do < r.o.minSplitSize {
		do = r.o.minSplitSize
	}
	// Make sizes divisible by 64
	do = (do + 63) & (^63)
	start := 0
	for start < byteCount {
		if start+do > byteCount {
			do = byteCount - start
		}
		wg.Add(1)
		go func(start, do int) {
			defer wg.Done()
			// Scratch buffers for this piece only.
			outputs := make([][]byte, len(toCheck))
			for i := range outputs {
				outputs[i] = make([]byte, do)
			}
			for c := 0; c < r.DataShards; c++ {
				// Stop early when another goroutine already found a mismatch.
				mu.RLock()
				if !same {
					mu.RUnlock()
					return
				}
				mu.RUnlock()
				in := inputs[c][start : start+do]
				for iRow := 0; iRow < outputCount; iRow++ {
					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
				}
			}

			for i, calc := range outputs {
				if !bytes.Equal(calc, toCheck[i][start:start+do]) {
					mu.Lock()
					same = false
					mu.Unlock()
					return
				}
			}
		}(start, do)
		start += do
	}
	// same is read without the lock here; all writers have finished.
	wg.Wait()
	return same
}

// ErrShardNoData will be returned if there are no shards,
// or if the length of all shards is zero.
var ErrShardNoData = errors.New("no shard data")

// ErrShardSize is returned if shard length isn't the same for all
// shards.
+var ErrShardSize = errors.New("shard sizes do not match") + +// checkShards will check if shards are the same size +// or 0, if allowed. An error is returned if this fails. +// An error is also returned if all shards are size 0. +func checkShards(shards [][]byte, nilok bool) error { + size := shardSize(shards) + if size == 0 { + return ErrShardNoData + } + for _, shard := range shards { + if len(shard) != size { + if len(shard) != 0 || !nilok { + return ErrShardSize + } + } + } + return nil +} + +// shardSize return the size of a single shard. +// The first non-zero size is returned, +// or 0 if all shards are size 0. +func shardSize(shards [][]byte) int { + for _, shard := range shards { + if len(shard) != 0 { + return len(shard) + } + } + return 0 +} + +// Reconstruct will recreate the missing shards, if possible. +// +// Given a list of shards, some of which contain data, fills in the +// ones that don't have data. +// +// The length of the array must be equal to Shards. +// You indicate that a shard is missing by setting it to nil or zero-length. +// If a shard is zero-length but has sufficient capacity, that memory will +// be used, otherwise a new []byte will be allocated. +// +// If there are too few shards to reconstruct the missing +// ones, ErrTooFewShards will be returned. +// +// The reconstructed shard set is complete, but integrity is not verified. +// Use the Verify function to check if data set is ok. +func (r *reedSolomon) Reconstruct(shards [][]byte) error { + return r.reconstruct(shards, false) +} + +// ReconstructData will recreate any missing data shards, if possible. +// +// Given a list of shards, some of which contain data, fills in the +// data shards that don't have data. +// +// The length of the array must be equal to Shards. +// You indicate that a shard is missing by setting it to nil or zero-length. +// If a shard is zero-length but has sufficient capacity, that memory will +// be used, otherwise a new []byte will be allocated. 
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// As the reconstructed shard set may contain missing parity shards,
// calling the Verify function is likely to fail.
func (r *reedSolomon) ReconstructData(shards [][]byte) error {
	return r.reconstruct(shards, true)
}

// reconstruct will recreate the missing data shards, and unless
// dataOnly is true, also the missing parity shards.
//
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil
// or to zero length.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
	if len(shards) != r.Shards {
		return ErrTooFewShards
	}
	// Check arguments.
	err := checkShards(shards, true)
	if err != nil {
		return err
	}

	shardSize := shardSize(shards)

	// Quick check: are all of the shards present? If so, there's
	// nothing to do.
	numberPresent := 0
	dataPresent := 0
	for i := 0; i < r.Shards; i++ {
		if len(shards[i]) != 0 {
			numberPresent++
			if i < r.DataShards {
				dataPresent++
			}
		}
	}
	if numberPresent == r.Shards || dataOnly && dataPresent == r.DataShards {
		// Cool. All of the shards we care about have data. We don't
		// need to do anything.
		return nil
	}

	// More complete sanity check
	if numberPresent < r.DataShards {
		return ErrTooFewShards
	}

	// Pull out an array holding just the shards that
	// correspond to the rows of the submatrix. These shards
	// will be the input to the decoding process that re-creates
	// the missing data shards.
	//
	// Also, create an array of indices of the valid rows we do have
	// and the invalid rows we don't have up until we have enough valid rows.
	subShards := make([][]byte, r.DataShards)
	validIndices := make([]int, r.DataShards)
	invalidIndices := make([]int, 0)
	subMatrixRow := 0
	for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
		if len(shards[matrixRow]) != 0 {
			subShards[subMatrixRow] = shards[matrixRow]
			validIndices[subMatrixRow] = matrixRow
			subMatrixRow++
		} else {
			invalidIndices = append(invalidIndices, matrixRow)
		}
	}

	// Attempt to get the cached inverted matrix out of the tree
	// based on the indices of the invalid rows.
	dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)

	// If the inverted matrix isn't cached in the tree yet we must
	// construct it ourselves and insert it into the tree for the
	// future. In this way the inversion tree is lazily loaded.
	if dataDecodeMatrix == nil {
		// Pull out the rows of the matrix that correspond to the
		// shards that we have and build a square matrix. This
		// matrix could be used to generate the shards that we have
		// from the original data.
		subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
		for subMatrixRow, validIndex := range validIndices {
			for c := 0; c < r.DataShards; c++ {
				subMatrix[subMatrixRow][c] = r.m[validIndex][c]
			}
		}
		// Invert the matrix, so we can go from the encoded shards
		// back to the original data. Then pull out the row that
		// generates the shard that we want to decode. Note that
		// since this matrix maps back to the original data, it can
		// be used to create a data shard, but not a parity shard.
		dataDecodeMatrix, err = subMatrix.Invert()
		if err != nil {
			return err
		}

		// Cache the inverted matrix in the tree for future use keyed on the
		// indices of the invalid rows.
		err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
		if err != nil {
			return err
		}
	}

	// Re-create any data shards that were missing.
	//
	// The input to the coding is all of the shards we actually
	// have, and the output is the missing data shards. The computation
	// is done using the special decode matrix we just built.
	//
	// outputs and matrixRows have room for ParityShards entries; the
	// check above (numberPresent >= DataShards) means no more than that
	// many shards can be missing.
	outputs := make([][]byte, r.ParityShards)
	matrixRows := make([][]byte, r.ParityShards)
	outputCount := 0

	for iShard := 0; iShard < r.DataShards; iShard++ {
		if len(shards[iShard]) == 0 {
			// Reuse the caller's buffer when it is large enough.
			if cap(shards[iShard]) >= shardSize {
				shards[iShard] = shards[iShard][0:shardSize]
			} else {
				shards[iShard] = make([]byte, shardSize)
			}
			outputs[outputCount] = shards[iShard]
			matrixRows[outputCount] = dataDecodeMatrix[iShard]
			outputCount++
		}
	}
	r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)

	if dataOnly {
		// Exit out early if we are only interested in the data shards
		return nil
	}

	// Now that we have all of the data shards intact, we can
	// compute any of the parity that is missing.
	//
	// The input to the coding is ALL of the data shards, including
	// any that we just calculated. The output is whichever of the
	// parity shards were missing.
	outputCount = 0
	for iShard := r.DataShards; iShard < r.Shards; iShard++ {
		if len(shards[iShard]) == 0 {
			if cap(shards[iShard]) >= shardSize {
				shards[iShard] = shards[iShard][0:shardSize]
			} else {
				shards[iShard] = make([]byte, shardSize)
			}
			outputs[outputCount] = shards[iShard]
			matrixRows[outputCount] = r.parity[iShard-r.DataShards]
			outputCount++
		}
	}
	r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
	return nil
}

// ErrShortData will be returned by Split(), if there isn't enough data
// to fill the number of shards.
var ErrShortData = errors.New("not enough data to fill the number of requested shards")

// Split a data slice into the number of shards given to the encoder,
// and create empty parity shards if necessary.
//
// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards, +// the last shard will contain extra zeros. +// +// There must be at least 1 byte otherwise ErrShortData will be +// returned. +// +// The data will not be copied, except for the last shard, so you +// should not modify the data of the input slice afterwards. +func (r *reedSolomon) Split(data []byte) ([][]byte, error) { + if len(data) == 0 { + return nil, ErrShortData + } + // Calculate number of bytes per data shard. + perShard := (len(data) + r.DataShards - 1) / r.DataShards + + if cap(data) > len(data) { + data = data[:cap(data)] + } + + // Only allocate memory if necessary + var padding []byte + if len(data) < (r.Shards * perShard) { + // calculate maximum number of full shards in `data` slice + fullShards := len(data) / perShard + padding = make([]byte, r.Shards*perShard-perShard*fullShards) + copy(padding, data[perShard*fullShards:]) + data = data[0 : perShard*fullShards] + } + + // Split into equal-length shards. + dst := make([][]byte, r.Shards) + i := 0 + for ; i < len(dst) && len(data) >= perShard; i++ { + dst[i] = data[:perShard:perShard] + data = data[perShard:] + } + + for j := 0; i+j < len(dst); j++ { + dst[i+j] = padding[:perShard:perShard] + padding = padding[perShard:] + } + + return dst, nil +} + +// ErrReconstructRequired is returned if too few data shards are intact and a +// reconstruction is required before you can successfully join the shards. +var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil") + +// Join the shards and write the data segment to dst. +// +// Only the data shards are considered. +// You must supply the exact output size you want. +// +// If there are to few shards given, ErrTooFewShards will be returned. +// If the total data size is less than outSize, ErrShortData will be returned. +// If one or more required data shards are nil, ErrReconstructRequired will be returned. 
func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
	// Do we have enough shards?
	if len(shards) < r.DataShards {
		return ErrTooFewShards
	}
	// Parity shards, if supplied, are ignored.
	shards = shards[:r.DataShards]

	// Do we have enough data?
	size := 0
	for _, shard := range shards {
		if shard == nil {
			return ErrReconstructRequired
		}
		size += len(shard)

		// Do we have enough data already?
		// Shards after this point are not needed and not inspected.
		if size >= outSize {
			break
		}
	}
	if size < outSize {
		return ErrShortData
	}

	// Copy data to dst
	write := outSize
	for _, shard := range shards {
		if write < len(shard) {
			// Final, partial shard: write only what is still missing.
			_, err := dst.Write(shard[:write])
			return err
		}
		n, err := dst.Write(shard)
		if err != nil {
			return err
		}
		write -= n
	}
	return nil
}
diff --git a/vendor/github.com/klauspost/reedsolomon/streaming.go b/vendor/github.com/klauspost/reedsolomon/streaming.go
new file mode 100644
index 0000000..d048ba0
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
@@ -0,0 +1,603 @@
/**
 * Reed-Solomon Coding over 8-bit values.
 *
 * Copyright 2015, Klaus Post
 * Copyright 2015, Backblaze, Inc.
 */

package reedsolomon

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"sync"
)

// StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
//
// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
// since the streaming interface has a start up overhead.
//
// For all operations, readers and writers should not assume any order/size of
// individual reads/writes.
//
// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
// folder.
type StreamEncoder interface {
	// Encode parity shards for a set of data shards.
	//
	// Input is 'shards' containing readers for data shards followed by parity shards
	// io.Writer.
	//
	// The number of shards must match the number given to NewStream().
	//
	// Each reader must supply the same number of bytes.
	//
	// The parity shards will be written to the writer.
	// The number of bytes written will match the input size.
	//
	// If a data stream returns an error, a StreamReadError type error
	// will be returned. If a parity writer returns an error, a
	// StreamWriteError will be returned.
	Encode(data []io.Reader, parity []io.Writer) error

	// Verify returns true if the parity shards contain correct data.
	//
	// The number of shards must match the total number of data+parity shards
	// given to NewStream().
	//
	// Each reader must supply the same number of bytes.
	// If a shard stream returns an error, a StreamReadError type error
	// will be returned.
	Verify(shards []io.Reader) (bool, error)

	// Reconstruct will recreate the missing shards if possible.
	//
	// Given a list of valid shards (to read) and invalid shards (to write)
	//
	// You indicate that a shard is missing by setting it to nil in the 'valid'
	// slice and at the same time setting a non-nil writer in "fill".
	// An index cannot contain both non-nil 'valid' and 'fill' entry.
	// If both are provided 'ErrReconstructMismatch' is returned.
	//
	// If there are too few shards to reconstruct the missing
	// ones, ErrTooFewShards will be returned.
	//
	// The reconstructed shard set is complete, but integrity is not verified.
	// Use the Verify function to check if data set is ok.
	Reconstruct(valid []io.Reader, fill []io.Writer) error

	// Split an input stream into the number of shards given to the encoder.
	//
	// The data will be split into equally sized shards.
	// If the data size isn't divisible by the number of shards,
	// the last shard will contain extra zeros.
	//
	// You must supply the total size of your input.
	// 'ErrShortData' will be returned if it is unable to retrieve the
	// number of bytes indicated.
	Split(data io.Reader, dst []io.Writer, size int64) (err error)

	// Join the shards and write the data segment to dst.
	//
	// Only the data shards are considered.
	//
	// You must supply the exact output size you want.
	// If there are too few shards given, ErrTooFewShards will be returned.
	// If the total data size is less than outSize, ErrShortData will be returned.
	Join(dst io.Writer, shards []io.Reader, outSize int64) error
}

// StreamReadError is returned when a read error is encountered
// that relates to a supplied stream.
// This will allow you to find out which reader has failed.
type StreamReadError struct {
	Err    error // The error
	Stream int   // The stream number on which the error occurred
}

// Error returns the error as a string
func (s StreamReadError) Error() string {
	return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
}

// String returns the error as a string
func (s StreamReadError) String() string {
	return s.Error()
}

// StreamWriteError is returned when a write error is encountered
// that relates to a supplied stream. This will allow you to
// find out which writer has failed.
type StreamWriteError struct {
	Err    error // The error
	Stream int   // The stream number on which the error occurred
}

// Error returns the error as a string
func (s StreamWriteError) Error() string {
	return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
}

// String returns the error as a string
func (s StreamWriteError) String() string {
	return s.Error()
}

// rsStream contains a matrix for a specific
// distribution of datashards and parity shards.
// Construct it using NewStream().
type rsStream struct {
	r *reedSolomon // in-memory encoder that processes each block
	o options

	// Shard reader
	readShards func(dst [][]byte, in []io.Reader) error
	// Shard writer
	writeShards func(out []io.Writer, in [][]byte) error

	// Pool of [][]byte blocks, one buffer of streamBS bytes per shard.
	blockPool sync.Pool
}

// NewStream creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum number of data shards is 256.
func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
	r := rsStream{o: defaultOptions}
	for _, opt := range o {
		opt(&r.o)
	}
	// Override block size if shard size is set.
	if r.o.streamBS == 0 && r.o.shardSize > 0 {
		r.o.streamBS = r.o.shardSize
	}
	// Fall back to 4 MiB blocks.
	if r.o.streamBS <= 0 {
		r.o.streamBS = 4 << 20
	}
	// Let the encoder choose its goroutine count for the block size,
	// unless the caller set a shard size or a goroutine count.
	if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
		o = append(o, WithAutoGoroutines(r.o.streamBS))
	}

	enc, err := New(dataShards, parityShards, o...)
	if err != nil {
		return nil, err
	}
	r.r = enc.(*reedSolomon)

	r.blockPool.New = func() interface{} {
		out := make([][]byte, dataShards+parityShards)
		for i := range out {
			out[i] = make([]byte, r.o.streamBS)
		}
		return out
	}
	// Sequential shard I/O by default, concurrent when requested.
	r.readShards = readShards
	r.writeShards = writeShards
	if r.o.concReads {
		r.readShards = cReadShards
	}
	if r.o.concWrites {
		r.writeShards = cWriteShards
	}

	return &r, err
}

// NewStreamC creates a new encoder and initializes it to
// the number of data shards and parity shards given.
//
// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
	return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
}

// createSlice takes a block from the pool and resets every shard
// buffer in it to the full block size, since a previous user may have
// shortened them.
func (r *rsStream) createSlice() [][]byte {
	out := r.blockPool.Get().([][]byte)
	for i := range out {
		out[i] = out[i][:r.o.streamBS]
	}
	return out
}

// Encodes parity shards for a set of data shards.
//
// Input is 'shards' containing readers for data shards followed by parity shards
// io.Writer.
//
// The number of shards must match the number given to NewStream().
//
// Each reader must supply the same number of bytes.
//
// The parity shards will be written to the writer.
// The number of bytes written will match the input size.
//
// If a data stream returns an error, a StreamReadError type error
// will be returned. If a parity writer returns an error, a
// StreamWriteError will be returned.
func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
	if len(data) != r.r.DataShards {
		return ErrTooFewShards
	}

	if len(parity) != r.r.ParityShards {
		return ErrTooFewShards
	}

	all := r.createSlice()
	defer r.blockPool.Put(all)
	// in and out are views into all: data buffers first, then parity.
	in := all[:r.r.DataShards]
	out := all[r.r.DataShards:]
	read := 0

	// One iteration per block.
	for {
		err := r.readShards(in, data)
		switch err {
		case nil:
		case io.EOF:
			// EOF before any data was read means there was no input.
			if read == 0 {
				return ErrShardNoData
			}
			return nil
		default:
			return err
		}
		// The last block may be short; size the parity buffers to match.
		out = trimShards(out, shardSize(in))
		read += shardSize(in)
		err = r.r.Encode(all)
		if err != nil {
			return err
		}
		err = r.writeShards(parity, out)
		if err != nil {
			return err
		}
	}
}

// Trim the shards so they are all the same size.
// Shards that already have data are resliced to size;
// shards without data are left empty.
func trimShards(in [][]byte, size int) [][]byte {
	for i := range in {
		if len(in[i]) != 0 {
			in[i] = in[i][0:size]
		}
		if len(in[i]) < size {
			in[i] = in[i][:0]
		}
	}
	return in
}

// readShards fills each dst buffer from the reader with the same index,
// one reader after the other. A nil reader leaves its buffer empty.
// It panics if in and dst differ in length.
//
// io.EOF is returned when a reader ended without delivering any bytes.
// ErrShardSize is returned when two readers that ended delivered
// different numbers of bytes. Any other read error is wrapped in a
// StreamReadError carrying the reader index.
func readShards(dst [][]byte, in []io.Reader) error {
	if len(in) != len(dst) {
		panic("internal error: in and dst size do not match")
	}
	// Number of bytes delivered by the first reader that ended;
	// -1 while no reader has ended.
	size := -1
	for i := range in {
		if in[i] == nil {
			dst[i] = dst[i][:0]
			continue
		}
		n, err := io.ReadFull(in[i], dst[i])
		// The error is EOF only if no bytes were read.
		// If an EOF happens after reading some but not all the bytes,
		// ReadFull returns ErrUnexpectedEOF.
		switch err {
		case io.ErrUnexpectedEOF, io.EOF:
			if size < 0 {
				size = n
			} else if n != size {
				// Shard sizes must match.
				return ErrShardSize
			}
			dst[i] = dst[i][0:n]
		case nil:
			continue
		default:
			return StreamReadError{Err: err, Stream: i}
		}
	}
	if size == 0 {
		return io.EOF
	}
	return nil
}

// writeShards writes each in buffer to the writer with the same index,
// one after the other. Nil writers are skipped.
// It panics if out and in differ in length.
// Failures are reported as StreamWriteError carrying the writer index.
func writeShards(out []io.Writer, in [][]byte) error {
	if len(out) != len(in) {
		panic("internal error: in and out size do not match")
	}
	for i := range in {
		if out[i] == nil {
			continue
		}
		n, err := out[i].Write(in[i])
		if err != nil {
			return StreamWriteError{Err: err, Stream: i}
		}
		// A short write without an error is reported as io.ErrShortWrite.
		if n != len(in[i]) {
			return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
		}
	}
	return nil
}

// readResult is what one reading goroutine of cReadShards reports.
type readResult struct {
	n    int   // index of the stream
	size int   // number of bytes read
	err  error // error returned by io.ReadFull
}

// cReadShards reads shards concurrently
func cReadShards(dst [][]byte, in []io.Reader) error {
	if len(in) != len(dst) {
		panic("internal error: in and dst size do not match")
	}
	var wg sync.WaitGroup
	wg.Add(len(in))
	// Buffered for one result per reader, so no goroutine blocks on send.
	res := make(chan readResult, len(in))
	for i := range in {
		if in[i] == nil {
			dst[i] = dst[i][:0]
			wg.Done()
			continue
		}
		go func(i int) {
			defer wg.Done()
			n, err := io.ReadFull(in[i], dst[i])
			// The error is EOF only if no bytes were read.
			// If an EOF happens after reading some but not all the bytes,
			// ReadFull returns ErrUnexpectedEOF.
			res <- readResult{size: n, err: err, n: i}

		}(i)
	}
	wg.Wait()
	close(res)
	// Results are evaluated after all reads finished, in the order
	// they were sent to the channel.
	size := -1
	for r := range res {
		switch r.err {
		case io.ErrUnexpectedEOF, io.EOF:
			if size < 0 {
				size = r.size
			} else if r.size != size {
				// Shard sizes must match.
				return ErrShardSize
			}
			dst[r.n] = dst[r.n][0:r.size]
		case nil:
		default:
			return StreamReadError{Err: r.err, Stream: r.n}
		}
	}
	if size == 0 {
		return io.EOF
	}
	return nil
}

// cWriteShards writes shards concurrently
func cWriteShards(out []io.Writer, in [][]byte) error {
	if len(out) != len(in) {
		panic("internal error: in and out size do not match")
	}
	// Buffered for one error per writer, so no goroutine blocks on send.
	var errs = make(chan error, len(out))
	var wg sync.WaitGroup
	wg.Add(len(out))
	for i := range in {
		go func(i int) {
			defer wg.Done()
			if out[i] == nil {
				errs <- nil
				return
			}
			n, err := out[i].Write(in[i])
			if err != nil {
				errs <- StreamWriteError{Err: err, Stream: i}
				return
			}
			if n != len(in[i]) {
				errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
			}
		}(i)
	}
	wg.Wait()
	close(errs)
	// Report the first non-nil error found in the channel.
	for err := range errs {
		if err != nil {
			return err
		}
	}

	return nil
}

// Verify returns true if the parity shards contain correct data.
//
// The number of shards must match the total number of data+parity shards
// given to NewStream().
//
// Each reader must supply the same number of bytes.
// If a shard stream returns an error, a StreamReadError type error
// will be returned.
func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
	if len(shards) != r.r.Shards {
		return false, ErrTooFewShards
	}

	read := 0
	all := r.createSlice()
	defer r.blockPool.Put(all)
	// One iteration per block; stops at the first block that fails.
	for {
		err := r.readShards(all, shards)
		if err == io.EOF {
			if read == 0 {
				return false, ErrShardNoData
			}
			return true, nil
		}
		if err != nil {
			return false, err
		}
		read += shardSize(all)
		ok, err := r.r.Verify(all)
		if !ok || err != nil {
			return ok, err
		}
	}
}

// ErrReconstructMismatch is returned by the StreamEncoder, if you supply
// "valid" and "fill" streams on the same index.
// Therefore it is impossible to see if you consider the shard valid
// or would like to have it reconstructed.
+var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive") + +// Reconstruct will recreate the missing shards if possible. +// +// Given a list of valid shards (to read) and invalid shards (to write) +// +// You indicate that a shard is missing by setting it to nil in the 'valid' +// slice and at the same time setting a non-nil writer in "fill". +// An index cannot contain both non-nil 'valid' and 'fill' entry. +// +// If there are too few shards to reconstruct the missing +// ones, ErrTooFewShards will be returned. +// +// The reconstructed shard set is complete when explicitly asked for all missing shards. +// However its integrity is not automatically verified. +// Use the Verify function to check in case the data set is complete. +func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error { + if len(valid) != r.r.Shards { + return ErrTooFewShards + } + if len(fill) != r.r.Shards { + return ErrTooFewShards + } + + all := r.createSlice() + defer r.blockPool.Put(all) + reconDataOnly := true + for i := range valid { + if valid[i] != nil && fill[i] != nil { + return ErrReconstructMismatch + } + if i >= r.r.DataShards && fill[i] != nil { + reconDataOnly = false + } + } + + read := 0 + for { + err := r.readShards(all, valid) + if err == io.EOF { + if read == 0 { + return ErrShardNoData + } + return nil + } + if err != nil { + return err + } + read += shardSize(all) + all = trimShards(all, shardSize(all)) + + if reconDataOnly { + err = r.r.ReconstructData(all) // just reconstruct missing data shards + } else { + err = r.r.Reconstruct(all) // reconstruct all missing shards + } + if err != nil { + return err + } + err = r.writeShards(fill, all) + if err != nil { + return err + } + } +} + +// Join the shards and write the data segment to dst. +// +// Only the data shards are considered. +// +// You must supply the exact output size you want. +// If there are to few shards given, ErrTooFewShards will be returned. 
+// If the total data size is less than outSize, ErrShortData will be returned. +func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error { + // Do we have enough shards? + if len(shards) < r.r.DataShards { + return ErrTooFewShards + } + + // Trim off parity shards if any + shards = shards[:r.r.DataShards] + for i := range shards { + if shards[i] == nil { + return StreamReadError{Err: ErrShardNoData, Stream: i} + } + } + // Join all shards + src := io.MultiReader(shards...) + + // Copy data to dst + n, err := io.CopyN(dst, src, outSize) + if err == io.EOF { + return ErrShortData + } + if err != nil { + return err + } + if n != outSize { + return ErrShortData + } + return nil +} + +// Split a an input stream into the number of shards given to the encoder. +// +// The data will be split into equally sized shards. +// If the data size isn't dividable by the number of shards, +// the last shard will contain extra zeros. +// +// You must supply the total size of your input. +// 'ErrShortData' will be returned if it is unable to retrieve the +// number of bytes indicated. +func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error { + if size == 0 { + return ErrShortData + } + if len(dst) != r.r.DataShards { + return ErrInvShardNum + } + + for i := range dst { + if dst[i] == nil { + return StreamWriteError{Err: ErrShardNoData, Stream: i} + } + } + + // Calculate number of bytes per shard. + perShard := (size + int64(r.r.DataShards) - 1) / int64(r.r.DataShards) + + // Pad data to r.Shards*perShard. + padding := make([]byte, (int64(r.r.Shards)*perShard)-size) + data = io.MultiReader(data, bytes.NewBuffer(padding)) + + // Split into equal-length shards and copy. + for i := range dst { + n, err := io.CopyN(dst[i], data, perShard) + if err != io.EOF && err != nil { + return err + } + if n != perShard { + return ErrShortData + } + } + + return nil +} |