summaryrefslogtreecommitdiff
path: root/vendor/github.com/klauspost/reedsolomon
diff options
context:
space:
mode:
authorkali kaneko (leap communications) <kali@leap.se>2021-11-29 01:46:27 +0100
committerkali kaneko (leap communications) <kali@leap.se>2021-11-29 18:14:16 +0100
commit18f52af5be3a9a0c73811706108f790d65ee9c67 (patch)
treee13cbacb47d56919caa9c44a2b45dec1497a7860 /vendor/github.com/klauspost/reedsolomon
parentebcef0d57b6ecb5a40c6579f6be07182dd3033ba (diff)
[pkg] update vendor
Diffstat (limited to 'vendor/github.com/klauspost/reedsolomon')
-rw-r--r--vendor/github.com/klauspost/reedsolomon/.gitignore26
-rw-r--r--vendor/github.com/klauspost/reedsolomon/.travis.yml77
-rw-r--r--vendor/github.com/klauspost/reedsolomon/LICENSE23
-rw-r--r--vendor/github.com/klauspost/reedsolomon/README.md395
-rw-r--r--vendor/github.com/klauspost/reedsolomon/appveyor.yml20
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois.go929
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go338
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s400
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_amd64.go138
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_amd64.s368
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_arm64.go67
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_arm64.s125
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go408
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s18526
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_none.go11
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go293
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_noasm.go44
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_notamd64.go13
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go75
-rw-r--r--vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s124
-rw-r--r--vendor/github.com/klauspost/reedsolomon/gen.go249
-rw-r--r--vendor/github.com/klauspost/reedsolomon/go.mod7
-rw-r--r--vendor/github.com/klauspost/reedsolomon/go.sum2
-rw-r--r--vendor/github.com/klauspost/reedsolomon/inversion_tree.go160
-rw-r--r--vendor/github.com/klauspost/reedsolomon/matrix.go279
-rw-r--r--vendor/github.com/klauspost/reedsolomon/options.go175
-rw-r--r--vendor/github.com/klauspost/reedsolomon/reedsolomon.go1011
-rw-r--r--vendor/github.com/klauspost/reedsolomon/streaming.go603
28 files changed, 24886 insertions, 0 deletions
diff --git a/vendor/github.com/klauspost/reedsolomon/.gitignore b/vendor/github.com/klauspost/reedsolomon/.gitignore
new file mode 100644
index 0000000..59610b5
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/.gitignore
@@ -0,0 +1,26 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+
+.idea \ No newline at end of file
diff --git a/vendor/github.com/klauspost/reedsolomon/.travis.yml b/vendor/github.com/klauspost/reedsolomon/.travis.yml
new file mode 100644
index 0000000..f77b85c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/.travis.yml
@@ -0,0 +1,77 @@
+language: go
+
+os:
+ - linux
+ - osx
+ - windows
+
+arch:
+ - amd64
+ - arm64
+ - ppc64le
+ - s390x
+
+go:
+ - 1.12.x
+ - 1.13.x
+ - 1.14.x
+ - master
+
+install:
+ - go get ./...
+
+script:
+ - go vet ./...
+ - go test -cpu=1,2 .
+ - go test -tags=noasm -cpu=1,2 .
+ - go build examples/simple-decoder.go
+ - go build examples/simple-encoder.go
+ - go build examples/stream-decoder.go
+ - go build examples/stream-encoder.go
+
+stages:
+ - gofmt
+ - test
+ - deploy
+
+jobs:
+ allow_failures:
+ - go: 'master'
+ - arch: s390x
+ fast_finish: true
+ include:
+ - stage: gofmt
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - diff <(gofmt -d .) <(printf "")
+ - diff <(gofmt -d ./examples) <(printf "")
+ - go install github.com/klauspost/asmfmt/cmd/asmfmt
+ - diff <(asmfmt -d .) <(printf "")
+ - stage: race
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - go test -cpu=1 -short -race .
+ - go test -cpu=2 -short -race .
+ - go test -tags=noasm -cpu=1 -short -race .
+ - go test -tags=noasm -cpu=4 -short -race .
+ - go test -no-avx512 -short -race .
+ - go test -no-avx512 -no-avx2 -short -race .
+ - go test -no-avx512 -no-avx2 -no-ssse3 -short -race .
+ - stage: amd64-noasm
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - go test -no-avx512
+ - go test -no-avx512 -no-avx2
+ - go test -no-avx512 -no-avx2 -no-ssse3
+ - stage: i386
+ go: 1.14.x
+ os: linux
+ arch: amd64
+ script:
+ - GOOS=linux GOARCH=386 go test -short .
diff --git a/vendor/github.com/klauspost/reedsolomon/LICENSE b/vendor/github.com/klauspost/reedsolomon/LICENSE
new file mode 100644
index 0000000..a947e16
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/LICENSE
@@ -0,0 +1,23 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Klaus Post
+Copyright (c) 2015 Backblaze
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/vendor/github.com/klauspost/reedsolomon/README.md b/vendor/github.com/klauspost/reedsolomon/README.md
new file mode 100644
index 0000000..f9824cb
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/README.md
@@ -0,0 +1,395 @@
+# Reed-Solomon
+[![GoDoc][1]][2] [![Build Status][3]][4]
+
+[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
+[2]: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
+[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
+[4]: https://travis-ci.org/klauspost/reedsolomon
+
+Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
+
+This is a Go port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by
+[Backblaze](http://backblaze.com), with some additional optimizations.
+
+For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
+
+Package home: https://github.com/klauspost/reedsolomon
+
+Godoc: https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc
+
+# Installation
+To get the package use the standard:
+```bash
+go get -u github.com/klauspost/reedsolomon
+```
+
+# Changes
+
+## May 2020
+
+* ARM64 optimizations, up to 2.5x faster.
+* Added [WithFastOneParityMatrix](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithFastOneParityMatrix) for faster operation with 1 parity shard.
+* Much better performance when using a limited number of goroutines.
+* AVX512 is now using multiple cores.
+* Stream processing overhaul, big speedups in most cases.
+* AVX512 optimizations
+
+## March 6, 2019
+
+The pure Go implementation is about 30% faster. Minor tweaks to assembler implementations.
+
+## February 8, 2019
+
+AVX512 accelerated version added for Intel Skylake CPUs. This can give up to a 4x speed improvement as compared to AVX2.
+See [here](https://github.com/klauspost/reedsolomon#performance-on-avx512) for more details.
+
+## December 18, 2018
+
+Assembly code for ppc64le has been contributed, this boosts performance by about 10x on this platform.
+
+## November 18, 2017
+
+Added [WithAutoGoroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithAutoGoroutines) which will attempt
+to calculate the optimal number of goroutines to use based on your expected shard size and detected CPU.
+
+## October 1, 2017
+
+* [Cauchy Matrix](https://godoc.org/github.com/klauspost/reedsolomon#WithCauchyMatrix) is now an option.
+Thanks to [templexxx](https://github.com/templexxx) for the basis of this.
+
+* Default maximum number of [goroutines](https://godoc.org/github.com/klauspost/reedsolomon#WithMaxGoroutines)
+has been increased for better multi-core scaling.
+
+* After several requests, the Reconstruct and ReconstructData functions now allow slices of zero length but sufficient capacity to
+be used instead of allocating new memory.
+
+## August 26, 2017
+
+* The [`Encoder()`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) now contains an `Update`
+function contributed by [chenzhongtao](https://github.com/chenzhongtao).
+
+* [Frank Wessels](https://github.com/fwessels) kindly contributed ARM 64 bit assembly,
+which gives a huge performance boost on this platform.
+
+## July 20, 2017
+
+`ReconstructData` added to [`Encoder`](https://godoc.org/github.com/klauspost/reedsolomon#Encoder) interface.
+This can cause compatibility issues if you implement your own Encoder. A simple workaround can be added:
+
+```Go
+func (e *YourEnc) ReconstructData(shards [][]byte) error {
+ return ReconstructData(shards)
+}
+```
+
+You can of course also do your own implementation.
+The [`StreamEncoder`](https://godoc.org/github.com/klauspost/reedsolomon#StreamEncoder)
+handles this without modifying the interface.
+This is a good lesson on why returning interfaces is not a good design.
+
+# Usage
+
+This section assumes you know the basics of Reed-Solomon encoding.
+A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
+
+This package performs the calculation of the parity sets. The usage is therefore relatively simple.
+
+First of all, you need to choose your distribution of data and parity shards.
+A 'good' distribution is very subjective, and will depend a lot on your usage scenario.
+A good starting point is above 5 and below 257 data shards (the maximum supported number),
+and the number of parity shards to be 2 or above, and below the number of data shards.
+
+To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
+```Go
+ enc, err := reedsolomon.New(10, 3)
+```
+This encoder will work for all parity sets with this distribution of data and parity shards.
+The error will only be set if you specify 0 or negative values in any of the parameters,
+or if you specify more than 256 data shards.
+
+If you will primarily be using it with one shard size it is recommended to use
+[`WithAutoGoroutines(shardSize)`](https://pkg.go.dev/github.com/klauspost/reedsolomon?tab=doc#WithAutoGoroutines)
+as an additional parameter. This will attempt to calculate the optimal number of goroutines to use for the best speed.
+It is not required that all shards are this size.
+
+The data you send and receive is a simple slice of byte slices; `[][]byte`.
+In the example above, the top slice must have a length of 13.
+
+```Go
+ data := make([][]byte, 13)
+```
+You should then fill the 10 first slices with *equally sized* data,
+and create parity shards that will be populated with parity data. In this case we create the data in memory,
+but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
+
+```Go
+ // Create all shards, size them at 50000 each
+ for i := range data {
+ data[i] = make([]byte, 50000)
+ }
+
+
+ // Fill some data into the data shards
+ for i, in := range data[:10] {
+ for j:= range in {
+ in[j] = byte((i+j)&0xff)
+ }
+ }
+```
+
+To populate the parity shards, you simply call `Encode()` with your data.
+```Go
+ err = enc.Encode(data)
+```
+The only cases where you should get an error is, if the data shards aren't of equal size.
+The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
+
+```Go
+ ok, err = enc.Verify(data)
+```
+
+The final (and important) part is to be able to reconstruct missing shards.
+For this to work, you need to know which parts of your data are missing.
+The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario,
+you need to implement a hash check for each shard.
+
+If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
+
+To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
+
+```Go
+ // Delete two data shards
+ data[3] = nil
+ data[7] = nil
+
+ // Reconstruct the missing shards
+ err := enc.Reconstruct(data)
+```
+The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
+
+If you are only interested in the data shards (for reading purposes) you can call `ReconstructData()`:
+
+```Go
+ // Delete two data shards
+ data[3] = nil
+ data[7] = nil
+
+ // Reconstruct just the missing data shards
+ err := enc.ReconstructData(data)
+```
+
+So to sum up reconstruction:
+* The number of data/parity shards must match the numbers used for encoding.
+* The order of shards must be the same as used when encoding.
+* You may only supply data you know is valid.
+* Invalid shards should be set to nil.
+
+For complete examples of an encoder and decoder see the
+[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Splitting/Joining Data
+
+You might have a large slice of data.
+To help you split this, there are some helper functions that can split and join a single byte slice.
+
+```Go
+ bigfile, _ := ioutil.ReadFile("myfile.data")
+
+ // Split the file
+ split, err := enc.Split(bigfile)
+```
+This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
+
+An important thing to note is that you have to *keep track of the exact input size*.
+If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
+
+To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
+```Go
+ // Join a data set and write it to io.Discard.
+ err = enc.Join(io.Discard, data, len(bigfile))
+```
+
+# Streaming/Merging
+
+It might seem like a limitation that all data should be in memory,
+but an important property is that *as long as the number of data/parity shards are the same,
+you can merge/split data sets*, and they will remain valid as a separate set.
+
+```Go
+ // Split the data set of 50000 elements into two of 25000
+ splitA := make([][]byte, 13)
+ splitB := make([][]byte, 13)
+
+ // Merge into a 100000 element set
+ merged := make([][]byte, 13)
+
+ for i := range data {
+ splitA[i] = data[i][:25000]
+ splitB[i] = data[i][25000:]
+
+ // Concatenate it to itself
+ merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
+ merged[i] = append(merged[i], data[i]...)
+ }
+
+ // Each part should still verify as ok.
+ ok, err := enc.Verify(splitA)
+ if ok && err == nil {
+ log.Println("splitA ok")
+ }
+
+ ok, err = enc.Verify(splitB)
+ if ok && err == nil {
+ log.Println("splitB ok")
+ }
+
+ ok, err = enc.Verify(merged)
+ if ok && err == nil {
+ log.Println("merge ok")
+ }
+```
+
+This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks.
+For the best throughput, don't use too small blocks.
+
+This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data.
+This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
+
+# Streaming API
+
+There has been added support for a streaming API, to help perform fully streaming operations,
+which enables you to do the same operations, but on streams.
+To use the stream API, use [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function
+to create the encoding/decoding interfaces.
+
+You can use [`WithConcurrentStreams`](https://godoc.org/github.com/klauspost/reedsolomon#WithConcurrentStreams)
+to ready an interface that reads/writes concurrently from the streams.
+
+You can specify the size of each operation using
+[`WithStreamBlockSize`](https://godoc.org/github.com/klauspost/reedsolomon#WithStreamBlockSize).
+This will set the size of each read/write operation.
+
+Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API.
+Each stream must supply the same amount of data, similar to how each slice must be similar size with the in-memory API.
+If an error occurs in relation to a stream,
+a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError)
+or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError)
+will help you determine which stream was the offender.
+
+There is no buffering or timeouts/retry specified. If you want to add that, you need to add it to the Reader/Writer.
+
+For complete examples of a streaming encoder and decoder see the
+[examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
+
+# Advanced Options
+
+You can modify internal options which affects how jobs are split between and processed by goroutines.
+
+To create options, use the WithXXX functions. You can supply options to `New`, `NewStream`.
+If no Options are supplied, default options are used.
+
+Example of how to supply options:
+
+ ```Go
+ enc, err := reedsolomon.New(10, 3, WithMaxGoroutines(25))
+ ```
+
+
+# Performance
+Performance depends mainly on the number of parity shards.
+In rough terms, doubling the number of parity shards will double the encoding time.
+
+Here are the throughput numbers with some different selections of data and parity shards.
+For reference each shard is 1MB random data, and 2 CPU cores are used for encoding.
+
+| Data | Parity | Parity | MB/s | SSSE3 MB/s | SSSE3 Speed | Rel. Speed |
+|------|--------|--------|--------|-------------|-------------|------------|
+| 5 | 2 | 40% | 576,11 | 2599,2 | 451% | 100,00% |
+| 10 | 2 | 20% | 587,73 | 3100,28 | 528% | 102,02% |
+| 10 | 4 | 40% | 298,38 | 2470,97 | 828% | 51,79% |
+| 50 | 20 | 40% | 59,81 | 713,28 | 1193% | 10,38% |
+
+If `runtime.GOMAXPROCS()` is set to a value higher than 1,
+the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
+
+Example of performance scaling on AMD Ryzen 3950X - 16 physical cores, 32 logical cores, AVX 2.
+The example uses 10 blocks with 1MB data each and 4 parity blocks.
+
+| Threads | Speed |
+|---------|------------|
+| 1 | 9979 MB/s |
+| 2 | 18870 MB/s |
+| 4 | 33697 MB/s |
+| 8 | 51531 MB/s |
+| 16 | 59204 MB/s |
+
+
+Benchmarking `Reconstruct()` followed by a `Verify()` (=`all`) versus just calling `ReconstructData()` (=`data`) gives the following result:
+```
+benchmark all MB/s data MB/s speedup
+BenchmarkReconstruct10x2x10000-8 2011.67 10530.10 5.23x
+BenchmarkReconstruct50x5x50000-8 4585.41 14301.60 3.12x
+BenchmarkReconstruct10x2x1M-8 8081.15 28216.41 3.49x
+BenchmarkReconstruct5x2x1M-8 5780.07 28015.37 4.85x
+BenchmarkReconstruct10x4x1M-8 4352.56 14367.61 3.30x
+BenchmarkReconstruct50x20x1M-8 1364.35 4189.79 3.07x
+BenchmarkReconstruct10x4x16M-8 1484.35 5779.53 3.89x
+```
+
+# Performance on AVX512
+
+The performance on AVX512 has been accelerated for Intel CPUs.
+This gives speedups on a per-core basis typically up to 2x compared to
+AVX2 as can be seen in the following table:
+
+```
+[...]
+```
+
+This speedup has been achieved by computing multiple parity blocks in parallel as opposed to one after the other.
+In doing so it is possible to minimize the memory bandwidth required for loading all data shards.
+At the same time the calculations are performed in the 512-bit wide ZMM registers and the surplus of ZMM
+registers (32 in total) is used to keep more data around (most notably the matrix coefficients).
+
+# Performance on ARM64 NEON
+
+By exploiting NEON instructions the performance for ARM has been accelerated.
+Below are the performance numbers for a single core on an EC2 m6g.16xlarge (Graviton2) instance (Amazon Linux 2):
+
+```
+BenchmarkGalois128K-64 119562 10028 ns/op 13070.78 MB/s
+BenchmarkGalois1M-64 14380 83424 ns/op 12569.22 MB/s
+BenchmarkGaloisXor128K-64 96508 12432 ns/op 10543.29 MB/s
+BenchmarkGaloisXor1M-64 10000 100322 ns/op 10452.13 MB/s
+```
+
+# Performance on ppc64le
+
+The performance for ppc64le has been accelerated.
+This gives roughly a 10x performance improvement on this architecture as can been seen below:
+
+```
+benchmark old MB/s new MB/s speedup
+BenchmarkGalois128K-160 948.87 8878.85 9.36x
+BenchmarkGalois1M-160 968.85 9041.92 9.33x
+BenchmarkGaloisXor128K-160 862.02 7905.00 9.17x
+BenchmarkGaloisXor1M-160 784.60 6296.65 8.03x
+```
+
+# asm2plan9s
+
+[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
+
+# Links
+* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
+* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
+* [ocaml-reed-solomon-erasure](https://gitlab.com/darrenldl/ocaml-reed-solomon-erasure). Compatible OCaml implementation.
+* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
+* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
+* [reed-solomon-erasure](https://github.com/darrenldl/reed-solomon-erasure). Compatible Rust implementation.
+* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
+* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
+
+# License
+
+This code, as the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) is published under an MIT license. See LICENSE file for more information.
diff --git a/vendor/github.com/klauspost/reedsolomon/appveyor.yml b/vendor/github.com/klauspost/reedsolomon/appveyor.yml
new file mode 100644
index 0000000..9bb067f
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/appveyor.yml
@@ -0,0 +1,20 @@
+os: Visual Studio 2015
+
+platform: x64
+
+clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
+
+# environment variables
+environment:
+ GOPATH: c:\gopath
+
+install:
+ - echo %PATH%
+ - echo %GOPATH%
+ - go version
+ - go env
+ - go get -d ./...
+
+build_script:
+ - go test -v -cpu=2 ./...
+ - go test -cpu=1,2,4 -short -race ./...
diff --git a/vendor/github.com/klauspost/reedsolomon/galois.go b/vendor/github.com/klauspost/reedsolomon/galois.go
new file mode 100644
index 0000000..76049f9
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois.go
@@ -0,0 +1,929 @@
+/**
+ * 8-bit Galois Field
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc. All rights reserved.
+ */
+
+package reedsolomon
+
+const (
+ // The number of elements in the field.
+ fieldSize = 256
+
+ // The polynomial used to generate the logarithm table.
+ //
+ // There are a number of polynomials that work to generate
+ // a Galois field of 256 elements. The choice is arbitrary,
+ // and we just use the first one.
+ //
+ // The possibilities are: 29, 43, 45, 77, 95, 99, 101, 105,
+ //* 113, 135, 141, 169, 195, 207, 231, and 245.
+ generatingPolynomial = 29
+)
+
+var logTable = [fieldSize]byte{
+ 0, 0, 1, 25, 2, 50, 26, 198,
+ 3, 223, 51, 238, 27, 104, 199, 75,
+ 4, 100, 224, 14, 52, 141, 239, 129,
+ 28, 193, 105, 248, 200, 8, 76, 113,
+ 5, 138, 101, 47, 225, 36, 15, 33,
+ 53, 147, 142, 218, 240, 18, 130, 69,
+ 29, 181, 194, 125, 106, 39, 249, 185,
+ 201, 154, 9, 120, 77, 228, 114, 166,
+ 6, 191, 139, 98, 102, 221, 48, 253,
+ 226, 152, 37, 179, 16, 145, 34, 136,
+ 54, 208, 148, 206, 143, 150, 219, 189,
+ 241, 210, 19, 92, 131, 56, 70, 64,
+ 30, 66, 182, 163, 195, 72, 126, 110,
+ 107, 58, 40, 84, 250, 133, 186, 61,
+ 202, 94, 155, 159, 10, 21, 121, 43,
+ 78, 212, 229, 172, 115, 243, 167, 87,
+ 7, 112, 192, 247, 140, 128, 99, 13,
+ 103, 74, 222, 237, 49, 197, 254, 24,
+ 227, 165, 153, 119, 38, 184, 180, 124,
+ 17, 68, 146, 217, 35, 32, 137, 46,
+ 55, 63, 209, 91, 149, 188, 207, 205,
+ 144, 135, 151, 178, 220, 252, 190, 97,
+ 242, 86, 211, 171, 20, 42, 93, 158,
+ 132, 60, 57, 83, 71, 109, 65, 162,
+ 31, 45, 67, 216, 183, 123, 164, 118,
+ 196, 23, 73, 236, 127, 12, 111, 246,
+ 108, 161, 59, 82, 41, 157, 85, 170,
+ 251, 96, 134, 177, 187, 204, 62, 90,
+ 203, 89, 95, 176, 156, 169, 160, 81,
+ 11, 245, 22, 235, 122, 117, 44, 215,
+ 79, 174, 213, 233, 230, 231, 173, 232,
+ 116, 214, 244, 234, 168, 80, 88, 175,
+}
+
+/**
+ * Inverse of the logarithm table. Maps integer logarithms
+ * to members of the field. There is no entry for 255
+ * because the highest log is 254.
+ *
+ * This table was generated by `go run gentables.go`
+ */
+var expTable = []byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26, 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9, 0x8f, 0x3, 0x6, 0xc, 0x18, 0x30, 0x60, 0xc0, 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35, 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23, 0x46, 0x8c, 0x5, 0xa, 0x14, 0x28, 0x50, 0xa0, 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1, 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc, 0x65, 0xca, 0x89, 0xf, 0x1e, 0x3c, 
0x78, 0xf0, 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f, 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2, 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88, 0xd, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce, 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93, 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc, 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9, 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54, 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa, 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73, 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e, 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff, 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4, 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41, 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x7, 0xe, 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6, 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef, 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x9, 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5, 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0xb, 0x16, 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83, 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e}
+
+func galAdd(a, b byte) byte {
+ return a ^ b
+}
+
+func galSub(a, b byte) byte {
+ return a ^ b
+}
+
+// Table from https://github.com/templexxx/reedsolomon
+var invTable = [256]byte{0x0, 0x1, 0x8e, 0xf4, 0x47, 0xa7, 0x7a, 0xba, 0xad, 0x9d, 0xdd, 0x98, 0x3d, 0xaa, 0x5d, 0x96, 0xd8, 0x72, 0xc0, 0x58, 0xe0, 0x3e, 0x4c, 0x66, 0x90, 0xde, 0x55, 0x80, 0xa0, 0x83, 0x4b, 0x2a, 0x6c, 0xed, 0x39, 0x51, 0x60, 0x56, 0x2c, 0x8a, 0x70, 0xd0, 0x1f, 0x4a, 0x26, 0x8b, 0x33, 0x6e, 0x48, 0x89, 0x6f, 0x2e, 0xa4, 0xc3, 0x40, 0x5e, 0x50, 0x22, 0xcf, 0xa9, 0xab, 0xc, 0x15, 0xe1, 0x36, 0x5f, 0xf8, 0xd5, 0x92, 0x4e, 0xa6, 0x4, 0x30, 0x88, 0x2b, 0x1e, 0x16, 0x67, 0x45, 0x93, 0x38, 0x23, 0x68, 0x8c, 0x81, 0x1a, 0x25, 0x61, 0x13, 0xc1, 0xcb, 0x63, 0x97, 0xe, 0x37, 0x41, 0x24, 0x57, 0xca, 0x5b, 0xb9, 0xc4, 0x17, 0x4d, 0x52, 0x8d, 0xef, 0xb3, 0x20, 0xec, 0x2f, 0x32, 0x28, 0xd1, 0x11, 0xd9, 0xe9, 0xfb, 0xda, 0x79, 0xdb, 0x77, 0x6, 0xbb, 0x84, 0xcd, 0xfe, 0xfc, 0x1b, 0x54, 0xa1, 0x1d, 0x7c, 0xcc, 0xe4, 0xb0, 0x49, 0x31, 0x27, 0x2d, 0x53, 0x69, 0x2, 0xf5, 0x18, 0xdf, 0x44, 0x4f, 0x9b, 0xbc, 0xf, 0x5c, 0xb, 0xdc, 0xbd, 0x94, 0xac, 0x9, 0xc7, 0xa2, 0x1c, 0x82, 0x9f, 0xc6, 0x34, 0xc2, 0x46, 0x5, 0xce, 0x3b, 0xd, 0x3c, 0x9c, 0x8, 0xbe, 0xb7, 0x87, 0xe5, 0xee, 0x6b, 0xeb, 0xf2, 0xbf, 0xaf, 0xc5, 0x64, 0x7, 0x7b, 0x95, 0x9a, 0xae, 0xb6, 0x12, 0x59, 0xa5, 0x35, 0x65, 0xb8, 0xa3, 0x9e, 0xd2, 0xf7, 0x62, 0x5a, 0x85, 0x7d, 0xa8, 0x3a, 0x29, 0x71, 0xc8, 0xf6, 0xf9, 0x43, 0xd7, 0xd6, 0x10, 0x73, 0x76, 0x78, 0x99, 0xa, 0x19, 0x91, 0x14, 0x3f, 0xe6, 0xf0, 0x86, 0xb1, 0xe2, 0xf1, 0xfa, 0x74, 0xf3, 0xb4, 0x6d, 0x21, 0xb2, 0x6a, 0xe3, 0xe7, 0xb5, 0xea, 0x3, 0x8f, 0xd3, 0xc9, 0x42, 0xd4, 0xe8, 0x75, 0x7f, 0xff, 0x7e, 0xfd}
+
+var mulTable = [256][256]uint8{[256]uint8{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+ {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff},
+ {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, 0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0xd, 0xf, 0x9, 0xb, 0x5, 0x7, 0x1, 0x3, 0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23, 0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43, 0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63, 0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83, 0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3, 0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3, 0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3},
+ {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, 0x9d, 0x9e, 0x9b, 0x98, 0x91, 0x92, 0x97, 0x94, 0x85, 0x86, 0x83, 0x80, 0x89, 0x8a, 0x8f, 0x8c, 0xad, 0xae, 0xab, 0xa8, 0xa1, 0xa2, 0xa7, 0xa4, 0xb5, 0xb6, 0xb3, 0xb0, 0xb9, 0xba, 0xbf, 0xbc, 0xfd, 0xfe, 0xfb, 0xf8, 0xf1, 0xf2, 0xf7, 0xf4, 0xe5, 0xe6, 0xe3, 0xe0, 0xe9, 0xea, 0xef, 0xec, 0xcd, 0xce, 0xcb, 0xc8, 0xc1, 0xc2, 0xc7, 0xc4, 0xd5, 0xd6, 0xd3, 0xd0, 0xd9, 0xda, 0xdf, 0xdc, 0x5d, 0x5e, 0x5b, 0x58, 0x51, 0x52, 0x57, 0x54, 0x45, 0x46, 0x43, 0x40, 0x49, 0x4a, 0x4f, 0x4c, 0x6d, 0x6e, 0x6b, 0x68, 0x61, 0x62, 0x67, 0x64, 0x75, 0x76, 0x73, 0x70, 0x79, 0x7a, 0x7f, 0x7c, 0x3d, 0x3e, 0x3b, 0x38, 0x31, 0x32, 0x37, 0x34, 0x25, 0x26, 0x23, 0x20, 0x29, 0x2a, 0x2f, 0x2c, 0xd, 0xe, 0xb, 0x8, 0x1, 0x2, 0x7, 0x4, 0x15, 0x16, 0x13, 0x10, 0x19, 0x1a, 0x1f, 0x1c},
+ {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c, 0x40, 0x44, 0x48, 0x4c, 0x50, 0x54, 0x58, 0x5c, 0x60, 0x64, 0x68, 0x6c, 0x70, 0x74, 0x78, 0x7c, 0x80, 0x84, 0x88, 0x8c, 0x90, 0x94, 0x98, 0x9c, 0xa0, 0xa4, 0xa8, 0xac, 0xb0, 0xb4, 0xb8, 0xbc, 0xc0, 0xc4, 0xc8, 0xcc, 0xd0, 0xd4, 0xd8, 0xdc, 0xe0, 0xe4, 0xe8, 0xec, 0xf0, 0xf4, 0xf8, 0xfc, 0x1d, 0x19, 0x15, 0x11, 0xd, 0x9, 0x5, 0x1, 0x3d, 0x39, 0x35, 0x31, 0x2d, 0x29, 0x25, 0x21, 0x5d, 0x59, 0x55, 0x51, 0x4d, 0x49, 0x45, 0x41, 0x7d, 0x79, 0x75, 0x71, 0x6d, 0x69, 0x65, 0x61, 0x9d, 0x99, 0x95, 0x91, 0x8d, 0x89, 0x85, 0x81, 0xbd, 0xb9, 0xb5, 0xb1, 0xad, 0xa9, 0xa5, 0xa1, 0xdd, 0xd9, 0xd5, 0xd1, 0xcd, 0xc9, 0xc5, 0xc1, 0xfd, 0xf9, 0xf5, 0xf1, 0xed, 0xe9, 0xe5, 0xe1, 0x3a, 0x3e, 0x32, 0x36, 0x2a, 0x2e, 0x22, 0x26, 0x1a, 0x1e, 0x12, 0x16, 0xa, 0xe, 0x2, 0x6, 0x7a, 0x7e, 0x72, 0x76, 0x6a, 0x6e, 0x62, 0x66, 0x5a, 0x5e, 0x52, 0x56, 0x4a, 0x4e, 0x42, 0x46, 0xba, 0xbe, 0xb2, 0xb6, 0xaa, 0xae, 0xa2, 0xa6, 0x9a, 0x9e, 0x92, 0x96, 0x8a, 0x8e, 0x82, 0x86, 0xfa, 0xfe, 0xf2, 0xf6, 0xea, 0xee, 0xe2, 0xe6, 0xda, 0xde, 0xd2, 0xd6, 0xca, 0xce, 0xc2, 0xc6, 0x27, 0x23, 0x2f, 0x2b, 0x37, 0x33, 0x3f, 0x3b, 0x7, 0x3, 0xf, 0xb, 0x17, 0x13, 0x1f, 0x1b, 0x67, 0x63, 0x6f, 0x6b, 0x77, 0x73, 0x7f, 0x7b, 0x47, 0x43, 0x4f, 0x4b, 0x57, 0x53, 0x5f, 0x5b, 0xa7, 0xa3, 0xaf, 0xab, 0xb7, 0xb3, 0xbf, 0xbb, 0x87, 0x83, 0x8f, 0x8b, 0x97, 0x93, 0x9f, 0x9b, 0xe7, 0xe3, 0xef, 0xeb, 0xf7, 0xf3, 0xff, 0xfb, 0xc7, 0xc3, 0xcf, 0xcb, 0xd7, 0xd3, 0xdf, 0xdb},
+ {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33, 0x50, 0x55, 0x5a, 0x5f, 0x44, 0x41, 0x4e, 0x4b, 0x78, 0x7d, 0x72, 0x77, 0x6c, 0x69, 0x66, 0x63, 0xa0, 0xa5, 0xaa, 0xaf, 0xb4, 0xb1, 0xbe, 0xbb, 0x88, 0x8d, 0x82, 0x87, 0x9c, 0x99, 0x96, 0x93, 0xf0, 0xf5, 0xfa, 0xff, 0xe4, 0xe1, 0xee, 0xeb, 0xd8, 0xdd, 0xd2, 0xd7, 0xcc, 0xc9, 0xc6, 0xc3, 0x5d, 0x58, 0x57, 0x52, 0x49, 0x4c, 0x43, 0x46, 0x75, 0x70, 0x7f, 0x7a, 0x61, 0x64, 0x6b, 0x6e, 0xd, 0x8, 0x7, 0x2, 0x19, 0x1c, 0x13, 0x16, 0x25, 0x20, 0x2f, 0x2a, 0x31, 0x34, 0x3b, 0x3e, 0xfd, 0xf8, 0xf7, 0xf2, 0xe9, 0xec, 0xe3, 0xe6, 0xd5, 0xd0, 0xdf, 0xda, 0xc1, 0xc4, 0xcb, 0xce, 0xad, 0xa8, 0xa7, 0xa2, 0xb9, 0xbc, 0xb3, 0xb6, 0x85, 0x80, 0x8f, 0x8a, 0x91, 0x94, 0x9b, 0x9e, 0xba, 0xbf, 0xb0, 0xb5, 0xae, 0xab, 0xa4, 0xa1, 0x92, 0x97, 0x98, 0x9d, 0x86, 0x83, 0x8c, 0x89, 0xea, 0xef, 0xe0, 0xe5, 0xfe, 0xfb, 0xf4, 0xf1, 0xc2, 0xc7, 0xc8, 0xcd, 0xd6, 0xd3, 0xdc, 0xd9, 0x1a, 0x1f, 0x10, 0x15, 0xe, 0xb, 0x4, 0x1, 0x32, 0x37, 0x38, 0x3d, 0x26, 0x23, 0x2c, 0x29, 0x4a, 0x4f, 0x40, 0x45, 0x5e, 0x5b, 0x54, 0x51, 0x62, 0x67, 0x68, 0x6d, 0x76, 0x73, 0x7c, 0x79, 0xe7, 0xe2, 0xed, 0xe8, 0xf3, 0xf6, 0xf9, 0xfc, 0xcf, 0xca, 0xc5, 0xc0, 0xdb, 0xde, 0xd1, 0xd4, 0xb7, 0xb2, 0xbd, 0xb8, 0xa3, 0xa6, 0xa9, 0xac, 0x9f, 0x9a, 0x95, 0x90, 0x8b, 0x8e, 0x81, 0x84, 0x47, 0x42, 0x4d, 0x48, 0x53, 0x56, 0x59, 0x5c, 0x6f, 0x6a, 0x65, 0x60, 0x7b, 0x7e, 0x71, 0x74, 0x17, 0x12, 0x1d, 0x18, 0x3, 0x6, 0x9, 0xc, 0x3f, 0x3a, 0x35, 0x30, 0x2b, 0x2e, 0x21, 0x24},
+ {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22, 0x60, 0x66, 0x6c, 0x6a, 0x78, 0x7e, 0x74, 0x72, 0x50, 0x56, 0x5c, 0x5a, 0x48, 0x4e, 0x44, 0x42, 0xc0, 0xc6, 0xcc, 0xca, 0xd8, 0xde, 0xd4, 0xd2, 0xf0, 0xf6, 0xfc, 0xfa, 0xe8, 0xee, 0xe4, 0xe2, 0xa0, 0xa6, 0xac, 0xaa, 0xb8, 0xbe, 0xb4, 0xb2, 0x90, 0x96, 0x9c, 0x9a, 0x88, 0x8e, 0x84, 0x82, 0x9d, 0x9b, 0x91, 0x97, 0x85, 0x83, 0x89, 0x8f, 0xad, 0xab, 0xa1, 0xa7, 0xb5, 0xb3, 0xb9, 0xbf, 0xfd, 0xfb, 0xf1, 0xf7, 0xe5, 0xe3, 0xe9, 0xef, 0xcd, 0xcb, 0xc1, 0xc7, 0xd5, 0xd3, 0xd9, 0xdf, 0x5d, 0x5b, 0x51, 0x57, 0x45, 0x43, 0x49, 0x4f, 0x6d, 0x6b, 0x61, 0x67, 0x75, 0x73, 0x79, 0x7f, 0x3d, 0x3b, 0x31, 0x37, 0x25, 0x23, 0x29, 0x2f, 0xd, 0xb, 0x1, 0x7, 0x15, 0x13, 0x19, 0x1f, 0x27, 0x21, 0x2b, 0x2d, 0x3f, 0x39, 0x33, 0x35, 0x17, 0x11, 0x1b, 0x1d, 0xf, 0x9, 0x3, 0x5, 0x47, 0x41, 0x4b, 0x4d, 0x5f, 0x59, 0x53, 0x55, 0x77, 0x71, 0x7b, 0x7d, 0x6f, 0x69, 0x63, 0x65, 0xe7, 0xe1, 0xeb, 0xed, 0xff, 0xf9, 0xf3, 0xf5, 0xd7, 0xd1, 0xdb, 0xdd, 0xcf, 0xc9, 0xc3, 0xc5, 0x87, 0x81, 0x8b, 0x8d, 0x9f, 0x99, 0x93, 0x95, 0xb7, 0xb1, 0xbb, 0xbd, 0xaf, 0xa9, 0xa3, 0xa5, 0xba, 0xbc, 0xb6, 0xb0, 0xa2, 0xa4, 0xae, 0xa8, 0x8a, 0x8c, 0x86, 0x80, 0x92, 0x94, 0x9e, 0x98, 0xda, 0xdc, 0xd6, 0xd0, 0xc2, 0xc4, 0xce, 0xc8, 0xea, 0xec, 0xe6, 0xe0, 0xf2, 0xf4, 0xfe, 0xf8, 0x7a, 0x7c, 0x76, 0x70, 0x62, 0x64, 0x6e, 0x68, 0x4a, 0x4c, 0x46, 0x40, 0x52, 0x54, 0x5e, 0x58, 0x1a, 0x1c, 0x16, 0x10, 0x2, 0x4, 0xe, 0x8, 0x2a, 0x2c, 0x26, 0x20, 0x32, 0x34, 0x3e, 0x38},
+ {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, 0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d, 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5, 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, 0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, 0xdd, 0xda, 0xd3, 0xd4, 0xc1, 0xc6, 0xcf, 0xc8, 0xe5, 0xe2, 0xeb, 0xec, 0xf9, 0xfe, 0xf7, 0xf0, 0xad, 0xaa, 0xa3, 0xa4, 0xb1, 0xb6, 0xbf, 0xb8, 0x95, 0x92, 0x9b, 0x9c, 0x89, 0x8e, 0x87, 0x80, 0x3d, 0x3a, 0x33, 0x34, 0x21, 0x26, 0x2f, 0x28, 0x5, 0x2, 0xb, 0xc, 0x19, 0x1e, 0x17, 0x10, 0x4d, 0x4a, 0x43, 0x44, 0x51, 0x56, 0x5f, 0x58, 0x75, 0x72, 0x7b, 0x7c, 0x69, 0x6e, 0x67, 0x60, 0xa7, 0xa0, 0xa9, 0xae, 0xbb, 0xbc, 0xb5, 0xb2, 0x9f, 0x98, 0x91, 0x96, 0x83, 0x84, 0x8d, 0x8a, 0xd7, 0xd0, 0xd9, 0xde, 0xcb, 0xcc, 0xc5, 0xc2, 0xef, 0xe8, 0xe1, 0xe6, 0xf3, 0xf4, 0xfd, 0xfa, 0x47, 0x40, 0x49, 0x4e, 0x5b, 0x5c, 0x55, 0x52, 0x7f, 0x78, 0x71, 0x76, 0x63, 0x64, 0x6d, 0x6a, 0x37, 0x30, 0x39, 0x3e, 0x2b, 0x2c, 0x25, 0x22, 0xf, 0x8, 0x1, 0x6, 0x13, 0x14, 0x1d, 0x1a, 0x7a, 0x7d, 0x74, 0x73, 0x66, 0x61, 0x68, 0x6f, 0x42, 0x45, 0x4c, 0x4b, 0x5e, 0x59, 0x50, 0x57, 0xa, 0xd, 0x4, 0x3, 0x16, 0x11, 0x18, 0x1f, 0x32, 0x35, 0x3c, 0x3b, 0x2e, 0x29, 0x20, 0x27, 0x9a, 0x9d, 0x94, 0x93, 0x86, 0x81, 0x88, 0x8f, 0xa2, 0xa5, 0xac, 0xab, 0xbe, 0xb9, 0xb0, 0xb7, 0xea, 0xed, 0xe4, 0xe3, 0xf6, 0xf1, 0xf8, 0xff, 0xd2, 0xd5, 0xdc, 0xdb, 0xce, 0xc9, 0xc0, 0xc7},
+ {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78, 0x80, 0x88, 0x90, 0x98, 0xa0, 0xa8, 0xb0, 0xb8, 0xc0, 0xc8, 0xd0, 0xd8, 0xe0, 0xe8, 0xf0, 0xf8, 0x1d, 0x15, 0xd, 0x5, 0x3d, 0x35, 0x2d, 0x25, 0x5d, 0x55, 0x4d, 0x45, 0x7d, 0x75, 0x6d, 0x65, 0x9d, 0x95, 0x8d, 0x85, 0xbd, 0xb5, 0xad, 0xa5, 0xdd, 0xd5, 0xcd, 0xc5, 0xfd, 0xf5, 0xed, 0xe5, 0x3a, 0x32, 0x2a, 0x22, 0x1a, 0x12, 0xa, 0x2, 0x7a, 0x72, 0x6a, 0x62, 0x5a, 0x52, 0x4a, 0x42, 0xba, 0xb2, 0xaa, 0xa2, 0x9a, 0x92, 0x8a, 0x82, 0xfa, 0xf2, 0xea, 0xe2, 0xda, 0xd2, 0xca, 0xc2, 0x27, 0x2f, 0x37, 0x3f, 0x7, 0xf, 0x17, 0x1f, 0x67, 0x6f, 0x77, 0x7f, 0x47, 0x4f, 0x57, 0x5f, 0xa7, 0xaf, 0xb7, 0xbf, 0x87, 0x8f, 0x97, 0x9f, 0xe7, 0xef, 0xf7, 0xff, 0xc7, 0xcf, 0xd7, 0xdf, 0x74, 0x7c, 0x64, 0x6c, 0x54, 0x5c, 0x44, 0x4c, 0x34, 0x3c, 0x24, 0x2c, 0x14, 0x1c, 0x4, 0xc, 0xf4, 0xfc, 0xe4, 0xec, 0xd4, 0xdc, 0xc4, 0xcc, 0xb4, 0xbc, 0xa4, 0xac, 0x94, 0x9c, 0x84, 0x8c, 0x69, 0x61, 0x79, 0x71, 0x49, 0x41, 0x59, 0x51, 0x29, 0x21, 0x39, 0x31, 0x9, 0x1, 0x19, 0x11, 0xe9, 0xe1, 0xf9, 0xf1, 0xc9, 0xc1, 0xd9, 0xd1, 0xa9, 0xa1, 0xb9, 0xb1, 0x89, 0x81, 0x99, 0x91, 0x4e, 0x46, 0x5e, 0x56, 0x6e, 0x66, 0x7e, 0x76, 0xe, 0x6, 0x1e, 0x16, 0x2e, 0x26, 0x3e, 0x36, 0xce, 0xc6, 0xde, 0xd6, 0xee, 0xe6, 0xfe, 0xf6, 0x8e, 0x86, 0x9e, 0x96, 0xae, 0xa6, 0xbe, 0xb6, 0x53, 0x5b, 0x43, 0x4b, 0x73, 0x7b, 0x63, 0x6b, 0x13, 0x1b, 0x3, 0xb, 0x33, 0x3b, 0x23, 0x2b, 0xd3, 0xdb, 0xc3, 0xcb, 0xf3, 0xfb, 0xe3, 0xeb, 0x93, 0x9b, 0x83, 0x8b, 0xb3, 0xbb, 0xa3, 0xab},
+ {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, 0x3d, 0x34, 0x2f, 0x26, 0x19, 0x10, 0xb, 0x2, 0x75, 0x7c, 0x67, 0x6e, 0x51, 0x58, 0x43, 0x4a, 0xad, 0xa4, 0xbf, 0xb6, 0x89, 0x80, 0x9b, 0x92, 0xe5, 0xec, 0xf7, 0xfe, 0xc1, 0xc8, 0xd3, 0xda, 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x4, 0xd, 0xea, 0xe3, 0xf8, 0xf1, 0xce, 0xc7, 0xdc, 0xd5, 0xa2, 0xab, 0xb0, 0xb9, 0x86, 0x8f, 0x94, 0x9d, 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0xf, 0x6, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, 0xf4, 0xfd, 0xe6, 0xef, 0xd0, 0xd9, 0xc2, 0xcb, 0xbc, 0xb5, 0xae, 0xa7, 0x98, 0x91, 0x8a, 0x83, 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, 0x8, 0x1, 0x1a, 0x13, 0xc9, 0xc0, 0xdb, 0xd2, 0xed, 0xe4, 0xff, 0xf6, 0x81, 0x88, 0x93, 0x9a, 0xa5, 0xac, 0xb7, 0xbe, 0x59, 0x50, 0x4b, 0x42, 0x7d, 0x74, 0x6f, 0x66, 0x11, 0x18, 0x3, 0xa, 0x35, 0x3c, 0x27, 0x2e, 0x8e, 0x87, 0x9c, 0x95, 0xaa, 0xa3, 0xb8, 0xb1, 0xc6, 0xcf, 0xd4, 0xdd, 0xe2, 0xeb, 0xf0, 0xf9, 0x1e, 0x17, 0xc, 0x5, 0x3a, 0x33, 0x28, 0x21, 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0xb3, 0xba, 0xa1, 0xa8, 0x97, 0x9e, 0x85, 0x8c, 0xfb, 0xf2, 0xe9, 0xe0, 0xdf, 0xd6, 0xcd, 0xc4, 0x23, 0x2a, 0x31, 0x38, 0x7, 0xe, 0x15, 0x1c, 0x6b, 0x62, 0x79, 0x70, 0x4f, 0x46, 0x5d, 0x54},
+ {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66, 0xa0, 0xaa, 0xb4, 0xbe, 0x88, 0x82, 0x9c, 0x96, 0xf0, 0xfa, 0xe4, 0xee, 0xd8, 0xd2, 0xcc, 0xc6, 0x5d, 0x57, 0x49, 0x43, 0x75, 0x7f, 0x61, 0x6b, 0xd, 0x7, 0x19, 0x13, 0x25, 0x2f, 0x31, 0x3b, 0xfd, 0xf7, 0xe9, 0xe3, 0xd5, 0xdf, 0xc1, 0xcb, 0xad, 0xa7, 0xb9, 0xb3, 0x85, 0x8f, 0x91, 0x9b, 0xba, 0xb0, 0xae, 0xa4, 0x92, 0x98, 0x86, 0x8c, 0xea, 0xe0, 0xfe, 0xf4, 0xc2, 0xc8, 0xd6, 0xdc, 0x1a, 0x10, 0xe, 0x4, 0x32, 0x38, 0x26, 0x2c, 0x4a, 0x40, 0x5e, 0x54, 0x62, 0x68, 0x76, 0x7c, 0xe7, 0xed, 0xf3, 0xf9, 0xcf, 0xc5, 0xdb, 0xd1, 0xb7, 0xbd, 0xa3, 0xa9, 0x9f, 0x95, 0x8b, 0x81, 0x47, 0x4d, 0x53, 0x59, 0x6f, 0x65, 0x7b, 0x71, 0x17, 0x1d, 0x3, 0x9, 0x3f, 0x35, 0x2b, 0x21, 0x69, 0x63, 0x7d, 0x77, 0x41, 0x4b, 0x55, 0x5f, 0x39, 0x33, 0x2d, 0x27, 0x11, 0x1b, 0x5, 0xf, 0xc9, 0xc3, 0xdd, 0xd7, 0xe1, 0xeb, 0xf5, 0xff, 0x99, 0x93, 0x8d, 0x87, 0xb1, 0xbb, 0xa5, 0xaf, 0x34, 0x3e, 0x20, 0x2a, 0x1c, 0x16, 0x8, 0x2, 0x64, 0x6e, 0x70, 0x7a, 0x4c, 0x46, 0x58, 0x52, 0x94, 0x9e, 0x80, 0x8a, 0xbc, 0xb6, 0xa8, 0xa2, 0xc4, 0xce, 0xd0, 0xda, 0xec, 0xe6, 0xf8, 0xf2, 0xd3, 0xd9, 0xc7, 0xcd, 0xfb, 0xf1, 0xef, 0xe5, 0x83, 0x89, 0x97, 0x9d, 0xab, 0xa1, 0xbf, 0xb5, 0x73, 0x79, 0x67, 0x6d, 0x5b, 0x51, 0x4f, 0x45, 0x23, 0x29, 0x37, 0x3d, 0xb, 0x1, 0x1f, 0x15, 0x8e, 0x84, 0x9a, 0x90, 0xa6, 0xac, 0xb2, 0xb8, 0xde, 0xd4, 0xca, 0xc0, 0xf6, 0xfc, 0xe2, 0xe8, 0x2e, 0x24, 0x3a, 0x30, 0x6, 0xc, 0x12, 0x18, 0x7e, 0x74, 0x6a, 0x60, 0x56, 0x5c, 0x42, 0x48},
+ {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, 0x7d, 0x76, 0x6b, 0x60, 0x51, 0x5a, 0x47, 0x4c, 0x25, 0x2e, 0x33, 0x38, 0x9, 0x2, 0x1f, 0x14, 0xcd, 0xc6, 0xdb, 0xd0, 0xe1, 0xea, 0xf7, 0xfc, 0x95, 0x9e, 0x83, 0x88, 0xb9, 0xb2, 0xaf, 0xa4, 0xfa, 0xf1, 0xec, 0xe7, 0xd6, 0xdd, 0xc0, 0xcb, 0xa2, 0xa9, 0xb4, 0xbf, 0x8e, 0x85, 0x98, 0x93, 0x4a, 0x41, 0x5c, 0x57, 0x66, 0x6d, 0x70, 0x7b, 0x12, 0x19, 0x4, 0xf, 0x3e, 0x35, 0x28, 0x23, 0x87, 0x8c, 0x91, 0x9a, 0xab, 0xa0, 0xbd, 0xb6, 0xdf, 0xd4, 0xc9, 0xc2, 0xf3, 0xf8, 0xe5, 0xee, 0x37, 0x3c, 0x21, 0x2a, 0x1b, 0x10, 0xd, 0x6, 0x6f, 0x64, 0x79, 0x72, 0x43, 0x48, 0x55, 0x5e, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, 0x1, 0xa, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x94, 0x9f, 0x82, 0x89, 0xb8, 0xb3, 0xae, 0xa5, 0xcc, 0xc7, 0xda, 0xd1, 0xe0, 0xeb, 0xf6, 0xfd, 0x24, 0x2f, 0x32, 0x39, 0x8, 0x3, 0x1e, 0x15, 0x7c, 0x77, 0x6a, 0x61, 0x50, 0x5b, 0x46, 0x4d, 0x13, 0x18, 0x5, 0xe, 0x3f, 0x34, 0x29, 0x22, 0x4b, 0x40, 0x5d, 0x56, 0x67, 0x6c, 0x71, 0x7a, 0xa3, 0xa8, 0xb5, 0xbe, 0x8f, 0x84, 0x99, 0x92, 0xfb, 0xf0, 0xed, 0xe6, 0xd7, 0xdc, 0xc1, 0xca, 0x6e, 0x65, 0x78, 0x73, 0x42, 0x49, 0x54, 0x5f, 0x36, 0x3d, 0x20, 0x2b, 0x1a, 0x11, 0xc, 0x7, 0xde, 0xd5, 0xc8, 0xc3, 0xf2, 0xf9, 0xe4, 0xef, 0x86, 0x8d, 0x90, 0x9b, 0xaa, 0xa1, 0xbc, 0xb7},
+ {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44, 0xc0, 0xcc, 0xd8, 0xd4, 0xf0, 0xfc, 0xe8, 0xe4, 0xa0, 0xac, 0xb8, 0xb4, 0x90, 0x9c, 0x88, 0x84, 0x9d, 0x91, 0x85, 0x89, 0xad, 0xa1, 0xb5, 0xb9, 0xfd, 0xf1, 0xe5, 0xe9, 0xcd, 0xc1, 0xd5, 0xd9, 0x5d, 0x51, 0x45, 0x49, 0x6d, 0x61, 0x75, 0x79, 0x3d, 0x31, 0x25, 0x29, 0xd, 0x1, 0x15, 0x19, 0x27, 0x2b, 0x3f, 0x33, 0x17, 0x1b, 0xf, 0x3, 0x47, 0x4b, 0x5f, 0x53, 0x77, 0x7b, 0x6f, 0x63, 0xe7, 0xeb, 0xff, 0xf3, 0xd7, 0xdb, 0xcf, 0xc3, 0x87, 0x8b, 0x9f, 0x93, 0xb7, 0xbb, 0xaf, 0xa3, 0xba, 0xb6, 0xa2, 0xae, 0x8a, 0x86, 0x92, 0x9e, 0xda, 0xd6, 0xc2, 0xce, 0xea, 0xe6, 0xf2, 0xfe, 0x7a, 0x76, 0x62, 0x6e, 0x4a, 0x46, 0x52, 0x5e, 0x1a, 0x16, 0x2, 0xe, 0x2a, 0x26, 0x32, 0x3e, 0x4e, 0x42, 0x56, 0x5a, 0x7e, 0x72, 0x66, 0x6a, 0x2e, 0x22, 0x36, 0x3a, 0x1e, 0x12, 0x6, 0xa, 0x8e, 0x82, 0x96, 0x9a, 0xbe, 0xb2, 0xa6, 0xaa, 0xee, 0xe2, 0xf6, 0xfa, 0xde, 0xd2, 0xc6, 0xca, 0xd3, 0xdf, 0xcb, 0xc7, 0xe3, 0xef, 0xfb, 0xf7, 0xb3, 0xbf, 0xab, 0xa7, 0x83, 0x8f, 0x9b, 0x97, 0x13, 0x1f, 0xb, 0x7, 0x23, 0x2f, 0x3b, 0x37, 0x73, 0x7f, 0x6b, 0x67, 0x43, 0x4f, 0x5b, 0x57, 0x69, 0x65, 0x71, 0x7d, 0x59, 0x55, 0x41, 0x4d, 0x9, 0x5, 0x11, 0x1d, 0x39, 0x35, 0x21, 0x2d, 0xa9, 0xa5, 0xb1, 0xbd, 0x99, 0x95, 0x81, 0x8d, 0xc9, 0xc5, 0xd1, 0xdd, 0xf9, 0xf5, 0xe1, 0xed, 0xf4, 0xf8, 0xec, 0xe0, 0xc4, 0xc8, 0xdc, 0xd0, 0x94, 0x98, 0x8c, 0x80, 0xa4, 0xa8, 0xbc, 0xb0, 0x34, 0x38, 0x2c, 0x20, 0x4, 0x8, 0x1c, 0x10, 0x54, 0x58, 0x4c, 0x40, 0x64, 0x68, 0x7c, 0x70},
+ {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x5, 0x8, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0xf, 0x2, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, 0xa, 0x7, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, 0xce, 0xc3, 0xd4, 0xd9, 0xfa, 0xf7, 0xe0, 0xed, 0xa6, 0xab, 0xbc, 0xb1, 0x92, 0x9f, 0x88, 0x85, 0x1e, 0x13, 0x4, 0x9, 0x2a, 0x27, 0x30, 0x3d, 0x76, 0x7b, 0x6c, 0x61, 0x42, 0x4f, 0x58, 0x55, 0x73, 0x7e, 0x69, 0x64, 0x47, 0x4a, 0x5d, 0x50, 0x1b, 0x16, 0x1, 0xc, 0x2f, 0x22, 0x35, 0x38, 0xa3, 0xae, 0xb9, 0xb4, 0x97, 0x9a, 0x8d, 0x80, 0xcb, 0xc6, 0xd1, 0xdc, 0xff, 0xf2, 0xe5, 0xe8, 0xa9, 0xa4, 0xb3, 0xbe, 0x9d, 0x90, 0x87, 0x8a, 0xc1, 0xcc, 0xdb, 0xd6, 0xf5, 0xf8, 0xef, 0xe2, 0x79, 0x74, 0x63, 0x6e, 0x4d, 0x40, 0x57, 0x5a, 0x11, 0x1c, 0xb, 0x6, 0x25, 0x28, 0x3f, 0x32, 0x14, 0x19, 0xe, 0x3, 0x20, 0x2d, 0x3a, 0x37, 0x7c, 0x71, 0x66, 0x6b, 0x48, 0x45, 0x52, 0x5f, 0xc4, 0xc9, 0xde, 0xd3, 0xf0, 0xfd, 0xea, 0xe7, 0xac, 0xa1, 0xb6, 0xbb, 0x98, 0x95, 0x82, 0x8f},
+ {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0x3d, 0x33, 0x21, 0x2f, 0x5, 0xb, 0x19, 0x17, 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d, 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, 0x37, 0x39, 0x2b, 0x25, 0xf, 0x1, 0x13, 0x1d, 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0xa, 0x4, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, 0x53, 0x5d, 0x4f, 0x41, 0x6b, 0x65, 0x77, 0x79, 0x23, 0x2d, 0x3f, 0x31, 0x1b, 0x15, 0x7, 0x9, 0xb3, 0xbd, 0xaf, 0xa1, 0x8b, 0x85, 0x97, 0x99, 0xc3, 0xcd, 0xdf, 0xd1, 0xfb, 0xf5, 0xe7, 0xe9, 0x8e, 0x80, 0x92, 0x9c, 0xb6, 0xb8, 0xaa, 0xa4, 0xfe, 0xf0, 0xe2, 0xec, 0xc6, 0xc8, 0xda, 0xd4, 0x6e, 0x60, 0x72, 0x7c, 0x56, 0x58, 0x4a, 0x44, 0x1e, 0x10, 0x2, 0xc, 0x26, 0x28, 0x3a, 0x34, 0xf4, 0xfa, 0xe8, 0xe6, 0xcc, 0xc2, 0xd0, 0xde, 0x84, 0x8a, 0x98, 0x96, 0xbc, 0xb2, 0xa0, 0xae, 0x14, 0x1a, 0x8, 0x6, 0x2c, 0x22, 0x30, 0x3e, 0x64, 0x6a, 0x78, 0x76, 0x5c, 0x52, 0x40, 0x4e, 0x29, 0x27, 0x35, 0x3b, 0x11, 0x1f, 0xd, 0x3, 0x59, 0x57, 0x45, 0x4b, 0x61, 0x6f, 0x7d, 0x73, 0xc9, 0xc7, 0xd5, 0xdb, 0xf1, 0xff, 0xed, 0xe3, 0xb9, 0xb7, 0xa5, 0xab, 0x81, 0x8f, 0x9d, 0x93},
+ {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55, 0xf0, 0xff, 0xee, 0xe1, 0xcc, 0xc3, 0xd2, 0xdd, 0x88, 0x87, 0x96, 0x99, 0xb4, 0xbb, 0xaa, 0xa5, 0xfd, 0xf2, 0xe3, 0xec, 0xc1, 0xce, 0xdf, 0xd0, 0x85, 0x8a, 0x9b, 0x94, 0xb9, 0xb6, 0xa7, 0xa8, 0xd, 0x2, 0x13, 0x1c, 0x31, 0x3e, 0x2f, 0x20, 0x75, 0x7a, 0x6b, 0x64, 0x49, 0x46, 0x57, 0x58, 0xe7, 0xe8, 0xf9, 0xf6, 0xdb, 0xd4, 0xc5, 0xca, 0x9f, 0x90, 0x81, 0x8e, 0xa3, 0xac, 0xbd, 0xb2, 0x17, 0x18, 0x9, 0x6, 0x2b, 0x24, 0x35, 0x3a, 0x6f, 0x60, 0x71, 0x7e, 0x53, 0x5c, 0x4d, 0x42, 0x1a, 0x15, 0x4, 0xb, 0x26, 0x29, 0x38, 0x37, 0x62, 0x6d, 0x7c, 0x73, 0x5e, 0x51, 0x40, 0x4f, 0xea, 0xe5, 0xf4, 0xfb, 0xd6, 0xd9, 0xc8, 0xc7, 0x92, 0x9d, 0x8c, 0x83, 0xae, 0xa1, 0xb0, 0xbf, 0xd3, 0xdc, 0xcd, 0xc2, 0xef, 0xe0, 0xf1, 0xfe, 0xab, 0xa4, 0xb5, 0xba, 0x97, 0x98, 0x89, 0x86, 0x23, 0x2c, 0x3d, 0x32, 0x1f, 0x10, 0x1, 0xe, 0x5b, 0x54, 0x45, 0x4a, 0x67, 0x68, 0x79, 0x76, 0x2e, 0x21, 0x30, 0x3f, 0x12, 0x1d, 0xc, 0x3, 0x56, 0x59, 0x48, 0x47, 0x6a, 0x65, 0x74, 0x7b, 0xde, 0xd1, 0xc0, 0xcf, 0xe2, 0xed, 0xfc, 0xf3, 0xa6, 0xa9, 0xb8, 0xb7, 0x9a, 0x95, 0x84, 0x8b, 0x34, 0x3b, 0x2a, 0x25, 0x8, 0x7, 0x16, 0x19, 0x4c, 0x43, 0x52, 0x5d, 0x70, 0x7f, 0x6e, 0x61, 0xc4, 0xcb, 0xda, 0xd5, 0xf8, 0xf7, 0xe6, 0xe9, 0xbc, 0xb3, 0xa2, 0xad, 0x80, 0x8f, 0x9e, 0x91, 0xc9, 0xc6, 0xd7, 0xd8, 0xf5, 0xfa, 0xeb, 0xe4, 0xb1, 0xbe, 0xaf, 0xa0, 0x8d, 0x82, 0x93, 0x9c, 0x39, 0x36, 0x27, 0x28, 0x5, 0xa, 0x1b, 0x14, 0x41, 0x4e, 0x5f, 0x50, 0x7d, 0x72, 0x63, 0x6c},
+ {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0, 0x1d, 0xd, 0x3d, 0x2d, 0x5d, 0x4d, 0x7d, 0x6d, 0x9d, 0x8d, 0xbd, 0xad, 0xdd, 0xcd, 0xfd, 0xed, 0x3a, 0x2a, 0x1a, 0xa, 0x7a, 0x6a, 0x5a, 0x4a, 0xba, 0xaa, 0x9a, 0x8a, 0xfa, 0xea, 0xda, 0xca, 0x27, 0x37, 0x7, 0x17, 0x67, 0x77, 0x47, 0x57, 0xa7, 0xb7, 0x87, 0x97, 0xe7, 0xf7, 0xc7, 0xd7, 0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x4, 0xf4, 0xe4, 0xd4, 0xc4, 0xb4, 0xa4, 0x94, 0x84, 0x69, 0x79, 0x49, 0x59, 0x29, 0x39, 0x9, 0x19, 0xe9, 0xf9, 0xc9, 0xd9, 0xa9, 0xb9, 0x89, 0x99, 0x4e, 0x5e, 0x6e, 0x7e, 0xe, 0x1e, 0x2e, 0x3e, 0xce, 0xde, 0xee, 0xfe, 0x8e, 0x9e, 0xae, 0xbe, 0x53, 0x43, 0x73, 0x63, 0x13, 0x3, 0x33, 0x23, 0xd3, 0xc3, 0xf3, 0xe3, 0x93, 0x83, 0xb3, 0xa3, 0xe8, 0xf8, 0xc8, 0xd8, 0xa8, 0xb8, 0x88, 0x98, 0x68, 0x78, 0x48, 0x58, 0x28, 0x38, 0x8, 0x18, 0xf5, 0xe5, 0xd5, 0xc5, 0xb5, 0xa5, 0x95, 0x85, 0x75, 0x65, 0x55, 0x45, 0x35, 0x25, 0x15, 0x5, 0xd2, 0xc2, 0xf2, 0xe2, 0x92, 0x82, 0xb2, 0xa2, 0x52, 0x42, 0x72, 0x62, 0x12, 0x2, 0x32, 0x22, 0xcf, 0xdf, 0xef, 0xff, 0x8f, 0x9f, 0xaf, 0xbf, 0x4f, 0x5f, 0x6f, 0x7f, 0xf, 0x1f, 0x2f, 0x3f, 0x9c, 0x8c, 0xbc, 0xac, 0xdc, 0xcc, 0xfc, 0xec, 0x1c, 0xc, 0x3c, 0x2c, 0x5c, 0x4c, 0x7c, 0x6c, 0x81, 0x91, 0xa1, 0xb1, 0xc1, 0xd1, 0xe1, 0xf1, 0x1, 0x11, 0x21, 0x31, 0x41, 0x51, 0x61, 0x71, 0xa6, 0xb6, 0x86, 0x96, 0xe6, 0xf6, 0xc6, 0xd6, 0x26, 0x36, 0x6, 0x16, 0x66, 0x76, 0x46, 0x56, 0xbb, 0xab, 0x9b, 0x8b, 0xfb, 0xeb, 0xdb, 0xcb, 0x3b, 0x2b, 0x1b, 0xb, 0x7b, 0x6b, 0x5b, 0x4b},
+ {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0xd, 0x1c, 0x2f, 0x3e, 0x49, 0x58, 0x6b, 0x7a, 0x85, 0x94, 0xa7, 0xb6, 0xc1, 0xd0, 0xe3, 0xf2, 0x1a, 0xb, 0x38, 0x29, 0x5e, 0x4f, 0x7c, 0x6d, 0x92, 0x83, 0xb0, 0xa1, 0xd6, 0xc7, 0xf4, 0xe5, 0x17, 0x6, 0x35, 0x24, 0x53, 0x42, 0x71, 0x60, 0x9f, 0x8e, 0xbd, 0xac, 0xdb, 0xca, 0xf9, 0xe8, 0x34, 0x25, 0x16, 0x7, 0x70, 0x61, 0x52, 0x43, 0xbc, 0xad, 0x9e, 0x8f, 0xf8, 0xe9, 0xda, 0xcb, 0x39, 0x28, 0x1b, 0xa, 0x7d, 0x6c, 0x5f, 0x4e, 0xb1, 0xa0, 0x93, 0x82, 0xf5, 0xe4, 0xd7, 0xc6, 0x2e, 0x3f, 0xc, 0x1d, 0x6a, 0x7b, 0x48, 0x59, 0xa6, 0xb7, 0x84, 0x95, 0xe2, 0xf3, 0xc0, 0xd1, 0x23, 0x32, 0x1, 0x10, 0x67, 0x76, 0x45, 0x54, 0xab, 0xba, 0x89, 0x98, 0xef, 0xfe, 0xcd, 0xdc, 0x68, 0x79, 0x4a, 0x5b, 0x2c, 0x3d, 0xe, 0x1f, 0xe0, 0xf1, 0xc2, 0xd3, 0xa4, 0xb5, 0x86, 0x97, 0x65, 0x74, 0x47, 0x56, 0x21, 0x30, 0x3, 0x12, 0xed, 0xfc, 0xcf, 0xde, 0xa9, 0xb8, 0x8b, 0x9a, 0x72, 0x63, 0x50, 0x41, 0x36, 0x27, 0x14, 0x5, 0xfa, 0xeb, 0xd8, 0xc9, 0xbe, 0xaf, 0x9c, 0x8d, 0x7f, 0x6e, 0x5d, 0x4c, 0x3b, 0x2a, 0x19, 0x8, 0xf7, 0xe6, 0xd5, 0xc4, 0xb3, 0xa2, 0x91, 0x80, 0x5c, 0x4d, 0x7e, 0x6f, 0x18, 0x9, 0x3a, 0x2b, 0xd4, 0xc5, 0xf6, 0xe7, 0x90, 0x81, 0xb2, 0xa3, 0x51, 0x40, 0x73, 0x62, 0x15, 0x4, 0x37, 0x26, 0xd9, 0xc8, 0xfb, 0xea, 0x9d, 0x8c, 0xbf, 0xae, 0x46, 0x57, 0x64, 0x75, 0x2, 0x13, 0x20, 0x31, 0xce, 0xdf, 0xec, 0xfd, 0x8a, 0x9b, 0xa8, 0xb9, 0x4b, 0x5a, 0x69, 0x78, 0xf, 0x1e, 0x2d, 0x3c, 0xc3, 0xd2, 0xe1, 0xf0, 0x87, 0x96, 0xa5, 0xb4},
+ {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee, 0x3d, 0x2f, 0x19, 0xb, 0x75, 0x67, 0x51, 0x43, 0xad, 0xbf, 0x89, 0x9b, 0xe5, 0xf7, 0xc1, 0xd3, 0x7a, 0x68, 0x5e, 0x4c, 0x32, 0x20, 0x16, 0x4, 0xea, 0xf8, 0xce, 0xdc, 0xa2, 0xb0, 0x86, 0x94, 0x47, 0x55, 0x63, 0x71, 0xf, 0x1d, 0x2b, 0x39, 0xd7, 0xc5, 0xf3, 0xe1, 0x9f, 0x8d, 0xbb, 0xa9, 0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a, 0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x8, 0x1a, 0xc9, 0xdb, 0xed, 0xff, 0x81, 0x93, 0xa5, 0xb7, 0x59, 0x4b, 0x7d, 0x6f, 0x11, 0x3, 0x35, 0x27, 0x8e, 0x9c, 0xaa, 0xb8, 0xc6, 0xd4, 0xe2, 0xf0, 0x1e, 0xc, 0x3a, 0x28, 0x56, 0x44, 0x72, 0x60, 0xb3, 0xa1, 0x97, 0x85, 0xfb, 0xe9, 0xdf, 0xcd, 0x23, 0x31, 0x7, 0x15, 0x6b, 0x79, 0x4f, 0x5d, 0xf5, 0xe7, 0xd1, 0xc3, 0xbd, 0xaf, 0x99, 0x8b, 0x65, 0x77, 0x41, 0x53, 0x2d, 0x3f, 0x9, 0x1b, 0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6, 0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x2, 0x34, 0x26, 0x8f, 0x9d, 0xab, 0xb9, 0xc7, 0xd5, 0xe3, 0xf1, 0x1f, 0xd, 0x3b, 0x29, 0x57, 0x45, 0x73, 0x61, 0xb2, 0xa0, 0x96, 0x84, 0xfa, 0xe8, 0xde, 0xcc, 0x22, 0x30, 0x6, 0x14, 0x6a, 0x78, 0x4e, 0x5c, 0x1, 0x13, 0x25, 0x37, 0x49, 0x5b, 0x6d, 0x7f, 0x91, 0x83, 0xb5, 0xa7, 0xd9, 0xcb, 0xfd, 0xef, 0x3c, 0x2e, 0x18, 0xa, 0x74, 0x66, 0x50, 0x42, 0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2, 0x7b, 0x69, 0x5f, 0x4d, 0x33, 0x21, 0x17, 0x5, 0xeb, 0xf9, 0xcf, 0xdd, 0xa3, 0xb1, 0x87, 0x95, 0x46, 0x54, 0x62, 0x70, 0xe, 0x1c, 0x2a, 0x38, 0xd6, 0xc4, 0xf2, 0xe0, 0x9e, 0x8c, 0xba, 0xa8},
+ {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1, 0x2d, 0x3e, 0xb, 0x18, 0x61, 0x72, 0x47, 0x54, 0xb5, 0xa6, 0x93, 0x80, 0xf9, 0xea, 0xdf, 0xcc, 0x5a, 0x49, 0x7c, 0x6f, 0x16, 0x5, 0x30, 0x23, 0xc2, 0xd1, 0xe4, 0xf7, 0x8e, 0x9d, 0xa8, 0xbb, 0x77, 0x64, 0x51, 0x42, 0x3b, 0x28, 0x1d, 0xe, 0xef, 0xfc, 0xc9, 0xda, 0xa3, 0xb0, 0x85, 0x96, 0xb4, 0xa7, 0x92, 0x81, 0xf8, 0xeb, 0xde, 0xcd, 0x2c, 0x3f, 0xa, 0x19, 0x60, 0x73, 0x46, 0x55, 0x99, 0x8a, 0xbf, 0xac, 0xd5, 0xc6, 0xf3, 0xe0, 0x1, 0x12, 0x27, 0x34, 0x4d, 0x5e, 0x6b, 0x78, 0xee, 0xfd, 0xc8, 0xdb, 0xa2, 0xb1, 0x84, 0x97, 0x76, 0x65, 0x50, 0x43, 0x3a, 0x29, 0x1c, 0xf, 0xc3, 0xd0, 0xe5, 0xf6, 0x8f, 0x9c, 0xa9, 0xba, 0x5b, 0x48, 0x7d, 0x6e, 0x17, 0x4, 0x31, 0x22, 0x75, 0x66, 0x53, 0x40, 0x39, 0x2a, 0x1f, 0xc, 0xed, 0xfe, 0xcb, 0xd8, 0xa1, 0xb2, 0x87, 0x94, 0x58, 0x4b, 0x7e, 0x6d, 0x14, 0x7, 0x32, 0x21, 0xc0, 0xd3, 0xe6, 0xf5, 0x8c, 0x9f, 0xaa, 0xb9, 0x2f, 0x3c, 0x9, 0x1a, 0x63, 0x70, 0x45, 0x56, 0xb7, 0xa4, 0x91, 0x82, 0xfb, 0xe8, 0xdd, 0xce, 0x2, 0x11, 0x24, 0x37, 0x4e, 0x5d, 0x68, 0x7b, 0x9a, 0x89, 0xbc, 0xaf, 0xd6, 0xc5, 0xf0, 0xe3, 0xc1, 0xd2, 0xe7, 0xf4, 0x8d, 0x9e, 0xab, 0xb8, 0x59, 0x4a, 0x7f, 0x6c, 0x15, 0x6, 0x33, 0x20, 0xec, 0xff, 0xca, 0xd9, 0xa0, 0xb3, 0x86, 0x95, 0x74, 0x67, 0x52, 0x41, 0x38, 0x2b, 0x1e, 0xd, 0x9b, 0x88, 0xbd, 0xae, 0xd7, 0xc4, 0xf1, 0xe2, 0x3, 0x10, 0x25, 0x36, 0x4f, 0x5c, 0x69, 0x7a, 0xb6, 0xa5, 0x90, 0x83, 0xfa, 0xe9, 0xdc, 0xcf, 0x2e, 0x3d, 0x8, 0x1b, 0x62, 0x71, 0x44, 0x57},
+ {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc, 0x5d, 0x49, 0x75, 0x61, 0xd, 0x19, 0x25, 0x31, 0xfd, 0xe9, 0xd5, 0xc1, 0xad, 0xb9, 0x85, 0x91, 0xba, 0xae, 0x92, 0x86, 0xea, 0xfe, 0xc2, 0xd6, 0x1a, 0xe, 0x32, 0x26, 0x4a, 0x5e, 0x62, 0x76, 0xe7, 0xf3, 0xcf, 0xdb, 0xb7, 0xa3, 0x9f, 0x8b, 0x47, 0x53, 0x6f, 0x7b, 0x17, 0x3, 0x3f, 0x2b, 0x69, 0x7d, 0x41, 0x55, 0x39, 0x2d, 0x11, 0x5, 0xc9, 0xdd, 0xe1, 0xf5, 0x99, 0x8d, 0xb1, 0xa5, 0x34, 0x20, 0x1c, 0x8, 0x64, 0x70, 0x4c, 0x58, 0x94, 0x80, 0xbc, 0xa8, 0xc4, 0xd0, 0xec, 0xf8, 0xd3, 0xc7, 0xfb, 0xef, 0x83, 0x97, 0xab, 0xbf, 0x73, 0x67, 0x5b, 0x4f, 0x23, 0x37, 0xb, 0x1f, 0x8e, 0x9a, 0xa6, 0xb2, 0xde, 0xca, 0xf6, 0xe2, 0x2e, 0x3a, 0x6, 0x12, 0x7e, 0x6a, 0x56, 0x42, 0xd2, 0xc6, 0xfa, 0xee, 0x82, 0x96, 0xaa, 0xbe, 0x72, 0x66, 0x5a, 0x4e, 0x22, 0x36, 0xa, 0x1e, 0x8f, 0x9b, 0xa7, 0xb3, 0xdf, 0xcb, 0xf7, 0xe3, 0x2f, 0x3b, 0x7, 0x13, 0x7f, 0x6b, 0x57, 0x43, 0x68, 0x7c, 0x40, 0x54, 0x38, 0x2c, 0x10, 0x4, 0xc8, 0xdc, 0xe0, 0xf4, 0x98, 0x8c, 0xb0, 0xa4, 0x35, 0x21, 0x1d, 0x9, 0x65, 0x71, 0x4d, 0x59, 0x95, 0x81, 0xbd, 0xa9, 0xc5, 0xd1, 0xed, 0xf9, 0xbb, 0xaf, 0x93, 0x87, 0xeb, 0xff, 0xc3, 0xd7, 0x1b, 0xf, 0x33, 0x27, 0x4b, 0x5f, 0x63, 0x77, 0xe6, 0xf2, 0xce, 0xda, 0xb6, 0xa2, 0x9e, 0x8a, 0x46, 0x52, 0x6e, 0x7a, 0x16, 0x2, 0x3e, 0x2a, 0x1, 0x15, 0x29, 0x3d, 0x51, 0x45, 0x79, 0x6d, 0xa1, 0xb5, 0x89, 0x9d, 0xf1, 0xe5, 0xd9, 0xcd, 0x5c, 0x48, 0x74, 0x60, 0xc, 0x18, 0x24, 0x30, 0xfc, 0xe8, 0xd4, 0xc0, 0xac, 0xb8, 0x84, 0x90},
+ {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3, 0x4d, 0x58, 0x67, 0x72, 0x19, 0xc, 0x33, 0x26, 0xe5, 0xf0, 0xcf, 0xda, 0xb1, 0xa4, 0x9b, 0x8e, 0x9a, 0x8f, 0xb0, 0xa5, 0xce, 0xdb, 0xe4, 0xf1, 0x32, 0x27, 0x18, 0xd, 0x66, 0x73, 0x4c, 0x59, 0xd7, 0xc2, 0xfd, 0xe8, 0x83, 0x96, 0xa9, 0xbc, 0x7f, 0x6a, 0x55, 0x40, 0x2b, 0x3e, 0x1, 0x14, 0x29, 0x3c, 0x3, 0x16, 0x7d, 0x68, 0x57, 0x42, 0x81, 0x94, 0xab, 0xbe, 0xd5, 0xc0, 0xff, 0xea, 0x64, 0x71, 0x4e, 0x5b, 0x30, 0x25, 0x1a, 0xf, 0xcc, 0xd9, 0xe6, 0xf3, 0x98, 0x8d, 0xb2, 0xa7, 0xb3, 0xa6, 0x99, 0x8c, 0xe7, 0xf2, 0xcd, 0xd8, 0x1b, 0xe, 0x31, 0x24, 0x4f, 0x5a, 0x65, 0x70, 0xfe, 0xeb, 0xd4, 0xc1, 0xaa, 0xbf, 0x80, 0x95, 0x56, 0x43, 0x7c, 0x69, 0x2, 0x17, 0x28, 0x3d, 0x52, 0x47, 0x78, 0x6d, 0x6, 0x13, 0x2c, 0x39, 0xfa, 0xef, 0xd0, 0xc5, 0xae, 0xbb, 0x84, 0x91, 0x1f, 0xa, 0x35, 0x20, 0x4b, 0x5e, 0x61, 0x74, 0xb7, 0xa2, 0x9d, 0x88, 0xe3, 0xf6, 0xc9, 0xdc, 0xc8, 0xdd, 0xe2, 0xf7, 0x9c, 0x89, 0xb6, 0xa3, 0x60, 0x75, 0x4a, 0x5f, 0x34, 0x21, 0x1e, 0xb, 0x85, 0x90, 0xaf, 0xba, 0xd1, 0xc4, 0xfb, 0xee, 0x2d, 0x38, 0x7, 0x12, 0x79, 0x6c, 0x53, 0x46, 0x7b, 0x6e, 0x51, 0x44, 0x2f, 0x3a, 0x5, 0x10, 0xd3, 0xc6, 0xf9, 0xec, 0x87, 0x92, 0xad, 0xb8, 0x36, 0x23, 0x1c, 0x9, 0x62, 0x77, 0x48, 0x5d, 0x9e, 0x8b, 0xb4, 0xa1, 0xca, 0xdf, 0xe0, 0xf5, 0xe1, 0xf4, 0xcb, 0xde, 0xb5, 0xa0, 0x9f, 0x8a, 0x49, 0x5c, 0x63, 0x76, 0x1d, 0x8, 0x37, 0x22, 0xac, 0xb9, 0x86, 0x93, 0xf8, 0xed, 0xd2, 0xc7, 0x4, 0x11, 0x2e, 0x3b, 0x50, 0x45, 0x7a, 0x6f},
+ {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2, 0x7d, 0x6b, 0x51, 0x47, 0x25, 0x33, 0x9, 0x1f, 0xcd, 0xdb, 0xe1, 0xf7, 0x95, 0x83, 0xb9, 0xaf, 0xfa, 0xec, 0xd6, 0xc0, 0xa2, 0xb4, 0x8e, 0x98, 0x4a, 0x5c, 0x66, 0x70, 0x12, 0x4, 0x3e, 0x28, 0x87, 0x91, 0xab, 0xbd, 0xdf, 0xc9, 0xf3, 0xe5, 0x37, 0x21, 0x1b, 0xd, 0x6f, 0x79, 0x43, 0x55, 0xe9, 0xff, 0xc5, 0xd3, 0xb1, 0xa7, 0x9d, 0x8b, 0x59, 0x4f, 0x75, 0x63, 0x1, 0x17, 0x2d, 0x3b, 0x94, 0x82, 0xb8, 0xae, 0xcc, 0xda, 0xe0, 0xf6, 0x24, 0x32, 0x8, 0x1e, 0x7c, 0x6a, 0x50, 0x46, 0x13, 0x5, 0x3f, 0x29, 0x4b, 0x5d, 0x67, 0x71, 0xa3, 0xb5, 0x8f, 0x99, 0xfb, 0xed, 0xd7, 0xc1, 0x6e, 0x78, 0x42, 0x54, 0x36, 0x20, 0x1a, 0xc, 0xde, 0xc8, 0xf2, 0xe4, 0x86, 0x90, 0xaa, 0xbc, 0xcf, 0xd9, 0xe3, 0xf5, 0x97, 0x81, 0xbb, 0xad, 0x7f, 0x69, 0x53, 0x45, 0x27, 0x31, 0xb, 0x1d, 0xb2, 0xa4, 0x9e, 0x88, 0xea, 0xfc, 0xc6, 0xd0, 0x2, 0x14, 0x2e, 0x38, 0x5a, 0x4c, 0x76, 0x60, 0x35, 0x23, 0x19, 0xf, 0x6d, 0x7b, 0x41, 0x57, 0x85, 0x93, 0xa9, 0xbf, 0xdd, 0xcb, 0xf1, 0xe7, 0x48, 0x5e, 0x64, 0x72, 0x10, 0x6, 0x3c, 0x2a, 0xf8, 0xee, 0xd4, 0xc2, 0xa0, 0xb6, 0x8c, 0x9a, 0x26, 0x30, 0xa, 0x1c, 0x7e, 0x68, 0x52, 0x44, 0x96, 0x80, 0xba, 0xac, 0xce, 0xd8, 0xe2, 0xf4, 0x5b, 0x4d, 0x77, 0x61, 0x3, 0x15, 0x2f, 0x39, 0xeb, 0xfd, 0xc7, 0xd1, 0xb3, 0xa5, 0x9f, 0x89, 0xdc, 0xca, 0xf0, 0xe6, 0x84, 0x92, 0xa8, 0xbe, 0x6c, 0x7a, 0x40, 0x56, 0x34, 0x22, 0x18, 0xe, 0xa1, 0xb7, 0x8d, 0x9b, 0xf9, 0xef, 0xd5, 0xc3, 0x11, 0x7, 0x3d, 0x2b, 0x49, 0x5f, 0x65, 0x73},
+ {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd, 0x6d, 0x7a, 0x43, 0x54, 0x31, 0x26, 0x1f, 0x8, 0xd5, 0xc2, 0xfb, 0xec, 0x89, 0x9e, 0xa7, 0xb0, 0xda, 0xcd, 0xf4, 0xe3, 0x86, 0x91, 0xa8, 0xbf, 0x62, 0x75, 0x4c, 0x5b, 0x3e, 0x29, 0x10, 0x7, 0xb7, 0xa0, 0x99, 0x8e, 0xeb, 0xfc, 0xc5, 0xd2, 0xf, 0x18, 0x21, 0x36, 0x53, 0x44, 0x7d, 0x6a, 0xa9, 0xbe, 0x87, 0x90, 0xf5, 0xe2, 0xdb, 0xcc, 0x11, 0x6, 0x3f, 0x28, 0x4d, 0x5a, 0x63, 0x74, 0xc4, 0xd3, 0xea, 0xfd, 0x98, 0x8f, 0xb6, 0xa1, 0x7c, 0x6b, 0x52, 0x45, 0x20, 0x37, 0xe, 0x19, 0x73, 0x64, 0x5d, 0x4a, 0x2f, 0x38, 0x1, 0x16, 0xcb, 0xdc, 0xe5, 0xf2, 0x97, 0x80, 0xb9, 0xae, 0x1e, 0x9, 0x30, 0x27, 0x42, 0x55, 0x6c, 0x7b, 0xa6, 0xb1, 0x88, 0x9f, 0xfa, 0xed, 0xd4, 0xc3, 0x4f, 0x58, 0x61, 0x76, 0x13, 0x4, 0x3d, 0x2a, 0xf7, 0xe0, 0xd9, 0xce, 0xab, 0xbc, 0x85, 0x92, 0x22, 0x35, 0xc, 0x1b, 0x7e, 0x69, 0x50, 0x47, 0x9a, 0x8d, 0xb4, 0xa3, 0xc6, 0xd1, 0xe8, 0xff, 0x95, 0x82, 0xbb, 0xac, 0xc9, 0xde, 0xe7, 0xf0, 0x2d, 0x3a, 0x3, 0x14, 0x71, 0x66, 0x5f, 0x48, 0xf8, 0xef, 0xd6, 0xc1, 0xa4, 0xb3, 0x8a, 0x9d, 0x40, 0x57, 0x6e, 0x79, 0x1c, 0xb, 0x32, 0x25, 0xe6, 0xf1, 0xc8, 0xdf, 0xba, 0xad, 0x94, 0x83, 0x5e, 0x49, 0x70, 0x67, 0x2, 0x15, 0x2c, 0x3b, 0x8b, 0x9c, 0xa5, 0xb2, 0xd7, 0xc0, 0xf9, 0xee, 0x33, 0x24, 0x1d, 0xa, 0x6f, 0x78, 0x41, 0x56, 0x3c, 0x2b, 0x12, 0x5, 0x60, 0x77, 0x4e, 0x59, 0x84, 0x93, 0xaa, 0xbd, 0xd8, 0xcf, 0xf6, 0xe1, 0x51, 0x46, 0x7f, 0x68, 0xd, 0x1a, 0x23, 0x34, 0xe9, 0xfe, 0xc7, 0xd0, 0xb5, 0xa2, 0x9b, 0x8c},
+ {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88, 0x9d, 0x85, 0xad, 0xb5, 0xfd, 0xe5, 0xcd, 0xd5, 0x5d, 0x45, 0x6d, 0x75, 0x3d, 0x25, 0xd, 0x15, 0x27, 0x3f, 0x17, 0xf, 0x47, 0x5f, 0x77, 0x6f, 0xe7, 0xff, 0xd7, 0xcf, 0x87, 0x9f, 0xb7, 0xaf, 0xba, 0xa2, 0x8a, 0x92, 0xda, 0xc2, 0xea, 0xf2, 0x7a, 0x62, 0x4a, 0x52, 0x1a, 0x2, 0x2a, 0x32, 0x4e, 0x56, 0x7e, 0x66, 0x2e, 0x36, 0x1e, 0x6, 0x8e, 0x96, 0xbe, 0xa6, 0xee, 0xf6, 0xde, 0xc6, 0xd3, 0xcb, 0xe3, 0xfb, 0xb3, 0xab, 0x83, 0x9b, 0x13, 0xb, 0x23, 0x3b, 0x73, 0x6b, 0x43, 0x5b, 0x69, 0x71, 0x59, 0x41, 0x9, 0x11, 0x39, 0x21, 0xa9, 0xb1, 0x99, 0x81, 0xc9, 0xd1, 0xf9, 0xe1, 0xf4, 0xec, 0xc4, 0xdc, 0x94, 0x8c, 0xa4, 0xbc, 0x34, 0x2c, 0x4, 0x1c, 0x54, 0x4c, 0x64, 0x7c, 0x9c, 0x84, 0xac, 0xb4, 0xfc, 0xe4, 0xcc, 0xd4, 0x5c, 0x44, 0x6c, 0x74, 0x3c, 0x24, 0xc, 0x14, 0x1, 0x19, 0x31, 0x29, 0x61, 0x79, 0x51, 0x49, 0xc1, 0xd9, 0xf1, 0xe9, 0xa1, 0xb9, 0x91, 0x89, 0xbb, 0xa3, 0x8b, 0x93, 0xdb, 0xc3, 0xeb, 0xf3, 0x7b, 0x63, 0x4b, 0x53, 0x1b, 0x3, 0x2b, 0x33, 0x26, 0x3e, 0x16, 0xe, 0x46, 0x5e, 0x76, 0x6e, 0xe6, 0xfe, 0xd6, 0xce, 0x86, 0x9e, 0xb6, 0xae, 0xd2, 0xca, 0xe2, 0xfa, 0xb2, 0xaa, 0x82, 0x9a, 0x12, 0xa, 0x22, 0x3a, 0x72, 0x6a, 0x42, 0x5a, 0x4f, 0x57, 0x7f, 0x67, 0x2f, 0x37, 0x1f, 0x7, 0x8f, 0x97, 0xbf, 0xa7, 0xef, 0xf7, 0xdf, 0xc7, 0xf5, 0xed, 0xc5, 0xdd, 0x95, 0x8d, 0xa5, 0xbd, 0x35, 0x2d, 0x5, 0x1d, 0x55, 0x4d, 0x65, 0x7d, 0x68, 0x70, 0x58, 0x40, 0x8, 0x10, 0x38, 0x20, 0xa8, 0xb0, 0x98, 0x80, 0xc8, 0xd0, 0xf8, 0xe0},
+ {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87, 0x8d, 0x94, 0xbf, 0xa6, 0xe9, 0xf0, 0xdb, 0xc2, 0x45, 0x5c, 0x77, 0x6e, 0x21, 0x38, 0x13, 0xa, 0x7, 0x1e, 0x35, 0x2c, 0x63, 0x7a, 0x51, 0x48, 0xcf, 0xd6, 0xfd, 0xe4, 0xab, 0xb2, 0x99, 0x80, 0x8a, 0x93, 0xb8, 0xa1, 0xee, 0xf7, 0xdc, 0xc5, 0x42, 0x5b, 0x70, 0x69, 0x26, 0x3f, 0x14, 0xd, 0xe, 0x17, 0x3c, 0x25, 0x6a, 0x73, 0x58, 0x41, 0xc6, 0xdf, 0xf4, 0xed, 0xa2, 0xbb, 0x90, 0x89, 0x83, 0x9a, 0xb1, 0xa8, 0xe7, 0xfe, 0xd5, 0xcc, 0x4b, 0x52, 0x79, 0x60, 0x2f, 0x36, 0x1d, 0x4, 0x9, 0x10, 0x3b, 0x22, 0x6d, 0x74, 0x5f, 0x46, 0xc1, 0xd8, 0xf3, 0xea, 0xa5, 0xbc, 0x97, 0x8e, 0x84, 0x9d, 0xb6, 0xaf, 0xe0, 0xf9, 0xd2, 0xcb, 0x4c, 0x55, 0x7e, 0x67, 0x28, 0x31, 0x1a, 0x3, 0x1c, 0x5, 0x2e, 0x37, 0x78, 0x61, 0x4a, 0x53, 0xd4, 0xcd, 0xe6, 0xff, 0xb0, 0xa9, 0x82, 0x9b, 0x91, 0x88, 0xa3, 0xba, 0xf5, 0xec, 0xc7, 0xde, 0x59, 0x40, 0x6b, 0x72, 0x3d, 0x24, 0xf, 0x16, 0x1b, 0x2, 0x29, 0x30, 0x7f, 0x66, 0x4d, 0x54, 0xd3, 0xca, 0xe1, 0xf8, 0xb7, 0xae, 0x85, 0x9c, 0x96, 0x8f, 0xa4, 0xbd, 0xf2, 0xeb, 0xc0, 0xd9, 0x5e, 0x47, 0x6c, 0x75, 0x3a, 0x23, 0x8, 0x11, 0x12, 0xb, 0x20, 0x39, 0x76, 0x6f, 0x44, 0x5d, 0xda, 0xc3, 0xe8, 0xf1, 0xbe, 0xa7, 0x8c, 0x95, 0x9f, 0x86, 0xad, 0xb4, 0xfb, 0xe2, 0xc9, 0xd0, 0x57, 0x4e, 0x65, 0x7c, 0x33, 0x2a, 0x1, 0x18, 0x15, 0xc, 0x27, 0x3e, 0x71, 0x68, 0x43, 0x5a, 0xdd, 0xc4, 0xef, 0xf6, 0xb9, 0xa0, 0x8b, 0x92, 0x98, 0x81, 0xaa, 0xb3, 0xfc, 0xe5, 0xce, 0xd7, 0x50, 0x49, 0x62, 0x7b, 0x34, 0x2d, 0x6, 0x1f},
+ {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96, 0xbd, 0xa7, 0x89, 0x93, 0xd5, 0xcf, 0xe1, 0xfb, 0x6d, 0x77, 0x59, 0x43, 0x5, 0x1f, 0x31, 0x2b, 0x67, 0x7d, 0x53, 0x49, 0xf, 0x15, 0x3b, 0x21, 0xb7, 0xad, 0x83, 0x99, 0xdf, 0xc5, 0xeb, 0xf1, 0xda, 0xc0, 0xee, 0xf4, 0xb2, 0xa8, 0x86, 0x9c, 0xa, 0x10, 0x3e, 0x24, 0x62, 0x78, 0x56, 0x4c, 0xce, 0xd4, 0xfa, 0xe0, 0xa6, 0xbc, 0x92, 0x88, 0x1e, 0x4, 0x2a, 0x30, 0x76, 0x6c, 0x42, 0x58, 0x73, 0x69, 0x47, 0x5d, 0x1b, 0x1, 0x2f, 0x35, 0xa3, 0xb9, 0x97, 0x8d, 0xcb, 0xd1, 0xff, 0xe5, 0xa9, 0xb3, 0x9d, 0x87, 0xc1, 0xdb, 0xf5, 0xef, 0x79, 0x63, 0x4d, 0x57, 0x11, 0xb, 0x25, 0x3f, 0x14, 0xe, 0x20, 0x3a, 0x7c, 0x66, 0x48, 0x52, 0xc4, 0xde, 0xf0, 0xea, 0xac, 0xb6, 0x98, 0x82, 0x81, 0x9b, 0xb5, 0xaf, 0xe9, 0xf3, 0xdd, 0xc7, 0x51, 0x4b, 0x65, 0x7f, 0x39, 0x23, 0xd, 0x17, 0x3c, 0x26, 0x8, 0x12, 0x54, 0x4e, 0x60, 0x7a, 0xec, 0xf6, 0xd8, 0xc2, 0x84, 0x9e, 0xb0, 0xaa, 0xe6, 0xfc, 0xd2, 0xc8, 0x8e, 0x94, 0xba, 0xa0, 0x36, 0x2c, 0x2, 0x18, 0x5e, 0x44, 0x6a, 0x70, 0x5b, 0x41, 0x6f, 0x75, 0x33, 0x29, 0x7, 0x1d, 0x8b, 0x91, 0xbf, 0xa5, 0xe3, 0xf9, 0xd7, 0xcd, 0x4f, 0x55, 0x7b, 0x61, 0x27, 0x3d, 0x13, 0x9, 0x9f, 0x85, 0xab, 0xb1, 0xf7, 0xed, 0xc3, 0xd9, 0xf2, 0xe8, 0xc6, 0xdc, 0x9a, 0x80, 0xae, 0xb4, 0x22, 0x38, 0x16, 0xc, 0x4a, 0x50, 0x7e, 0x64, 0x28, 0x32, 0x1c, 0x6, 0x40, 0x5a, 0x74, 0x6e, 0xf8, 0xe2, 0xcc, 0xd6, 0x90, 0x8a, 0xa4, 0xbe, 0x95, 0x8f, 0xa1, 0xbb, 0xfd, 0xe7, 0xc9, 0xd3, 0x45, 0x5f, 0x71, 0x6b, 0x2d, 0x37, 0x19, 0x3},
+ {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99, 0xad, 0xb6, 0x9b, 0x80, 0xc1, 0xda, 0xf7, 0xec, 0x75, 0x6e, 0x43, 0x58, 0x19, 0x2, 0x2f, 0x34, 0x47, 0x5c, 0x71, 0x6a, 0x2b, 0x30, 0x1d, 0x6, 0x9f, 0x84, 0xa9, 0xb2, 0xf3, 0xe8, 0xc5, 0xde, 0xea, 0xf1, 0xdc, 0xc7, 0x86, 0x9d, 0xb0, 0xab, 0x32, 0x29, 0x4, 0x1f, 0x5e, 0x45, 0x68, 0x73, 0x8e, 0x95, 0xb8, 0xa3, 0xe2, 0xf9, 0xd4, 0xcf, 0x56, 0x4d, 0x60, 0x7b, 0x3a, 0x21, 0xc, 0x17, 0x23, 0x38, 0x15, 0xe, 0x4f, 0x54, 0x79, 0x62, 0xfb, 0xe0, 0xcd, 0xd6, 0x97, 0x8c, 0xa1, 0xba, 0xc9, 0xd2, 0xff, 0xe4, 0xa5, 0xbe, 0x93, 0x88, 0x11, 0xa, 0x27, 0x3c, 0x7d, 0x66, 0x4b, 0x50, 0x64, 0x7f, 0x52, 0x49, 0x8, 0x13, 0x3e, 0x25, 0xbc, 0xa7, 0x8a, 0x91, 0xd0, 0xcb, 0xe6, 0xfd, 0x1, 0x1a, 0x37, 0x2c, 0x6d, 0x76, 0x5b, 0x40, 0xd9, 0xc2, 0xef, 0xf4, 0xb5, 0xae, 0x83, 0x98, 0xac, 0xb7, 0x9a, 0x81, 0xc0, 0xdb, 0xf6, 0xed, 0x74, 0x6f, 0x42, 0x59, 0x18, 0x3, 0x2e, 0x35, 0x46, 0x5d, 0x70, 0x6b, 0x2a, 0x31, 0x1c, 0x7, 0x9e, 0x85, 0xa8, 0xb3, 0xf2, 0xe9, 0xc4, 0xdf, 0xeb, 0xf0, 0xdd, 0xc6, 0x87, 0x9c, 0xb1, 0xaa, 0x33, 0x28, 0x5, 0x1e, 0x5f, 0x44, 0x69, 0x72, 0x8f, 0x94, 0xb9, 0xa2, 0xe3, 0xf8, 0xd5, 0xce, 0x57, 0x4c, 0x61, 0x7a, 0x3b, 0x20, 0xd, 0x16, 0x22, 0x39, 0x14, 0xf, 0x4e, 0x55, 0x78, 0x63, 0xfa, 0xe1, 0xcc, 0xd7, 0x96, 0x8d, 0xa0, 0xbb, 0xc8, 0xd3, 0xfe, 0xe5, 0xa4, 0xbf, 0x92, 0x89, 0x10, 0xb, 0x26, 0x3d, 0x7c, 0x67, 0x4a, 0x51, 0x65, 0x7e, 0x53, 0x48, 0x9, 0x12, 0x3f, 0x24, 0xbd, 0xa6, 0x8b, 0x90, 0xd1, 0xca, 0xe7, 0xfc},
+ {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4, 0xdd, 0xc1, 0xe5, 0xf9, 0xad, 0xb1, 0x95, 0x89, 0x3d, 0x21, 0x5, 0x19, 0x4d, 0x51, 0x75, 0x69, 0xa7, 0xbb, 0x9f, 0x83, 0xd7, 0xcb, 0xef, 0xf3, 0x47, 0x5b, 0x7f, 0x63, 0x37, 0x2b, 0xf, 0x13, 0x7a, 0x66, 0x42, 0x5e, 0xa, 0x16, 0x32, 0x2e, 0x9a, 0x86, 0xa2, 0xbe, 0xea, 0xf6, 0xd2, 0xce, 0x53, 0x4f, 0x6b, 0x77, 0x23, 0x3f, 0x1b, 0x7, 0xb3, 0xaf, 0x8b, 0x97, 0xc3, 0xdf, 0xfb, 0xe7, 0x8e, 0x92, 0xb6, 0xaa, 0xfe, 0xe2, 0xc6, 0xda, 0x6e, 0x72, 0x56, 0x4a, 0x1e, 0x2, 0x26, 0x3a, 0xf4, 0xe8, 0xcc, 0xd0, 0x84, 0x98, 0xbc, 0xa0, 0x14, 0x8, 0x2c, 0x30, 0x64, 0x78, 0x5c, 0x40, 0x29, 0x35, 0x11, 0xd, 0x59, 0x45, 0x61, 0x7d, 0xc9, 0xd5, 0xf1, 0xed, 0xb9, 0xa5, 0x81, 0x9d, 0xa6, 0xba, 0x9e, 0x82, 0xd6, 0xca, 0xee, 0xf2, 0x46, 0x5a, 0x7e, 0x62, 0x36, 0x2a, 0xe, 0x12, 0x7b, 0x67, 0x43, 0x5f, 0xb, 0x17, 0x33, 0x2f, 0x9b, 0x87, 0xa3, 0xbf, 0xeb, 0xf7, 0xd3, 0xcf, 0x1, 0x1d, 0x39, 0x25, 0x71, 0x6d, 0x49, 0x55, 0xe1, 0xfd, 0xd9, 0xc5, 0x91, 0x8d, 0xa9, 0xb5, 0xdc, 0xc0, 0xe4, 0xf8, 0xac, 0xb0, 0x94, 0x88, 0x3c, 0x20, 0x4, 0x18, 0x4c, 0x50, 0x74, 0x68, 0xf5, 0xe9, 0xcd, 0xd1, 0x85, 0x99, 0xbd, 0xa1, 0x15, 0x9, 0x2d, 0x31, 0x65, 0x79, 0x5d, 0x41, 0x28, 0x34, 0x10, 0xc, 0x58, 0x44, 0x60, 0x7c, 0xc8, 0xd4, 0xf0, 0xec, 0xb8, 0xa4, 0x80, 0x9c, 0x52, 0x4e, 0x6a, 0x76, 0x22, 0x3e, 0x1a, 0x6, 0xb2, 0xae, 0x8a, 0x96, 0xc2, 0xde, 0xfa, 0xe6, 0x8f, 0x93, 0xb7, 0xab, 0xff, 0xe3, 0xc7, 0xdb, 0x6f, 0x73, 0x57, 0x4b, 0x1f, 0x3, 0x27, 0x3b},
+ {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb, 0xcd, 0xd0, 0xf7, 0xea, 0xb9, 0xa4, 0x83, 0x9e, 0x25, 0x38, 0x1f, 0x2, 0x51, 0x4c, 0x6b, 0x76, 0x87, 0x9a, 0xbd, 0xa0, 0xf3, 0xee, 0xc9, 0xd4, 0x6f, 0x72, 0x55, 0x48, 0x1b, 0x6, 0x21, 0x3c, 0x4a, 0x57, 0x70, 0x6d, 0x3e, 0x23, 0x4, 0x19, 0xa2, 0xbf, 0x98, 0x85, 0xd6, 0xcb, 0xec, 0xf1, 0x13, 0xe, 0x29, 0x34, 0x67, 0x7a, 0x5d, 0x40, 0xfb, 0xe6, 0xc1, 0xdc, 0x8f, 0x92, 0xb5, 0xa8, 0xde, 0xc3, 0xe4, 0xf9, 0xaa, 0xb7, 0x90, 0x8d, 0x36, 0x2b, 0xc, 0x11, 0x42, 0x5f, 0x78, 0x65, 0x94, 0x89, 0xae, 0xb3, 0xe0, 0xfd, 0xda, 0xc7, 0x7c, 0x61, 0x46, 0x5b, 0x8, 0x15, 0x32, 0x2f, 0x59, 0x44, 0x63, 0x7e, 0x2d, 0x30, 0x17, 0xa, 0xb1, 0xac, 0x8b, 0x96, 0xc5, 0xd8, 0xff, 0xe2, 0x26, 0x3b, 0x1c, 0x1, 0x52, 0x4f, 0x68, 0x75, 0xce, 0xd3, 0xf4, 0xe9, 0xba, 0xa7, 0x80, 0x9d, 0xeb, 0xf6, 0xd1, 0xcc, 0x9f, 0x82, 0xa5, 0xb8, 0x3, 0x1e, 0x39, 0x24, 0x77, 0x6a, 0x4d, 0x50, 0xa1, 0xbc, 0x9b, 0x86, 0xd5, 0xc8, 0xef, 0xf2, 0x49, 0x54, 0x73, 0x6e, 0x3d, 0x20, 0x7, 0x1a, 0x6c, 0x71, 0x56, 0x4b, 0x18, 0x5, 0x22, 0x3f, 0x84, 0x99, 0xbe, 0xa3, 0xf0, 0xed, 0xca, 0xd7, 0x35, 0x28, 0xf, 0x12, 0x41, 0x5c, 0x7b, 0x66, 0xdd, 0xc0, 0xe7, 0xfa, 0xa9, 0xb4, 0x93, 0x8e, 0xf8, 0xe5, 0xc2, 0xdf, 0x8c, 0x91, 0xb6, 0xab, 0x10, 0xd, 0x2a, 0x37, 0x64, 0x79, 0x5e, 0x43, 0xb2, 0xaf, 0x88, 0x95, 0xc6, 0xdb, 0xfc, 0xe1, 0x5a, 0x47, 0x60, 0x7d, 0x2e, 0x33, 0x14, 0x9, 0x7f, 0x62, 0x45, 0x58, 0xb, 0x16, 0x31, 0x2c, 0x97, 0x8a, 0xad, 0xb0, 0xe3, 0xfe, 0xd9, 0xc4},
+ {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa, 0xfd, 0xe3, 0xc1, 0xdf, 0x85, 0x9b, 0xb9, 0xa7, 0xd, 0x13, 0x31, 0x2f, 0x75, 0x6b, 0x49, 0x57, 0xe7, 0xf9, 0xdb, 0xc5, 0x9f, 0x81, 0xa3, 0xbd, 0x17, 0x9, 0x2b, 0x35, 0x6f, 0x71, 0x53, 0x4d, 0x1a, 0x4, 0x26, 0x38, 0x62, 0x7c, 0x5e, 0x40, 0xea, 0xf4, 0xd6, 0xc8, 0x92, 0x8c, 0xae, 0xb0, 0xd3, 0xcd, 0xef, 0xf1, 0xab, 0xb5, 0x97, 0x89, 0x23, 0x3d, 0x1f, 0x1, 0x5b, 0x45, 0x67, 0x79, 0x2e, 0x30, 0x12, 0xc, 0x56, 0x48, 0x6a, 0x74, 0xde, 0xc0, 0xe2, 0xfc, 0xa6, 0xb8, 0x9a, 0x84, 0x34, 0x2a, 0x8, 0x16, 0x4c, 0x52, 0x70, 0x6e, 0xc4, 0xda, 0xf8, 0xe6, 0xbc, 0xa2, 0x80, 0x9e, 0xc9, 0xd7, 0xf5, 0xeb, 0xb1, 0xaf, 0x8d, 0x93, 0x39, 0x27, 0x5, 0x1b, 0x41, 0x5f, 0x7d, 0x63, 0xbb, 0xa5, 0x87, 0x99, 0xc3, 0xdd, 0xff, 0xe1, 0x4b, 0x55, 0x77, 0x69, 0x33, 0x2d, 0xf, 0x11, 0x46, 0x58, 0x7a, 0x64, 0x3e, 0x20, 0x2, 0x1c, 0xb6, 0xa8, 0x8a, 0x94, 0xce, 0xd0, 0xf2, 0xec, 0x5c, 0x42, 0x60, 0x7e, 0x24, 0x3a, 0x18, 0x6, 0xac, 0xb2, 0x90, 0x8e, 0xd4, 0xca, 0xe8, 0xf6, 0xa1, 0xbf, 0x9d, 0x83, 0xd9, 0xc7, 0xe5, 0xfb, 0x51, 0x4f, 0x6d, 0x73, 0x29, 0x37, 0x15, 0xb, 0x68, 0x76, 0x54, 0x4a, 0x10, 0xe, 0x2c, 0x32, 0x98, 0x86, 0xa4, 0xba, 0xe0, 0xfe, 0xdc, 0xc2, 0x95, 0x8b, 0xa9, 0xb7, 0xed, 0xf3, 0xd1, 0xcf, 0x65, 0x7b, 0x59, 0x47, 0x1d, 0x3, 0x21, 0x3f, 0x8f, 0x91, 0xb3, 0xad, 0xf7, 0xe9, 0xcb, 0xd5, 0x7f, 0x61, 0x43, 0x5d, 0x7, 0x19, 0x3b, 0x25, 0x72, 0x6c, 0x4e, 0x50, 0xa, 0x14, 0x36, 0x28, 0x82, 0x9c, 0xbe, 0xa0, 0xfa, 0xe4, 0xc6, 0xd8},
+ {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5, 0xed, 0xf2, 0xd3, 0xcc, 0x91, 0x8e, 0xaf, 0xb0, 0x15, 0xa, 0x2b, 0x34, 0x69, 0x76, 0x57, 0x48, 0xc7, 0xd8, 0xf9, 0xe6, 0xbb, 0xa4, 0x85, 0x9a, 0x3f, 0x20, 0x1, 0x1e, 0x43, 0x5c, 0x7d, 0x62, 0x2a, 0x35, 0x14, 0xb, 0x56, 0x49, 0x68, 0x77, 0xd2, 0xcd, 0xec, 0xf3, 0xae, 0xb1, 0x90, 0x8f, 0x93, 0x8c, 0xad, 0xb2, 0xef, 0xf0, 0xd1, 0xce, 0x6b, 0x74, 0x55, 0x4a, 0x17, 0x8, 0x29, 0x36, 0x7e, 0x61, 0x40, 0x5f, 0x2, 0x1d, 0x3c, 0x23, 0x86, 0x99, 0xb8, 0xa7, 0xfa, 0xe5, 0xc4, 0xdb, 0x54, 0x4b, 0x6a, 0x75, 0x28, 0x37, 0x16, 0x9, 0xac, 0xb3, 0x92, 0x8d, 0xd0, 0xcf, 0xee, 0xf1, 0xb9, 0xa6, 0x87, 0x98, 0xc5, 0xda, 0xfb, 0xe4, 0x41, 0x5e, 0x7f, 0x60, 0x3d, 0x22, 0x3, 0x1c, 0x3b, 0x24, 0x5, 0x1a, 0x47, 0x58, 0x79, 0x66, 0xc3, 0xdc, 0xfd, 0xe2, 0xbf, 0xa0, 0x81, 0x9e, 0xd6, 0xc9, 0xe8, 0xf7, 0xaa, 0xb5, 0x94, 0x8b, 0x2e, 0x31, 0x10, 0xf, 0x52, 0x4d, 0x6c, 0x73, 0xfc, 0xe3, 0xc2, 0xdd, 0x80, 0x9f, 0xbe, 0xa1, 0x4, 0x1b, 0x3a, 0x25, 0x78, 0x67, 0x46, 0x59, 0x11, 0xe, 0x2f, 0x30, 0x6d, 0x72, 0x53, 0x4c, 0xe9, 0xf6, 0xd7, 0xc8, 0x95, 0x8a, 0xab, 0xb4, 0xa8, 0xb7, 0x96, 0x89, 0xd4, 0xcb, 0xea, 0xf5, 0x50, 0x4f, 0x6e, 0x71, 0x2c, 0x33, 0x12, 0xd, 0x45, 0x5a, 0x7b, 0x64, 0x39, 0x26, 0x7, 0x18, 0xbd, 0xa2, 0x83, 0x9c, 0xc1, 0xde, 0xff, 0xe0, 0x6f, 0x70, 0x51, 0x4e, 0x13, 0xc, 0x2d, 0x32, 0x97, 0x88, 0xa9, 0xb6, 0xeb, 0xf4, 0xd5, 0xca, 0x82, 0x9d, 0xbc, 0xa3, 0xfe, 0xe1, 0xc0, 0xdf, 0x7a, 0x65, 0x44, 0x5b, 0x6, 0x19, 0x38, 0x27},
+ {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd, 0x3a, 0x1a, 0x7a, 0x5a, 0xba, 0x9a, 0xfa, 0xda, 0x27, 0x7, 0x67, 0x47, 0xa7, 0x87, 0xe7, 0xc7, 0x74, 0x54, 0x34, 0x14, 0xf4, 0xd4, 0xb4, 0x94, 0x69, 0x49, 0x29, 0x9, 0xe9, 0xc9, 0xa9, 0x89, 0x4e, 0x6e, 0xe, 0x2e, 0xce, 0xee, 0x8e, 0xae, 0x53, 0x73, 0x13, 0x33, 0xd3, 0xf3, 0x93, 0xb3, 0xe8, 0xc8, 0xa8, 0x88, 0x68, 0x48, 0x28, 0x8, 0xf5, 0xd5, 0xb5, 0x95, 0x75, 0x55, 0x35, 0x15, 0xd2, 0xf2, 0x92, 0xb2, 0x52, 0x72, 0x12, 0x32, 0xcf, 0xef, 0x8f, 0xaf, 0x4f, 0x6f, 0xf, 0x2f, 0x9c, 0xbc, 0xdc, 0xfc, 0x1c, 0x3c, 0x5c, 0x7c, 0x81, 0xa1, 0xc1, 0xe1, 0x1, 0x21, 0x41, 0x61, 0xa6, 0x86, 0xe6, 0xc6, 0x26, 0x6, 0x66, 0x46, 0xbb, 0x9b, 0xfb, 0xdb, 0x3b, 0x1b, 0x7b, 0x5b, 0xcd, 0xed, 0x8d, 0xad, 0x4d, 0x6d, 0xd, 0x2d, 0xd0, 0xf0, 0x90, 0xb0, 0x50, 0x70, 0x10, 0x30, 0xf7, 0xd7, 0xb7, 0x97, 0x77, 0x57, 0x37, 0x17, 0xea, 0xca, 0xaa, 0x8a, 0x6a, 0x4a, 0x2a, 0xa, 0xb9, 0x99, 0xf9, 0xd9, 0x39, 0x19, 0x79, 0x59, 0xa4, 0x84, 0xe4, 0xc4, 0x24, 0x4, 0x64, 0x44, 0x83, 0xa3, 0xc3, 0xe3, 0x3, 0x23, 0x43, 0x63, 0x9e, 0xbe, 0xde, 0xfe, 0x1e, 0x3e, 0x5e, 0x7e, 0x25, 0x5, 0x65, 0x45, 0xa5, 0x85, 0xe5, 0xc5, 0x38, 0x18, 0x78, 0x58, 0xb8, 0x98, 0xf8, 0xd8, 0x1f, 0x3f, 0x5f, 0x7f, 0x9f, 0xbf, 0xdf, 0xff, 0x2, 0x22, 0x42, 0x62, 0x82, 0xa2, 0xc2, 0xe2, 0x51, 0x71, 0x11, 0x31, 0xd1, 0xf1, 0x91, 0xb1, 0x4c, 0x6c, 0xc, 0x2c, 0xcc, 0xec, 0x8c, 0xac, 0x6b, 0x4b, 0x2b, 0xb, 0xeb, 0xcb, 0xab, 0x8b, 0x76, 0x56, 0x36, 0x16, 0xf6, 0xd6, 0xb6, 0x96},
+ {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2, 0x2a, 0xb, 0x68, 0x49, 0xae, 0x8f, 0xec, 0xcd, 0x3f, 0x1e, 0x7d, 0x5c, 0xbb, 0x9a, 0xf9, 0xd8, 0x54, 0x75, 0x16, 0x37, 0xd0, 0xf1, 0x92, 0xb3, 0x41, 0x60, 0x3, 0x22, 0xc5, 0xe4, 0x87, 0xa6, 0x7e, 0x5f, 0x3c, 0x1d, 0xfa, 0xdb, 0xb8, 0x99, 0x6b, 0x4a, 0x29, 0x8, 0xef, 0xce, 0xad, 0x8c, 0xa8, 0x89, 0xea, 0xcb, 0x2c, 0xd, 0x6e, 0x4f, 0xbd, 0x9c, 0xff, 0xde, 0x39, 0x18, 0x7b, 0x5a, 0x82, 0xa3, 0xc0, 0xe1, 0x6, 0x27, 0x44, 0x65, 0x97, 0xb6, 0xd5, 0xf4, 0x13, 0x32, 0x51, 0x70, 0xfc, 0xdd, 0xbe, 0x9f, 0x78, 0x59, 0x3a, 0x1b, 0xe9, 0xc8, 0xab, 0x8a, 0x6d, 0x4c, 0x2f, 0xe, 0xd6, 0xf7, 0x94, 0xb5, 0x52, 0x73, 0x10, 0x31, 0xc3, 0xe2, 0x81, 0xa0, 0x47, 0x66, 0x5, 0x24, 0x4d, 0x6c, 0xf, 0x2e, 0xc9, 0xe8, 0x8b, 0xaa, 0x58, 0x79, 0x1a, 0x3b, 0xdc, 0xfd, 0x9e, 0xbf, 0x67, 0x46, 0x25, 0x4, 0xe3, 0xc2, 0xa1, 0x80, 0x72, 0x53, 0x30, 0x11, 0xf6, 0xd7, 0xb4, 0x95, 0x19, 0x38, 0x5b, 0x7a, 0x9d, 0xbc, 0xdf, 0xfe, 0xc, 0x2d, 0x4e, 0x6f, 0x88, 0xa9, 0xca, 0xeb, 0x33, 0x12, 0x71, 0x50, 0xb7, 0x96, 0xf5, 0xd4, 0x26, 0x7, 0x64, 0x45, 0xa2, 0x83, 0xe0, 0xc1, 0xe5, 0xc4, 0xa7, 0x86, 0x61, 0x40, 0x23, 0x2, 0xf0, 0xd1, 0xb2, 0x93, 0x74, 0x55, 0x36, 0x17, 0xcf, 0xee, 0x8d, 0xac, 0x4b, 0x6a, 0x9, 0x28, 0xda, 0xfb, 0x98, 0xb9, 0x5e, 0x7f, 0x1c, 0x3d, 0xb1, 0x90, 0xf3, 0xd2, 0x35, 0x14, 0x77, 0x56, 0xa4, 0x85, 0xe6, 0xc7, 0x20, 0x1, 0x62, 0x43, 0x9b, 0xba, 0xd9, 0xf8, 0x1f, 0x3e, 0x5d, 0x7c, 0x8e, 0xaf, 0xcc, 0xed, 0xa, 0x2b, 0x48, 0x69},
+ {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3, 0x1a, 0x38, 0x5e, 0x7c, 0x92, 0xb0, 0xd6, 0xf4, 0x17, 0x35, 0x53, 0x71, 0x9f, 0xbd, 0xdb, 0xf9, 0x34, 0x16, 0x70, 0x52, 0xbc, 0x9e, 0xf8, 0xda, 0x39, 0x1b, 0x7d, 0x5f, 0xb1, 0x93, 0xf5, 0xd7, 0x2e, 0xc, 0x6a, 0x48, 0xa6, 0x84, 0xe2, 0xc0, 0x23, 0x1, 0x67, 0x45, 0xab, 0x89, 0xef, 0xcd, 0x68, 0x4a, 0x2c, 0xe, 0xe0, 0xc2, 0xa4, 0x86, 0x65, 0x47, 0x21, 0x3, 0xed, 0xcf, 0xa9, 0x8b, 0x72, 0x50, 0x36, 0x14, 0xfa, 0xd8, 0xbe, 0x9c, 0x7f, 0x5d, 0x3b, 0x19, 0xf7, 0xd5, 0xb3, 0x91, 0x5c, 0x7e, 0x18, 0x3a, 0xd4, 0xf6, 0x90, 0xb2, 0x51, 0x73, 0x15, 0x37, 0xd9, 0xfb, 0x9d, 0xbf, 0x46, 0x64, 0x2, 0x20, 0xce, 0xec, 0x8a, 0xa8, 0x4b, 0x69, 0xf, 0x2d, 0xc3, 0xe1, 0x87, 0xa5, 0xd0, 0xf2, 0x94, 0xb6, 0x58, 0x7a, 0x1c, 0x3e, 0xdd, 0xff, 0x99, 0xbb, 0x55, 0x77, 0x11, 0x33, 0xca, 0xe8, 0x8e, 0xac, 0x42, 0x60, 0x6, 0x24, 0xc7, 0xe5, 0x83, 0xa1, 0x4f, 0x6d, 0xb, 0x29, 0xe4, 0xc6, 0xa0, 0x82, 0x6c, 0x4e, 0x28, 0xa, 0xe9, 0xcb, 0xad, 0x8f, 0x61, 0x43, 0x25, 0x7, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0xf3, 0xd1, 0xb7, 0x95, 0x7b, 0x59, 0x3f, 0x1d, 0xb8, 0x9a, 0xfc, 0xde, 0x30, 0x12, 0x74, 0x56, 0xb5, 0x97, 0xf1, 0xd3, 0x3d, 0x1f, 0x79, 0x5b, 0xa2, 0x80, 0xe6, 0xc4, 0x2a, 0x8, 0x6e, 0x4c, 0xaf, 0x8d, 0xeb, 0xc9, 0x27, 0x5, 0x63, 0x41, 0x8c, 0xae, 0xc8, 0xea, 0x4, 0x26, 0x40, 0x62, 0x81, 0xa3, 0xc5, 0xe7, 0x9, 0x2b, 0x4d, 0x6f, 0x96, 0xb4, 0xd2, 0xf0, 0x1e, 0x3c, 0x5a, 0x78, 0x9b, 0xb9, 0xdf, 0xfd, 0x13, 0x31, 0x57, 0x75},
+ {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec, 0xa, 0x29, 0x4c, 0x6f, 0x86, 0xa5, 0xc0, 0xe3, 0xf, 0x2c, 0x49, 0x6a, 0x83, 0xa0, 0xc5, 0xe6, 0x14, 0x37, 0x52, 0x71, 0x98, 0xbb, 0xde, 0xfd, 0x11, 0x32, 0x57, 0x74, 0x9d, 0xbe, 0xdb, 0xf8, 0x1e, 0x3d, 0x58, 0x7b, 0x92, 0xb1, 0xd4, 0xf7, 0x1b, 0x38, 0x5d, 0x7e, 0x97, 0xb4, 0xd1, 0xf2, 0x28, 0xb, 0x6e, 0x4d, 0xa4, 0x87, 0xe2, 0xc1, 0x2d, 0xe, 0x6b, 0x48, 0xa1, 0x82, 0xe7, 0xc4, 0x22, 0x1, 0x64, 0x47, 0xae, 0x8d, 0xe8, 0xcb, 0x27, 0x4, 0x61, 0x42, 0xab, 0x88, 0xed, 0xce, 0x3c, 0x1f, 0x7a, 0x59, 0xb0, 0x93, 0xf6, 0xd5, 0x39, 0x1a, 0x7f, 0x5c, 0xb5, 0x96, 0xf3, 0xd0, 0x36, 0x15, 0x70, 0x53, 0xba, 0x99, 0xfc, 0xdf, 0x33, 0x10, 0x75, 0x56, 0xbf, 0x9c, 0xf9, 0xda, 0x50, 0x73, 0x16, 0x35, 0xdc, 0xff, 0x9a, 0xb9, 0x55, 0x76, 0x13, 0x30, 0xd9, 0xfa, 0x9f, 0xbc, 0x5a, 0x79, 0x1c, 0x3f, 0xd6, 0xf5, 0x90, 0xb3, 0x5f, 0x7c, 0x19, 0x3a, 0xd3, 0xf0, 0x95, 0xb6, 0x44, 0x67, 0x2, 0x21, 0xc8, 0xeb, 0x8e, 0xad, 0x41, 0x62, 0x7, 0x24, 0xcd, 0xee, 0x8b, 0xa8, 0x4e, 0x6d, 0x8, 0x2b, 0xc2, 0xe1, 0x84, 0xa7, 0x4b, 0x68, 0xd, 0x2e, 0xc7, 0xe4, 0x81, 0xa2, 0x78, 0x5b, 0x3e, 0x1d, 0xf4, 0xd7, 0xb2, 0x91, 0x7d, 0x5e, 0x3b, 0x18, 0xf1, 0xd2, 0xb7, 0x94, 0x72, 0x51, 0x34, 0x17, 0xfe, 0xdd, 0xb8, 0x9b, 0x77, 0x54, 0x31, 0x12, 0xfb, 0xd8, 0xbd, 0x9e, 0x6c, 0x4f, 0x2a, 0x9, 0xe0, 0xc3, 0xa6, 0x85, 0x69, 0x4a, 0x2f, 0xc, 0xe5, 0xc6, 0xa3, 0x80, 0x66, 0x45, 0x20, 0x3, 0xea, 0xc9, 0xac, 0x8f, 0x63, 0x40, 0x25, 0x6, 0xef, 0xcc, 0xa9, 0x8a},
+ {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1, 0x7a, 0x5e, 0x32, 0x16, 0xea, 0xce, 0xa2, 0x86, 0x47, 0x63, 0xf, 0x2b, 0xd7, 0xf3, 0x9f, 0xbb, 0xf4, 0xd0, 0xbc, 0x98, 0x64, 0x40, 0x2c, 0x8, 0xc9, 0xed, 0x81, 0xa5, 0x59, 0x7d, 0x11, 0x35, 0x8e, 0xaa, 0xc6, 0xe2, 0x1e, 0x3a, 0x56, 0x72, 0xb3, 0x97, 0xfb, 0xdf, 0x23, 0x7, 0x6b, 0x4f, 0xf5, 0xd1, 0xbd, 0x99, 0x65, 0x41, 0x2d, 0x9, 0xc8, 0xec, 0x80, 0xa4, 0x58, 0x7c, 0x10, 0x34, 0x8f, 0xab, 0xc7, 0xe3, 0x1f, 0x3b, 0x57, 0x73, 0xb2, 0x96, 0xfa, 0xde, 0x22, 0x6, 0x6a, 0x4e, 0x1, 0x25, 0x49, 0x6d, 0x91, 0xb5, 0xd9, 0xfd, 0x3c, 0x18, 0x74, 0x50, 0xac, 0x88, 0xe4, 0xc0, 0x7b, 0x5f, 0x33, 0x17, 0xeb, 0xcf, 0xa3, 0x87, 0x46, 0x62, 0xe, 0x2a, 0xd6, 0xf2, 0x9e, 0xba, 0xf7, 0xd3, 0xbf, 0x9b, 0x67, 0x43, 0x2f, 0xb, 0xca, 0xee, 0x82, 0xa6, 0x5a, 0x7e, 0x12, 0x36, 0x8d, 0xa9, 0xc5, 0xe1, 0x1d, 0x39, 0x55, 0x71, 0xb0, 0x94, 0xf8, 0xdc, 0x20, 0x4, 0x68, 0x4c, 0x3, 0x27, 0x4b, 0x6f, 0x93, 0xb7, 0xdb, 0xff, 0x3e, 0x1a, 0x76, 0x52, 0xae, 0x8a, 0xe6, 0xc2, 0x79, 0x5d, 0x31, 0x15, 0xe9, 0xcd, 0xa1, 0x85, 0x44, 0x60, 0xc, 0x28, 0xd4, 0xf0, 0x9c, 0xb8, 0x2, 0x26, 0x4a, 0x6e, 0x92, 0xb6, 0xda, 0xfe, 0x3f, 0x1b, 0x77, 0x53, 0xaf, 0x8b, 0xe7, 0xc3, 0x78, 0x5c, 0x30, 0x14, 0xe8, 0xcc, 0xa0, 0x84, 0x45, 0x61, 0xd, 0x29, 0xd5, 0xf1, 0x9d, 0xb9, 0xf6, 0xd2, 0xbe, 0x9a, 0x66, 0x42, 0x2e, 0xa, 0xcb, 0xef, 0x83, 0xa7, 0x5b, 0x7f, 0x13, 0x37, 0x8c, 0xa8, 0xc4, 0xe0, 0x1c, 0x38, 0x54, 0x70, 0xb1, 0x95, 0xf9, 0xdd, 0x21, 0x5, 0x69, 0x4d},
+ {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce, 0x6a, 0x4f, 0x20, 0x5, 0xfe, 0xdb, 0xb4, 0x91, 0x5f, 0x7a, 0x15, 0x30, 0xcb, 0xee, 0x81, 0xa4, 0xd4, 0xf1, 0x9e, 0xbb, 0x40, 0x65, 0xa, 0x2f, 0xe1, 0xc4, 0xab, 0x8e, 0x75, 0x50, 0x3f, 0x1a, 0xbe, 0x9b, 0xf4, 0xd1, 0x2a, 0xf, 0x60, 0x45, 0x8b, 0xae, 0xc1, 0xe4, 0x1f, 0x3a, 0x55, 0x70, 0xb5, 0x90, 0xff, 0xda, 0x21, 0x4, 0x6b, 0x4e, 0x80, 0xa5, 0xca, 0xef, 0x14, 0x31, 0x5e, 0x7b, 0xdf, 0xfa, 0x95, 0xb0, 0x4b, 0x6e, 0x1, 0x24, 0xea, 0xcf, 0xa0, 0x85, 0x7e, 0x5b, 0x34, 0x11, 0x61, 0x44, 0x2b, 0xe, 0xf5, 0xd0, 0xbf, 0x9a, 0x54, 0x71, 0x1e, 0x3b, 0xc0, 0xe5, 0x8a, 0xaf, 0xb, 0x2e, 0x41, 0x64, 0x9f, 0xba, 0xd5, 0xf0, 0x3e, 0x1b, 0x74, 0x51, 0xaa, 0x8f, 0xe0, 0xc5, 0x77, 0x52, 0x3d, 0x18, 0xe3, 0xc6, 0xa9, 0x8c, 0x42, 0x67, 0x8, 0x2d, 0xd6, 0xf3, 0x9c, 0xb9, 0x1d, 0x38, 0x57, 0x72, 0x89, 0xac, 0xc3, 0xe6, 0x28, 0xd, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3, 0xa3, 0x86, 0xe9, 0xcc, 0x37, 0x12, 0x7d, 0x58, 0x96, 0xb3, 0xdc, 0xf9, 0x2, 0x27, 0x48, 0x6d, 0xc9, 0xec, 0x83, 0xa6, 0x5d, 0x78, 0x17, 0x32, 0xfc, 0xd9, 0xb6, 0x93, 0x68, 0x4d, 0x22, 0x7, 0xc2, 0xe7, 0x88, 0xad, 0x56, 0x73, 0x1c, 0x39, 0xf7, 0xd2, 0xbd, 0x98, 0x63, 0x46, 0x29, 0xc, 0xa8, 0x8d, 0xe2, 0xc7, 0x3c, 0x19, 0x76, 0x53, 0x9d, 0xb8, 0xd7, 0xf2, 0x9, 0x2c, 0x43, 0x66, 0x16, 0x33, 0x5c, 0x79, 0x82, 0xa7, 0xc8, 0xed, 0x23, 0x6, 0x69, 0x4c, 0xb7, 0x92, 0xfd, 0xd8, 0x7c, 0x59, 0x36, 0x13, 0xe8, 0xcd, 0xa2, 0x87, 0x49, 0x6c, 0x3, 0x26, 0xdd, 0xf8, 0x97, 0xb2},
+ {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf, 0x5a, 0x7c, 0x16, 0x30, 0xc2, 0xe4, 0x8e, 0xa8, 0x77, 0x51, 0x3b, 0x1d, 0xef, 0xc9, 0xa3, 0x85, 0xb4, 0x92, 0xf8, 0xde, 0x2c, 0xa, 0x60, 0x46, 0x99, 0xbf, 0xd5, 0xf3, 0x1, 0x27, 0x4d, 0x6b, 0xee, 0xc8, 0xa2, 0x84, 0x76, 0x50, 0x3a, 0x1c, 0xc3, 0xe5, 0x8f, 0xa9, 0x5b, 0x7d, 0x17, 0x31, 0x75, 0x53, 0x39, 0x1f, 0xed, 0xcb, 0xa1, 0x87, 0x58, 0x7e, 0x14, 0x32, 0xc0, 0xe6, 0x8c, 0xaa, 0x2f, 0x9, 0x63, 0x45, 0xb7, 0x91, 0xfb, 0xdd, 0x2, 0x24, 0x4e, 0x68, 0x9a, 0xbc, 0xd6, 0xf0, 0xc1, 0xe7, 0x8d, 0xab, 0x59, 0x7f, 0x15, 0x33, 0xec, 0xca, 0xa0, 0x86, 0x74, 0x52, 0x38, 0x1e, 0x9b, 0xbd, 0xd7, 0xf1, 0x3, 0x25, 0x4f, 0x69, 0xb6, 0x90, 0xfa, 0xdc, 0x2e, 0x8, 0x62, 0x44, 0xea, 0xcc, 0xa6, 0x80, 0x72, 0x54, 0x3e, 0x18, 0xc7, 0xe1, 0x8b, 0xad, 0x5f, 0x79, 0x13, 0x35, 0xb0, 0x96, 0xfc, 0xda, 0x28, 0xe, 0x64, 0x42, 0x9d, 0xbb, 0xd1, 0xf7, 0x5, 0x23, 0x49, 0x6f, 0x5e, 0x78, 0x12, 0x34, 0xc6, 0xe0, 0x8a, 0xac, 0x73, 0x55, 0x3f, 0x19, 0xeb, 0xcd, 0xa7, 0x81, 0x4, 0x22, 0x48, 0x6e, 0x9c, 0xba, 0xd0, 0xf6, 0x29, 0xf, 0x65, 0x43, 0xb1, 0x97, 0xfd, 0xdb, 0x9f, 0xb9, 0xd3, 0xf5, 0x7, 0x21, 0x4b, 0x6d, 0xb2, 0x94, 0xfe, 0xd8, 0x2a, 0xc, 0x66, 0x40, 0xc5, 0xe3, 0x89, 0xaf, 0x5d, 0x7b, 0x11, 0x37, 0xe8, 0xce, 0xa4, 0x82, 0x70, 0x56, 0x3c, 0x1a, 0x2b, 0xd, 0x67, 0x41, 0xb3, 0x95, 0xff, 0xd9, 0x6, 0x20, 0x4a, 0x6c, 0x9e, 0xb8, 0xd2, 0xf4, 0x71, 0x57, 0x3d, 0x1b, 0xe9, 0xcf, 0xa5, 0x83, 0x5c, 0x7a, 0x10, 0x36, 0xc4, 0xe2, 0x88, 0xae},
+ {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0, 0x4a, 0x6d, 0x4, 0x23, 0xd6, 0xf1, 0x98, 0xbf, 0x6f, 0x48, 0x21, 0x6, 0xf3, 0xd4, 0xbd, 0x9a, 0x94, 0xb3, 0xda, 0xfd, 0x8, 0x2f, 0x46, 0x61, 0xb1, 0x96, 0xff, 0xd8, 0x2d, 0xa, 0x63, 0x44, 0xde, 0xf9, 0x90, 0xb7, 0x42, 0x65, 0xc, 0x2b, 0xfb, 0xdc, 0xb5, 0x92, 0x67, 0x40, 0x29, 0xe, 0x35, 0x12, 0x7b, 0x5c, 0xa9, 0x8e, 0xe7, 0xc0, 0x10, 0x37, 0x5e, 0x79, 0x8c, 0xab, 0xc2, 0xe5, 0x7f, 0x58, 0x31, 0x16, 0xe3, 0xc4, 0xad, 0x8a, 0x5a, 0x7d, 0x14, 0x33, 0xc6, 0xe1, 0x88, 0xaf, 0xa1, 0x86, 0xef, 0xc8, 0x3d, 0x1a, 0x73, 0x54, 0x84, 0xa3, 0xca, 0xed, 0x18, 0x3f, 0x56, 0x71, 0xeb, 0xcc, 0xa5, 0x82, 0x77, 0x50, 0x39, 0x1e, 0xce, 0xe9, 0x80, 0xa7, 0x52, 0x75, 0x1c, 0x3b, 0x6a, 0x4d, 0x24, 0x3, 0xf6, 0xd1, 0xb8, 0x9f, 0x4f, 0x68, 0x1, 0x26, 0xd3, 0xf4, 0x9d, 0xba, 0x20, 0x7, 0x6e, 0x49, 0xbc, 0x9b, 0xf2, 0xd5, 0x5, 0x22, 0x4b, 0x6c, 0x99, 0xbe, 0xd7, 0xf0, 0xfe, 0xd9, 0xb0, 0x97, 0x62, 0x45, 0x2c, 0xb, 0xdb, 0xfc, 0x95, 0xb2, 0x47, 0x60, 0x9, 0x2e, 0xb4, 0x93, 0xfa, 0xdd, 0x28, 0xf, 0x66, 0x41, 0x91, 0xb6, 0xdf, 0xf8, 0xd, 0x2a, 0x43, 0x64, 0x5f, 0x78, 0x11, 0x36, 0xc3, 0xe4, 0x8d, 0xaa, 0x7a, 0x5d, 0x34, 0x13, 0xe6, 0xc1, 0xa8, 0x8f, 0x15, 0x32, 0x5b, 0x7c, 0x89, 0xae, 0xc7, 0xe0, 0x30, 0x17, 0x7e, 0x59, 0xac, 0x8b, 0xe2, 0xc5, 0xcb, 0xec, 0x85, 0xa2, 0x57, 0x70, 0x19, 0x3e, 0xee, 0xc9, 0xa0, 0x87, 0x72, 0x55, 0x3c, 0x1b, 0x81, 0xa6, 0xcf, 0xe8, 0x1d, 0x3a, 0x53, 0x74, 0xa4, 0x83, 0xea, 0xcd, 0x38, 0x1f, 0x76, 0x51},
+ {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85, 0xba, 0x92, 0xea, 0xc2, 0x1a, 0x32, 0x4a, 0x62, 0xe7, 0xcf, 0xb7, 0x9f, 0x47, 0x6f, 0x17, 0x3f, 0x69, 0x41, 0x39, 0x11, 0xc9, 0xe1, 0x99, 0xb1, 0x34, 0x1c, 0x64, 0x4c, 0x94, 0xbc, 0xc4, 0xec, 0xd3, 0xfb, 0x83, 0xab, 0x73, 0x5b, 0x23, 0xb, 0x8e, 0xa6, 0xde, 0xf6, 0x2e, 0x6, 0x7e, 0x56, 0xd2, 0xfa, 0x82, 0xaa, 0x72, 0x5a, 0x22, 0xa, 0x8f, 0xa7, 0xdf, 0xf7, 0x2f, 0x7, 0x7f, 0x57, 0x68, 0x40, 0x38, 0x10, 0xc8, 0xe0, 0x98, 0xb0, 0x35, 0x1d, 0x65, 0x4d, 0x95, 0xbd, 0xc5, 0xed, 0xbb, 0x93, 0xeb, 0xc3, 0x1b, 0x33, 0x4b, 0x63, 0xe6, 0xce, 0xb6, 0x9e, 0x46, 0x6e, 0x16, 0x3e, 0x1, 0x29, 0x51, 0x79, 0xa1, 0x89, 0xf1, 0xd9, 0x5c, 0x74, 0xc, 0x24, 0xfc, 0xd4, 0xac, 0x84, 0xb9, 0x91, 0xe9, 0xc1, 0x19, 0x31, 0x49, 0x61, 0xe4, 0xcc, 0xb4, 0x9c, 0x44, 0x6c, 0x14, 0x3c, 0x3, 0x2b, 0x53, 0x7b, 0xa3, 0x8b, 0xf3, 0xdb, 0x5e, 0x76, 0xe, 0x26, 0xfe, 0xd6, 0xae, 0x86, 0xd0, 0xf8, 0x80, 0xa8, 0x70, 0x58, 0x20, 0x8, 0x8d, 0xa5, 0xdd, 0xf5, 0x2d, 0x5, 0x7d, 0x55, 0x6a, 0x42, 0x3a, 0x12, 0xca, 0xe2, 0x9a, 0xb2, 0x37, 0x1f, 0x67, 0x4f, 0x97, 0xbf, 0xc7, 0xef, 0x6b, 0x43, 0x3b, 0x13, 0xcb, 0xe3, 0x9b, 0xb3, 0x36, 0x1e, 0x66, 0x4e, 0x96, 0xbe, 0xc6, 0xee, 0xd1, 0xf9, 0x81, 0xa9, 0x71, 0x59, 0x21, 0x9, 0x8c, 0xa4, 0xdc, 0xf4, 0x2c, 0x4, 0x7c, 0x54, 0x2, 0x2a, 0x52, 0x7a, 0xa2, 0x8a, 0xf2, 0xda, 0x5f, 0x77, 0xf, 0x27, 0xff, 0xd7, 0xaf, 0x87, 0xb8, 0x90, 0xe8, 0xc0, 0x18, 0x30, 0x48, 0x60, 0xe5, 0xcd, 0xb5, 0x9d, 0x45, 0x6d, 0x15, 0x3d},
+ {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a, 0xaa, 0x83, 0xf8, 0xd1, 0xe, 0x27, 0x5c, 0x75, 0xff, 0xd6, 0xad, 0x84, 0x5b, 0x72, 0x9, 0x20, 0x49, 0x60, 0x1b, 0x32, 0xed, 0xc4, 0xbf, 0x96, 0x1c, 0x35, 0x4e, 0x67, 0xb8, 0x91, 0xea, 0xc3, 0xe3, 0xca, 0xb1, 0x98, 0x47, 0x6e, 0x15, 0x3c, 0xb6, 0x9f, 0xe4, 0xcd, 0x12, 0x3b, 0x40, 0x69, 0x92, 0xbb, 0xc0, 0xe9, 0x36, 0x1f, 0x64, 0x4d, 0xc7, 0xee, 0x95, 0xbc, 0x63, 0x4a, 0x31, 0x18, 0x38, 0x11, 0x6a, 0x43, 0x9c, 0xb5, 0xce, 0xe7, 0x6d, 0x44, 0x3f, 0x16, 0xc9, 0xe0, 0x9b, 0xb2, 0xdb, 0xf2, 0x89, 0xa0, 0x7f, 0x56, 0x2d, 0x4, 0x8e, 0xa7, 0xdc, 0xf5, 0x2a, 0x3, 0x78, 0x51, 0x71, 0x58, 0x23, 0xa, 0xd5, 0xfc, 0x87, 0xae, 0x24, 0xd, 0x76, 0x5f, 0x80, 0xa9, 0xd2, 0xfb, 0x39, 0x10, 0x6b, 0x42, 0x9d, 0xb4, 0xcf, 0xe6, 0x6c, 0x45, 0x3e, 0x17, 0xc8, 0xe1, 0x9a, 0xb3, 0x93, 0xba, 0xc1, 0xe8, 0x37, 0x1e, 0x65, 0x4c, 0xc6, 0xef, 0x94, 0xbd, 0x62, 0x4b, 0x30, 0x19, 0x70, 0x59, 0x22, 0xb, 0xd4, 0xfd, 0x86, 0xaf, 0x25, 0xc, 0x77, 0x5e, 0x81, 0xa8, 0xd3, 0xfa, 0xda, 0xf3, 0x88, 0xa1, 0x7e, 0x57, 0x2c, 0x5, 0x8f, 0xa6, 0xdd, 0xf4, 0x2b, 0x2, 0x79, 0x50, 0xab, 0x82, 0xf9, 0xd0, 0xf, 0x26, 0x5d, 0x74, 0xfe, 0xd7, 0xac, 0x85, 0x5a, 0x73, 0x8, 0x21, 0x1, 0x28, 0x53, 0x7a, 0xa5, 0x8c, 0xf7, 0xde, 0x54, 0x7d, 0x6, 0x2f, 0xf0, 0xd9, 0xa2, 0x8b, 0xe2, 0xcb, 0xb0, 0x99, 0x46, 0x6f, 0x14, 0x3d, 0xb7, 0x9e, 0xe5, 0xcc, 0x13, 0x3a, 0x41, 0x68, 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97, 0x1d, 0x34, 0x4f, 0x66, 0xb9, 0x90, 0xeb, 0xc2},
+ {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b, 0x9a, 0xb0, 0xce, 0xe4, 0x32, 0x18, 0x66, 0x4c, 0xd7, 0xfd, 0x83, 0xa9, 0x7f, 0x55, 0x2b, 0x1, 0x29, 0x3, 0x7d, 0x57, 0x81, 0xab, 0xd5, 0xff, 0x64, 0x4e, 0x30, 0x1a, 0xcc, 0xe6, 0x98, 0xb2, 0xb3, 0x99, 0xe7, 0xcd, 0x1b, 0x31, 0x4f, 0x65, 0xfe, 0xd4, 0xaa, 0x80, 0x56, 0x7c, 0x2, 0x28, 0x52, 0x78, 0x6, 0x2c, 0xfa, 0xd0, 0xae, 0x84, 0x1f, 0x35, 0x4b, 0x61, 0xb7, 0x9d, 0xe3, 0xc9, 0xc8, 0xe2, 0x9c, 0xb6, 0x60, 0x4a, 0x34, 0x1e, 0x85, 0xaf, 0xd1, 0xfb, 0x2d, 0x7, 0x79, 0x53, 0x7b, 0x51, 0x2f, 0x5, 0xd3, 0xf9, 0x87, 0xad, 0x36, 0x1c, 0x62, 0x48, 0x9e, 0xb4, 0xca, 0xe0, 0xe1, 0xcb, 0xb5, 0x9f, 0x49, 0x63, 0x1d, 0x37, 0xac, 0x86, 0xf8, 0xd2, 0x4, 0x2e, 0x50, 0x7a, 0xa4, 0x8e, 0xf0, 0xda, 0xc, 0x26, 0x58, 0x72, 0xe9, 0xc3, 0xbd, 0x97, 0x41, 0x6b, 0x15, 0x3f, 0x3e, 0x14, 0x6a, 0x40, 0x96, 0xbc, 0xc2, 0xe8, 0x73, 0x59, 0x27, 0xd, 0xdb, 0xf1, 0x8f, 0xa5, 0x8d, 0xa7, 0xd9, 0xf3, 0x25, 0xf, 0x71, 0x5b, 0xc0, 0xea, 0x94, 0xbe, 0x68, 0x42, 0x3c, 0x16, 0x17, 0x3d, 0x43, 0x69, 0xbf, 0x95, 0xeb, 0xc1, 0x5a, 0x70, 0xe, 0x24, 0xf2, 0xd8, 0xa6, 0x8c, 0xf6, 0xdc, 0xa2, 0x88, 0x5e, 0x74, 0xa, 0x20, 0xbb, 0x91, 0xef, 0xc5, 0x13, 0x39, 0x47, 0x6d, 0x6c, 0x46, 0x38, 0x12, 0xc4, 0xee, 0x90, 0xba, 0x21, 0xb, 0x75, 0x5f, 0x89, 0xa3, 0xdd, 0xf7, 0xdf, 0xf5, 0x8b, 0xa1, 0x77, 0x5d, 0x23, 0x9, 0x92, 0xb8, 0xc6, 0xec, 0x3a, 0x10, 0x6e, 0x44, 0x45, 0x6f, 0x11, 0x3b, 0xed, 0xc7, 0xb9, 0x93, 0x8, 0x22, 0x5c, 0x76, 0xa0, 0x8a, 0xf4, 0xde},
+ {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94, 0x8a, 0xa1, 0xdc, 0xf7, 0x26, 0xd, 0x70, 0x5b, 0xcf, 0xe4, 0x99, 0xb2, 0x63, 0x48, 0x35, 0x1e, 0x9, 0x22, 0x5f, 0x74, 0xa5, 0x8e, 0xf3, 0xd8, 0x4c, 0x67, 0x1a, 0x31, 0xe0, 0xcb, 0xb6, 0x9d, 0x83, 0xa8, 0xd5, 0xfe, 0x2f, 0x4, 0x79, 0x52, 0xc6, 0xed, 0x90, 0xbb, 0x6a, 0x41, 0x3c, 0x17, 0x12, 0x39, 0x44, 0x6f, 0xbe, 0x95, 0xe8, 0xc3, 0x57, 0x7c, 0x1, 0x2a, 0xfb, 0xd0, 0xad, 0x86, 0x98, 0xb3, 0xce, 0xe5, 0x34, 0x1f, 0x62, 0x49, 0xdd, 0xf6, 0x8b, 0xa0, 0x71, 0x5a, 0x27, 0xc, 0x1b, 0x30, 0x4d, 0x66, 0xb7, 0x9c, 0xe1, 0xca, 0x5e, 0x75, 0x8, 0x23, 0xf2, 0xd9, 0xa4, 0x8f, 0x91, 0xba, 0xc7, 0xec, 0x3d, 0x16, 0x6b, 0x40, 0xd4, 0xff, 0x82, 0xa9, 0x78, 0x53, 0x2e, 0x5, 0x24, 0xf, 0x72, 0x59, 0x88, 0xa3, 0xde, 0xf5, 0x61, 0x4a, 0x37, 0x1c, 0xcd, 0xe6, 0x9b, 0xb0, 0xae, 0x85, 0xf8, 0xd3, 0x2, 0x29, 0x54, 0x7f, 0xeb, 0xc0, 0xbd, 0x96, 0x47, 0x6c, 0x11, 0x3a, 0x2d, 0x6, 0x7b, 0x50, 0x81, 0xaa, 0xd7, 0xfc, 0x68, 0x43, 0x3e, 0x15, 0xc4, 0xef, 0x92, 0xb9, 0xa7, 0x8c, 0xf1, 0xda, 0xb, 0x20, 0x5d, 0x76, 0xe2, 0xc9, 0xb4, 0x9f, 0x4e, 0x65, 0x18, 0x33, 0x36, 0x1d, 0x60, 0x4b, 0x9a, 0xb1, 0xcc, 0xe7, 0x73, 0x58, 0x25, 0xe, 0xdf, 0xf4, 0x89, 0xa2, 0xbc, 0x97, 0xea, 0xc1, 0x10, 0x3b, 0x46, 0x6d, 0xf9, 0xd2, 0xaf, 0x84, 0x55, 0x7e, 0x3, 0x28, 0x3f, 0x14, 0x69, 0x42, 0x93, 0xb8, 0xc5, 0xee, 0x7a, 0x51, 0x2c, 0x7, 0xd6, 0xfd, 0x80, 0xab, 0xb5, 0x9e, 0xe3, 0xc8, 0x19, 0x32, 0x4f, 0x64, 0xf0, 0xdb, 0xa6, 0x8d, 0x5c, 0x77, 0xa, 0x21},
+ {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9, 0xfa, 0xd6, 0xa2, 0x8e, 0x4a, 0x66, 0x12, 0x3e, 0x87, 0xab, 0xdf, 0xf3, 0x37, 0x1b, 0x6f, 0x43, 0xe9, 0xc5, 0xb1, 0x9d, 0x59, 0x75, 0x1, 0x2d, 0x94, 0xb8, 0xcc, 0xe0, 0x24, 0x8, 0x7c, 0x50, 0x13, 0x3f, 0x4b, 0x67, 0xa3, 0x8f, 0xfb, 0xd7, 0x6e, 0x42, 0x36, 0x1a, 0xde, 0xf2, 0x86, 0xaa, 0xcf, 0xe3, 0x97, 0xbb, 0x7f, 0x53, 0x27, 0xb, 0xb2, 0x9e, 0xea, 0xc6, 0x2, 0x2e, 0x5a, 0x76, 0x35, 0x19, 0x6d, 0x41, 0x85, 0xa9, 0xdd, 0xf1, 0x48, 0x64, 0x10, 0x3c, 0xf8, 0xd4, 0xa0, 0x8c, 0x26, 0xa, 0x7e, 0x52, 0x96, 0xba, 0xce, 0xe2, 0x5b, 0x77, 0x3, 0x2f, 0xeb, 0xc7, 0xb3, 0x9f, 0xdc, 0xf0, 0x84, 0xa8, 0x6c, 0x40, 0x34, 0x18, 0xa1, 0x8d, 0xf9, 0xd5, 0x11, 0x3d, 0x49, 0x65, 0x83, 0xaf, 0xdb, 0xf7, 0x33, 0x1f, 0x6b, 0x47, 0xfe, 0xd2, 0xa6, 0x8a, 0x4e, 0x62, 0x16, 0x3a, 0x79, 0x55, 0x21, 0xd, 0xc9, 0xe5, 0x91, 0xbd, 0x4, 0x28, 0x5c, 0x70, 0xb4, 0x98, 0xec, 0xc0, 0x6a, 0x46, 0x32, 0x1e, 0xda, 0xf6, 0x82, 0xae, 0x17, 0x3b, 0x4f, 0x63, 0xa7, 0x8b, 0xff, 0xd3, 0x90, 0xbc, 0xc8, 0xe4, 0x20, 0xc, 0x78, 0x54, 0xed, 0xc1, 0xb5, 0x99, 0x5d, 0x71, 0x5, 0x29, 0x4c, 0x60, 0x14, 0x38, 0xfc, 0xd0, 0xa4, 0x88, 0x31, 0x1d, 0x69, 0x45, 0x81, 0xad, 0xd9, 0xf5, 0xb6, 0x9a, 0xee, 0xc2, 0x6, 0x2a, 0x5e, 0x72, 0xcb, 0xe7, 0x93, 0xbf, 0x7b, 0x57, 0x23, 0xf, 0xa5, 0x89, 0xfd, 0xd1, 0x15, 0x39, 0x4d, 0x61, 0xd8, 0xf4, 0x80, 0xac, 0x68, 0x44, 0x30, 0x1c, 0x5f, 0x73, 0x7, 0x2b, 0xef, 0xc3, 0xb7, 0x9b, 0x22, 0xe, 0x7a, 0x56, 0x92, 0xbe, 0xca, 0xe6},
+ {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6, 0xea, 0xc7, 0xb0, 0x9d, 0x5e, 0x73, 0x4, 0x29, 0x9f, 0xb2, 0xc5, 0xe8, 0x2b, 0x6, 0x71, 0x5c, 0xc9, 0xe4, 0x93, 0xbe, 0x7d, 0x50, 0x27, 0xa, 0xbc, 0x91, 0xe6, 0xcb, 0x8, 0x25, 0x52, 0x7f, 0x23, 0xe, 0x79, 0x54, 0x97, 0xba, 0xcd, 0xe0, 0x56, 0x7b, 0xc, 0x21, 0xe2, 0xcf, 0xb8, 0x95, 0x8f, 0xa2, 0xd5, 0xf8, 0x3b, 0x16, 0x61, 0x4c, 0xfa, 0xd7, 0xa0, 0x8d, 0x4e, 0x63, 0x14, 0x39, 0x65, 0x48, 0x3f, 0x12, 0xd1, 0xfc, 0x8b, 0xa6, 0x10, 0x3d, 0x4a, 0x67, 0xa4, 0x89, 0xfe, 0xd3, 0x46, 0x6b, 0x1c, 0x31, 0xf2, 0xdf, 0xa8, 0x85, 0x33, 0x1e, 0x69, 0x44, 0x87, 0xaa, 0xdd, 0xf0, 0xac, 0x81, 0xf6, 0xdb, 0x18, 0x35, 0x42, 0x6f, 0xd9, 0xf4, 0x83, 0xae, 0x6d, 0x40, 0x37, 0x1a, 0x3, 0x2e, 0x59, 0x74, 0xb7, 0x9a, 0xed, 0xc0, 0x76, 0x5b, 0x2c, 0x1, 0xc2, 0xef, 0x98, 0xb5, 0xe9, 0xc4, 0xb3, 0x9e, 0x5d, 0x70, 0x7, 0x2a, 0x9c, 0xb1, 0xc6, 0xeb, 0x28, 0x5, 0x72, 0x5f, 0xca, 0xe7, 0x90, 0xbd, 0x7e, 0x53, 0x24, 0x9, 0xbf, 0x92, 0xe5, 0xc8, 0xb, 0x26, 0x51, 0x7c, 0x20, 0xd, 0x7a, 0x57, 0x94, 0xb9, 0xce, 0xe3, 0x55, 0x78, 0xf, 0x22, 0xe1, 0xcc, 0xbb, 0x96, 0x8c, 0xa1, 0xd6, 0xfb, 0x38, 0x15, 0x62, 0x4f, 0xf9, 0xd4, 0xa3, 0x8e, 0x4d, 0x60, 0x17, 0x3a, 0x66, 0x4b, 0x3c, 0x11, 0xd2, 0xff, 0x88, 0xa5, 0x13, 0x3e, 0x49, 0x64, 0xa7, 0x8a, 0xfd, 0xd0, 0x45, 0x68, 0x1f, 0x32, 0xf1, 0xdc, 0xab, 0x86, 0x30, 0x1d, 0x6a, 0x47, 0x84, 0xa9, 0xde, 0xf3, 0xaf, 0x82, 0xf5, 0xd8, 0x1b, 0x36, 0x41, 0x6c, 0xda, 0xf7, 0x80, 0xad, 0x6e, 0x43, 0x34, 0x19},
+ {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7, 0xda, 0xf4, 0x86, 0xa8, 0x62, 0x4c, 0x3e, 0x10, 0xb7, 0x99, 0xeb, 0xc5, 0xf, 0x21, 0x53, 0x7d, 0xa9, 0x87, 0xf5, 0xdb, 0x11, 0x3f, 0x4d, 0x63, 0xc4, 0xea, 0x98, 0xb6, 0x7c, 0x52, 0x20, 0xe, 0x73, 0x5d, 0x2f, 0x1, 0xcb, 0xe5, 0x97, 0xb9, 0x1e, 0x30, 0x42, 0x6c, 0xa6, 0x88, 0xfa, 0xd4, 0x4f, 0x61, 0x13, 0x3d, 0xf7, 0xd9, 0xab, 0x85, 0x22, 0xc, 0x7e, 0x50, 0x9a, 0xb4, 0xc6, 0xe8, 0x95, 0xbb, 0xc9, 0xe7, 0x2d, 0x3, 0x71, 0x5f, 0xf8, 0xd6, 0xa4, 0x8a, 0x40, 0x6e, 0x1c, 0x32, 0xe6, 0xc8, 0xba, 0x94, 0x5e, 0x70, 0x2, 0x2c, 0x8b, 0xa5, 0xd7, 0xf9, 0x33, 0x1d, 0x6f, 0x41, 0x3c, 0x12, 0x60, 0x4e, 0x84, 0xaa, 0xd8, 0xf6, 0x51, 0x7f, 0xd, 0x23, 0xe9, 0xc7, 0xb5, 0x9b, 0x9e, 0xb0, 0xc2, 0xec, 0x26, 0x8, 0x7a, 0x54, 0xf3, 0xdd, 0xaf, 0x81, 0x4b, 0x65, 0x17, 0x39, 0x44, 0x6a, 0x18, 0x36, 0xfc, 0xd2, 0xa0, 0x8e, 0x29, 0x7, 0x75, 0x5b, 0x91, 0xbf, 0xcd, 0xe3, 0x37, 0x19, 0x6b, 0x45, 0x8f, 0xa1, 0xd3, 0xfd, 0x5a, 0x74, 0x6, 0x28, 0xe2, 0xcc, 0xbe, 0x90, 0xed, 0xc3, 0xb1, 0x9f, 0x55, 0x7b, 0x9, 0x27, 0x80, 0xae, 0xdc, 0xf2, 0x38, 0x16, 0x64, 0x4a, 0xd1, 0xff, 0x8d, 0xa3, 0x69, 0x47, 0x35, 0x1b, 0xbc, 0x92, 0xe0, 0xce, 0x4, 0x2a, 0x58, 0x76, 0xb, 0x25, 0x57, 0x79, 0xb3, 0x9d, 0xef, 0xc1, 0x66, 0x48, 0x3a, 0x14, 0xde, 0xf0, 0x82, 0xac, 0x78, 0x56, 0x24, 0xa, 0xc0, 0xee, 0x9c, 0xb2, 0x15, 0x3b, 0x49, 0x67, 0xad, 0x83, 0xf1, 0xdf, 0xa2, 0x8c, 0xfe, 0xd0, 0x1a, 0x34, 0x46, 0x68, 0xcf, 0xe1, 0x93, 0xbd, 0x77, 0x59, 0x2b, 0x5},
+ {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8, 0xca, 0xe5, 0x94, 0xbb, 0x76, 0x59, 0x28, 0x7, 0xaf, 0x80, 0xf1, 0xde, 0x13, 0x3c, 0x4d, 0x62, 0x89, 0xa6, 0xd7, 0xf8, 0x35, 0x1a, 0x6b, 0x44, 0xec, 0xc3, 0xb2, 0x9d, 0x50, 0x7f, 0xe, 0x21, 0x43, 0x6c, 0x1d, 0x32, 0xff, 0xd0, 0xa1, 0x8e, 0x26, 0x9, 0x78, 0x57, 0x9a, 0xb5, 0xc4, 0xeb, 0xf, 0x20, 0x51, 0x7e, 0xb3, 0x9c, 0xed, 0xc2, 0x6a, 0x45, 0x34, 0x1b, 0xd6, 0xf9, 0x88, 0xa7, 0xc5, 0xea, 0x9b, 0xb4, 0x79, 0x56, 0x27, 0x8, 0xa0, 0x8f, 0xfe, 0xd1, 0x1c, 0x33, 0x42, 0x6d, 0x86, 0xa9, 0xd8, 0xf7, 0x3a, 0x15, 0x64, 0x4b, 0xe3, 0xcc, 0xbd, 0x92, 0x5f, 0x70, 0x1, 0x2e, 0x4c, 0x63, 0x12, 0x3d, 0xf0, 0xdf, 0xae, 0x81, 0x29, 0x6, 0x77, 0x58, 0x95, 0xba, 0xcb, 0xe4, 0x1e, 0x31, 0x40, 0x6f, 0xa2, 0x8d, 0xfc, 0xd3, 0x7b, 0x54, 0x25, 0xa, 0xc7, 0xe8, 0x99, 0xb6, 0xd4, 0xfb, 0x8a, 0xa5, 0x68, 0x47, 0x36, 0x19, 0xb1, 0x9e, 0xef, 0xc0, 0xd, 0x22, 0x53, 0x7c, 0x97, 0xb8, 0xc9, 0xe6, 0x2b, 0x4, 0x75, 0x5a, 0xf2, 0xdd, 0xac, 0x83, 0x4e, 0x61, 0x10, 0x3f, 0x5d, 0x72, 0x3, 0x2c, 0xe1, 0xce, 0xbf, 0x90, 0x38, 0x17, 0x66, 0x49, 0x84, 0xab, 0xda, 0xf5, 0x11, 0x3e, 0x4f, 0x60, 0xad, 0x82, 0xf3, 0xdc, 0x74, 0x5b, 0x2a, 0x5, 0xc8, 0xe7, 0x96, 0xb9, 0xdb, 0xf4, 0x85, 0xaa, 0x67, 0x48, 0x39, 0x16, 0xbe, 0x91, 0xe0, 0xcf, 0x2, 0x2d, 0x5c, 0x73, 0x98, 0xb7, 0xc6, 0xe9, 0x24, 0xb, 0x7a, 0x55, 0xfd, 0xd2, 0xa3, 0x8c, 0x41, 0x6e, 0x1f, 0x30, 0x52, 0x7d, 0xc, 0x23, 0xee, 0xc1, 0xb0, 0x9f, 0x37, 0x18, 0x69, 0x46, 0x8b, 0xa4, 0xd5, 0xfa},
+ {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd, 0x27, 0x17, 0x47, 0x77, 0xe7, 0xd7, 0x87, 0xb7, 0xba, 0x8a, 0xda, 0xea, 0x7a, 0x4a, 0x1a, 0x2a, 0x4e, 0x7e, 0x2e, 0x1e, 0x8e, 0xbe, 0xee, 0xde, 0xd3, 0xe3, 0xb3, 0x83, 0x13, 0x23, 0x73, 0x43, 0x69, 0x59, 0x9, 0x39, 0xa9, 0x99, 0xc9, 0xf9, 0xf4, 0xc4, 0x94, 0xa4, 0x34, 0x4, 0x54, 0x64, 0x9c, 0xac, 0xfc, 0xcc, 0x5c, 0x6c, 0x3c, 0xc, 0x1, 0x31, 0x61, 0x51, 0xc1, 0xf1, 0xa1, 0x91, 0xbb, 0x8b, 0xdb, 0xeb, 0x7b, 0x4b, 0x1b, 0x2b, 0x26, 0x16, 0x46, 0x76, 0xe6, 0xd6, 0x86, 0xb6, 0xd2, 0xe2, 0xb2, 0x82, 0x12, 0x22, 0x72, 0x42, 0x4f, 0x7f, 0x2f, 0x1f, 0x8f, 0xbf, 0xef, 0xdf, 0xf5, 0xc5, 0x95, 0xa5, 0x35, 0x5, 0x55, 0x65, 0x68, 0x58, 0x8, 0x38, 0xa8, 0x98, 0xc8, 0xf8, 0x25, 0x15, 0x45, 0x75, 0xe5, 0xd5, 0x85, 0xb5, 0xb8, 0x88, 0xd8, 0xe8, 0x78, 0x48, 0x18, 0x28, 0x2, 0x32, 0x62, 0x52, 0xc2, 0xf2, 0xa2, 0x92, 0x9f, 0xaf, 0xff, 0xcf, 0x5f, 0x6f, 0x3f, 0xf, 0x6b, 0x5b, 0xb, 0x3b, 0xab, 0x9b, 0xcb, 0xfb, 0xf6, 0xc6, 0x96, 0xa6, 0x36, 0x6, 0x56, 0x66, 0x4c, 0x7c, 0x2c, 0x1c, 0x8c, 0xbc, 0xec, 0xdc, 0xd1, 0xe1, 0xb1, 0x81, 0x11, 0x21, 0x71, 0x41, 0xb9, 0x89, 0xd9, 0xe9, 0x79, 0x49, 0x19, 0x29, 0x24, 0x14, 0x44, 0x74, 0xe4, 0xd4, 0x84, 0xb4, 0x9e, 0xae, 0xfe, 0xce, 0x5e, 0x6e, 0x3e, 0xe, 0x3, 0x33, 0x63, 0x53, 0xc3, 0xf3, 0xa3, 0x93, 0xf7, 0xc7, 0x97, 0xa7, 0x37, 0x7, 0x57, 0x67, 0x6a, 0x5a, 0xa, 0x3a, 0xaa, 0x9a, 0xca, 0xfa, 0xd0, 0xe0, 0xb0, 0x80, 0x10, 0x20, 0x70, 0x40, 0x4d, 0x7d, 0x2d, 0x1d, 0x8d, 0xbd, 0xed, 0xdd},
+ {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2, 0x37, 0x6, 0x55, 0x64, 0xf3, 0xc2, 0x91, 0xa0, 0xa2, 0x93, 0xc0, 0xf1, 0x66, 0x57, 0x4, 0x35, 0x6e, 0x5f, 0xc, 0x3d, 0xaa, 0x9b, 0xc8, 0xf9, 0xfb, 0xca, 0x99, 0xa8, 0x3f, 0xe, 0x5d, 0x6c, 0x59, 0x68, 0x3b, 0xa, 0x9d, 0xac, 0xff, 0xce, 0xcc, 0xfd, 0xae, 0x9f, 0x8, 0x39, 0x6a, 0x5b, 0xdc, 0xed, 0xbe, 0x8f, 0x18, 0x29, 0x7a, 0x4b, 0x49, 0x78, 0x2b, 0x1a, 0x8d, 0xbc, 0xef, 0xde, 0xeb, 0xda, 0x89, 0xb8, 0x2f, 0x1e, 0x4d, 0x7c, 0x7e, 0x4f, 0x1c, 0x2d, 0xba, 0x8b, 0xd8, 0xe9, 0xb2, 0x83, 0xd0, 0xe1, 0x76, 0x47, 0x14, 0x25, 0x27, 0x16, 0x45, 0x74, 0xe3, 0xd2, 0x81, 0xb0, 0x85, 0xb4, 0xe7, 0xd6, 0x41, 0x70, 0x23, 0x12, 0x10, 0x21, 0x72, 0x43, 0xd4, 0xe5, 0xb6, 0x87, 0xa5, 0x94, 0xc7, 0xf6, 0x61, 0x50, 0x3, 0x32, 0x30, 0x1, 0x52, 0x63, 0xf4, 0xc5, 0x96, 0xa7, 0x92, 0xa3, 0xf0, 0xc1, 0x56, 0x67, 0x34, 0x5, 0x7, 0x36, 0x65, 0x54, 0xc3, 0xf2, 0xa1, 0x90, 0xcb, 0xfa, 0xa9, 0x98, 0xf, 0x3e, 0x6d, 0x5c, 0x5e, 0x6f, 0x3c, 0xd, 0x9a, 0xab, 0xf8, 0xc9, 0xfc, 0xcd, 0x9e, 0xaf, 0x38, 0x9, 0x5a, 0x6b, 0x69, 0x58, 0xb, 0x3a, 0xad, 0x9c, 0xcf, 0xfe, 0x79, 0x48, 0x1b, 0x2a, 0xbd, 0x8c, 0xdf, 0xee, 0xec, 0xdd, 0x8e, 0xbf, 0x28, 0x19, 0x4a, 0x7b, 0x4e, 0x7f, 0x2c, 0x1d, 0x8a, 0xbb, 0xe8, 0xd9, 0xdb, 0xea, 0xb9, 0x88, 0x1f, 0x2e, 0x7d, 0x4c, 0x17, 0x26, 0x75, 0x44, 0xd3, 0xe2, 0xb1, 0x80, 0x82, 0xb3, 0xe0, 0xd1, 0x46, 0x77, 0x24, 0x15, 0x20, 0x11, 0x42, 0x73, 0xe4, 0xd5, 0x86, 0xb7, 0xb5, 0x84, 0xd7, 0xe6, 0x71, 0x40, 0x13, 0x22},
+ {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13, 0x7, 0x35, 0x63, 0x51, 0xcf, 0xfd, 0xab, 0x99, 0x8a, 0xb8, 0xee, 0xdc, 0x42, 0x70, 0x26, 0x14, 0xe, 0x3c, 0x6a, 0x58, 0xc6, 0xf4, 0xa2, 0x90, 0x83, 0xb1, 0xe7, 0xd5, 0x4b, 0x79, 0x2f, 0x1d, 0x9, 0x3b, 0x6d, 0x5f, 0xc1, 0xf3, 0xa5, 0x97, 0x84, 0xb6, 0xe0, 0xd2, 0x4c, 0x7e, 0x28, 0x1a, 0x1c, 0x2e, 0x78, 0x4a, 0xd4, 0xe6, 0xb0, 0x82, 0x91, 0xa3, 0xf5, 0xc7, 0x59, 0x6b, 0x3d, 0xf, 0x1b, 0x29, 0x7f, 0x4d, 0xd3, 0xe1, 0xb7, 0x85, 0x96, 0xa4, 0xf2, 0xc0, 0x5e, 0x6c, 0x3a, 0x8, 0x12, 0x20, 0x76, 0x44, 0xda, 0xe8, 0xbe, 0x8c, 0x9f, 0xad, 0xfb, 0xc9, 0x57, 0x65, 0x33, 0x1, 0x15, 0x27, 0x71, 0x43, 0xdd, 0xef, 0xb9, 0x8b, 0x98, 0xaa, 0xfc, 0xce, 0x50, 0x62, 0x34, 0x6, 0x38, 0xa, 0x5c, 0x6e, 0xf0, 0xc2, 0x94, 0xa6, 0xb5, 0x87, 0xd1, 0xe3, 0x7d, 0x4f, 0x19, 0x2b, 0x3f, 0xd, 0x5b, 0x69, 0xf7, 0xc5, 0x93, 0xa1, 0xb2, 0x80, 0xd6, 0xe4, 0x7a, 0x48, 0x1e, 0x2c, 0x36, 0x4, 0x52, 0x60, 0xfe, 0xcc, 0x9a, 0xa8, 0xbb, 0x89, 0xdf, 0xed, 0x73, 0x41, 0x17, 0x25, 0x31, 0x3, 0x55, 0x67, 0xf9, 0xcb, 0x9d, 0xaf, 0xbc, 0x8e, 0xd8, 0xea, 0x74, 0x46, 0x10, 0x22, 0x24, 0x16, 0x40, 0x72, 0xec, 0xde, 0x88, 0xba, 0xa9, 0x9b, 0xcd, 0xff, 0x61, 0x53, 0x5, 0x37, 0x23, 0x11, 0x47, 0x75, 0xeb, 0xd9, 0x8f, 0xbd, 0xae, 0x9c, 0xca, 0xf8, 0x66, 0x54, 0x2, 0x30, 0x2a, 0x18, 0x4e, 0x7c, 0xe2, 0xd0, 0x86, 0xb4, 0xa7, 0x95, 0xc3, 0xf1, 0x6f, 0x5d, 0xb, 0x39, 0x2d, 0x1f, 0x49, 0x7b, 0xe5, 0xd7, 0x81, 0xb3, 0xa0, 0x92, 0xc4, 0xf6, 0x68, 0x5a, 0xc, 0x3e},
+ {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c, 0x17, 0x24, 0x71, 0x42, 0xdb, 0xe8, 0xbd, 0x8e, 0x92, 0xa1, 0xf4, 0xc7, 0x5e, 0x6d, 0x38, 0xb, 0x2e, 0x1d, 0x48, 0x7b, 0xe2, 0xd1, 0x84, 0xb7, 0xab, 0x98, 0xcd, 0xfe, 0x67, 0x54, 0x1, 0x32, 0x39, 0xa, 0x5f, 0x6c, 0xf5, 0xc6, 0x93, 0xa0, 0xbc, 0x8f, 0xda, 0xe9, 0x70, 0x43, 0x16, 0x25, 0x5c, 0x6f, 0x3a, 0x9, 0x90, 0xa3, 0xf6, 0xc5, 0xd9, 0xea, 0xbf, 0x8c, 0x15, 0x26, 0x73, 0x40, 0x4b, 0x78, 0x2d, 0x1e, 0x87, 0xb4, 0xe1, 0xd2, 0xce, 0xfd, 0xa8, 0x9b, 0x2, 0x31, 0x64, 0x57, 0x72, 0x41, 0x14, 0x27, 0xbe, 0x8d, 0xd8, 0xeb, 0xf7, 0xc4, 0x91, 0xa2, 0x3b, 0x8, 0x5d, 0x6e, 0x65, 0x56, 0x3, 0x30, 0xa9, 0x9a, 0xcf, 0xfc, 0xe0, 0xd3, 0x86, 0xb5, 0x2c, 0x1f, 0x4a, 0x79, 0xb8, 0x8b, 0xde, 0xed, 0x74, 0x47, 0x12, 0x21, 0x3d, 0xe, 0x5b, 0x68, 0xf1, 0xc2, 0x97, 0xa4, 0xaf, 0x9c, 0xc9, 0xfa, 0x63, 0x50, 0x5, 0x36, 0x2a, 0x19, 0x4c, 0x7f, 0xe6, 0xd5, 0x80, 0xb3, 0x96, 0xa5, 0xf0, 0xc3, 0x5a, 0x69, 0x3c, 0xf, 0x13, 0x20, 0x75, 0x46, 0xdf, 0xec, 0xb9, 0x8a, 0x81, 0xb2, 0xe7, 0xd4, 0x4d, 0x7e, 0x2b, 0x18, 0x4, 0x37, 0x62, 0x51, 0xc8, 0xfb, 0xae, 0x9d, 0xe4, 0xd7, 0x82, 0xb1, 0x28, 0x1b, 0x4e, 0x7d, 0x61, 0x52, 0x7, 0x34, 0xad, 0x9e, 0xcb, 0xf8, 0xf3, 0xc0, 0x95, 0xa6, 0x3f, 0xc, 0x59, 0x6a, 0x76, 0x45, 0x10, 0x23, 0xba, 0x89, 0xdc, 0xef, 0xca, 0xf9, 0xac, 0x9f, 0x6, 0x35, 0x60, 0x53, 0x4f, 0x7c, 0x29, 0x1a, 0x83, 0xb0, 0xe5, 0xd6, 0xdd, 0xee, 0xbb, 0x88, 0x11, 0x22, 0x77, 0x44, 0x58, 0x6b, 0x3e, 0xd, 0x94, 0xa7, 0xf2, 0xc1},
+ {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31, 0x67, 0x53, 0xf, 0x3b, 0xb7, 0x83, 0xdf, 0xeb, 0xda, 0xee, 0xb2, 0x86, 0xa, 0x3e, 0x62, 0x56, 0xce, 0xfa, 0xa6, 0x92, 0x1e, 0x2a, 0x76, 0x42, 0x73, 0x47, 0x1b, 0x2f, 0xa3, 0x97, 0xcb, 0xff, 0xa9, 0x9d, 0xc1, 0xf5, 0x79, 0x4d, 0x11, 0x25, 0x14, 0x20, 0x7c, 0x48, 0xc4, 0xf0, 0xac, 0x98, 0x81, 0xb5, 0xe9, 0xdd, 0x51, 0x65, 0x39, 0xd, 0x3c, 0x8, 0x54, 0x60, 0xec, 0xd8, 0x84, 0xb0, 0xe6, 0xd2, 0x8e, 0xba, 0x36, 0x2, 0x5e, 0x6a, 0x5b, 0x6f, 0x33, 0x7, 0x8b, 0xbf, 0xe3, 0xd7, 0x4f, 0x7b, 0x27, 0x13, 0x9f, 0xab, 0xf7, 0xc3, 0xf2, 0xc6, 0x9a, 0xae, 0x22, 0x16, 0x4a, 0x7e, 0x28, 0x1c, 0x40, 0x74, 0xf8, 0xcc, 0x90, 0xa4, 0x95, 0xa1, 0xfd, 0xc9, 0x45, 0x71, 0x2d, 0x19, 0x1f, 0x2b, 0x77, 0x43, 0xcf, 0xfb, 0xa7, 0x93, 0xa2, 0x96, 0xca, 0xfe, 0x72, 0x46, 0x1a, 0x2e, 0x78, 0x4c, 0x10, 0x24, 0xa8, 0x9c, 0xc0, 0xf4, 0xc5, 0xf1, 0xad, 0x99, 0x15, 0x21, 0x7d, 0x49, 0xd1, 0xe5, 0xb9, 0x8d, 0x1, 0x35, 0x69, 0x5d, 0x6c, 0x58, 0x4, 0x30, 0xbc, 0x88, 0xd4, 0xe0, 0xb6, 0x82, 0xde, 0xea, 0x66, 0x52, 0xe, 0x3a, 0xb, 0x3f, 0x63, 0x57, 0xdb, 0xef, 0xb3, 0x87, 0x9e, 0xaa, 0xf6, 0xc2, 0x4e, 0x7a, 0x26, 0x12, 0x23, 0x17, 0x4b, 0x7f, 0xf3, 0xc7, 0x9b, 0xaf, 0xf9, 0xcd, 0x91, 0xa5, 0x29, 0x1d, 0x41, 0x75, 0x44, 0x70, 0x2c, 0x18, 0x94, 0xa0, 0xfc, 0xc8, 0x50, 0x64, 0x38, 0xc, 0x80, 0xb4, 0xe8, 0xdc, 0xed, 0xd9, 0x85, 0xb1, 0x3d, 0x9, 0x55, 0x61, 0x37, 0x3, 0x5f, 0x6b, 0xe7, 0xd3, 0x8f, 0xbb, 0x8a, 0xbe, 0xe2, 0xd6, 0x5a, 0x6e, 0x32, 0x6},
+ {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e, 0x77, 0x42, 0x1d, 0x28, 0xa3, 0x96, 0xc9, 0xfc, 0xc2, 0xf7, 0xa8, 0x9d, 0x16, 0x23, 0x7c, 0x49, 0xee, 0xdb, 0x84, 0xb1, 0x3a, 0xf, 0x50, 0x65, 0x5b, 0x6e, 0x31, 0x4, 0x8f, 0xba, 0xe5, 0xd0, 0x99, 0xac, 0xf3, 0xc6, 0x4d, 0x78, 0x27, 0x12, 0x2c, 0x19, 0x46, 0x73, 0xf8, 0xcd, 0x92, 0xa7, 0xc1, 0xf4, 0xab, 0x9e, 0x15, 0x20, 0x7f, 0x4a, 0x74, 0x41, 0x1e, 0x2b, 0xa0, 0x95, 0xca, 0xff, 0xb6, 0x83, 0xdc, 0xe9, 0x62, 0x57, 0x8, 0x3d, 0x3, 0x36, 0x69, 0x5c, 0xd7, 0xe2, 0xbd, 0x88, 0x2f, 0x1a, 0x45, 0x70, 0xfb, 0xce, 0x91, 0xa4, 0x9a, 0xaf, 0xf0, 0xc5, 0x4e, 0x7b, 0x24, 0x11, 0x58, 0x6d, 0x32, 0x7, 0x8c, 0xb9, 0xe6, 0xd3, 0xed, 0xd8, 0x87, 0xb2, 0x39, 0xc, 0x53, 0x66, 0x9f, 0xaa, 0xf5, 0xc0, 0x4b, 0x7e, 0x21, 0x14, 0x2a, 0x1f, 0x40, 0x75, 0xfe, 0xcb, 0x94, 0xa1, 0xe8, 0xdd, 0x82, 0xb7, 0x3c, 0x9, 0x56, 0x63, 0x5d, 0x68, 0x37, 0x2, 0x89, 0xbc, 0xe3, 0xd6, 0x71, 0x44, 0x1b, 0x2e, 0xa5, 0x90, 0xcf, 0xfa, 0xc4, 0xf1, 0xae, 0x9b, 0x10, 0x25, 0x7a, 0x4f, 0x6, 0x33, 0x6c, 0x59, 0xd2, 0xe7, 0xb8, 0x8d, 0xb3, 0x86, 0xd9, 0xec, 0x67, 0x52, 0xd, 0x38, 0x5e, 0x6b, 0x34, 0x1, 0x8a, 0xbf, 0xe0, 0xd5, 0xeb, 0xde, 0x81, 0xb4, 0x3f, 0xa, 0x55, 0x60, 0x29, 0x1c, 0x43, 0x76, 0xfd, 0xc8, 0x97, 0xa2, 0x9c, 0xa9, 0xf6, 0xc3, 0x48, 0x7d, 0x22, 0x17, 0xb0, 0x85, 0xda, 0xef, 0x64, 0x51, 0xe, 0x3b, 0x5, 0x30, 0x6f, 0x5a, 0xd1, 0xe4, 0xbb, 0x8e, 0xc7, 0xf2, 0xad, 0x98, 0x13, 0x26, 0x79, 0x4c, 0x72, 0x47, 0x18, 0x2d, 0xa6, 0x93, 0xcc, 0xf9},
+ {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f, 0x47, 0x71, 0x2b, 0x1d, 0x9f, 0xa9, 0xf3, 0xc5, 0xea, 0xdc, 0x86, 0xb0, 0x32, 0x4, 0x5e, 0x68, 0x8e, 0xb8, 0xe2, 0xd4, 0x56, 0x60, 0x3a, 0xc, 0x23, 0x15, 0x4f, 0x79, 0xfb, 0xcd, 0x97, 0xa1, 0xc9, 0xff, 0xa5, 0x93, 0x11, 0x27, 0x7d, 0x4b, 0x64, 0x52, 0x8, 0x3e, 0xbc, 0x8a, 0xd0, 0xe6, 0x1, 0x37, 0x6d, 0x5b, 0xd9, 0xef, 0xb5, 0x83, 0xac, 0x9a, 0xc0, 0xf6, 0x74, 0x42, 0x18, 0x2e, 0x46, 0x70, 0x2a, 0x1c, 0x9e, 0xa8, 0xf2, 0xc4, 0xeb, 0xdd, 0x87, 0xb1, 0x33, 0x5, 0x5f, 0x69, 0x8f, 0xb9, 0xe3, 0xd5, 0x57, 0x61, 0x3b, 0xd, 0x22, 0x14, 0x4e, 0x78, 0xfa, 0xcc, 0x96, 0xa0, 0xc8, 0xfe, 0xa4, 0x92, 0x10, 0x26, 0x7c, 0x4a, 0x65, 0x53, 0x9, 0x3f, 0xbd, 0x8b, 0xd1, 0xe7, 0x2, 0x34, 0x6e, 0x58, 0xda, 0xec, 0xb6, 0x80, 0xaf, 0x99, 0xc3, 0xf5, 0x77, 0x41, 0x1b, 0x2d, 0x45, 0x73, 0x29, 0x1f, 0x9d, 0xab, 0xf1, 0xc7, 0xe8, 0xde, 0x84, 0xb2, 0x30, 0x6, 0x5c, 0x6a, 0x8c, 0xba, 0xe0, 0xd6, 0x54, 0x62, 0x38, 0xe, 0x21, 0x17, 0x4d, 0x7b, 0xf9, 0xcf, 0x95, 0xa3, 0xcb, 0xfd, 0xa7, 0x91, 0x13, 0x25, 0x7f, 0x49, 0x66, 0x50, 0xa, 0x3c, 0xbe, 0x88, 0xd2, 0xe4, 0x3, 0x35, 0x6f, 0x59, 0xdb, 0xed, 0xb7, 0x81, 0xae, 0x98, 0xc2, 0xf4, 0x76, 0x40, 0x1a, 0x2c, 0x44, 0x72, 0x28, 0x1e, 0x9c, 0xaa, 0xf0, 0xc6, 0xe9, 0xdf, 0x85, 0xb3, 0x31, 0x7, 0x5d, 0x6b, 0x8d, 0xbb, 0xe1, 0xd7, 0x55, 0x63, 0x39, 0xf, 0x20, 0x16, 0x4c, 0x7a, 0xf8, 0xce, 0x94, 0xa2, 0xca, 0xfc, 0xa6, 0x90, 0x12, 0x24, 0x7e, 0x48, 0x67, 0x51, 0xb, 0x3d, 0xbf, 0x89, 0xd3, 0xe5},
+ {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20, 0x57, 0x60, 0x39, 0xe, 0x8b, 0xbc, 0xe5, 0xd2, 0xf2, 0xc5, 0x9c, 0xab, 0x2e, 0x19, 0x40, 0x77, 0xae, 0x99, 0xc0, 0xf7, 0x72, 0x45, 0x1c, 0x2b, 0xb, 0x3c, 0x65, 0x52, 0xd7, 0xe0, 0xb9, 0x8e, 0xf9, 0xce, 0x97, 0xa0, 0x25, 0x12, 0x4b, 0x7c, 0x5c, 0x6b, 0x32, 0x5, 0x80, 0xb7, 0xee, 0xd9, 0x41, 0x76, 0x2f, 0x18, 0x9d, 0xaa, 0xf3, 0xc4, 0xe4, 0xd3, 0x8a, 0xbd, 0x38, 0xf, 0x56, 0x61, 0x16, 0x21, 0x78, 0x4f, 0xca, 0xfd, 0xa4, 0x93, 0xb3, 0x84, 0xdd, 0xea, 0x6f, 0x58, 0x1, 0x36, 0xef, 0xd8, 0x81, 0xb6, 0x33, 0x4, 0x5d, 0x6a, 0x4a, 0x7d, 0x24, 0x13, 0x96, 0xa1, 0xf8, 0xcf, 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0xa, 0x3d, 0x1d, 0x2a, 0x73, 0x44, 0xc1, 0xf6, 0xaf, 0x98, 0x82, 0xb5, 0xec, 0xdb, 0x5e, 0x69, 0x30, 0x7, 0x27, 0x10, 0x49, 0x7e, 0xfb, 0xcc, 0x95, 0xa2, 0xd5, 0xe2, 0xbb, 0x8c, 0x9, 0x3e, 0x67, 0x50, 0x70, 0x47, 0x1e, 0x29, 0xac, 0x9b, 0xc2, 0xf5, 0x2c, 0x1b, 0x42, 0x75, 0xf0, 0xc7, 0x9e, 0xa9, 0x89, 0xbe, 0xe7, 0xd0, 0x55, 0x62, 0x3b, 0xc, 0x7b, 0x4c, 0x15, 0x22, 0xa7, 0x90, 0xc9, 0xfe, 0xde, 0xe9, 0xb0, 0x87, 0x2, 0x35, 0x6c, 0x5b, 0xc3, 0xf4, 0xad, 0x9a, 0x1f, 0x28, 0x71, 0x46, 0x66, 0x51, 0x8, 0x3f, 0xba, 0x8d, 0xd4, 0xe3, 0x94, 0xa3, 0xfa, 0xcd, 0x48, 0x7f, 0x26, 0x11, 0x31, 0x6, 0x5f, 0x68, 0xed, 0xda, 0x83, 0xb4, 0x6d, 0x5a, 0x3, 0x34, 0xb1, 0x86, 0xdf, 0xe8, 0xc8, 0xff, 0xa6, 0x91, 0x14, 0x23, 0x7a, 0x4d, 0x3a, 0xd, 0x54, 0x63, 0xe6, 0xd1, 0x88, 0xbf, 0x9f, 0xa8, 0xf1, 0xc6, 0x43, 0x74, 0x2d, 0x1a},
+ {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75, 0xa7, 0x9f, 0xd7, 0xef, 0x47, 0x7f, 0x37, 0xf, 0x7a, 0x42, 0xa, 0x32, 0x9a, 0xa2, 0xea, 0xd2, 0x53, 0x6b, 0x23, 0x1b, 0xb3, 0x8b, 0xc3, 0xfb, 0x8e, 0xb6, 0xfe, 0xc6, 0x6e, 0x56, 0x1e, 0x26, 0xf4, 0xcc, 0x84, 0xbc, 0x14, 0x2c, 0x64, 0x5c, 0x29, 0x11, 0x59, 0x61, 0xc9, 0xf1, 0xb9, 0x81, 0xa6, 0x9e, 0xd6, 0xee, 0x46, 0x7e, 0x36, 0xe, 0x7b, 0x43, 0xb, 0x33, 0x9b, 0xa3, 0xeb, 0xd3, 0x1, 0x39, 0x71, 0x49, 0xe1, 0xd9, 0x91, 0xa9, 0xdc, 0xe4, 0xac, 0x94, 0x3c, 0x4, 0x4c, 0x74, 0xf5, 0xcd, 0x85, 0xbd, 0x15, 0x2d, 0x65, 0x5d, 0x28, 0x10, 0x58, 0x60, 0xc8, 0xf0, 0xb8, 0x80, 0x52, 0x6a, 0x22, 0x1a, 0xb2, 0x8a, 0xc2, 0xfa, 0x8f, 0xb7, 0xff, 0xc7, 0x6f, 0x57, 0x1f, 0x27, 0x51, 0x69, 0x21, 0x19, 0xb1, 0x89, 0xc1, 0xf9, 0x8c, 0xb4, 0xfc, 0xc4, 0x6c, 0x54, 0x1c, 0x24, 0xf6, 0xce, 0x86, 0xbe, 0x16, 0x2e, 0x66, 0x5e, 0x2b, 0x13, 0x5b, 0x63, 0xcb, 0xf3, 0xbb, 0x83, 0x2, 0x3a, 0x72, 0x4a, 0xe2, 0xda, 0x92, 0xaa, 0xdf, 0xe7, 0xaf, 0x97, 0x3f, 0x7, 0x4f, 0x77, 0xa5, 0x9d, 0xd5, 0xed, 0x45, 0x7d, 0x35, 0xd, 0x78, 0x40, 0x8, 0x30, 0x98, 0xa0, 0xe8, 0xd0, 0xf7, 0xcf, 0x87, 0xbf, 0x17, 0x2f, 0x67, 0x5f, 0x2a, 0x12, 0x5a, 0x62, 0xca, 0xf2, 0xba, 0x82, 0x50, 0x68, 0x20, 0x18, 0xb0, 0x88, 0xc0, 0xf8, 0x8d, 0xb5, 0xfd, 0xc5, 0x6d, 0x55, 0x1d, 0x25, 0xa4, 0x9c, 0xd4, 0xec, 0x44, 0x7c, 0x34, 0xc, 0x79, 0x41, 0x9, 0x31, 0x99, 0xa1, 0xe9, 0xd1, 0x3, 0x3b, 0x73, 0x4b, 0xe3, 0xdb, 0x93, 0xab, 0xde, 0xe6, 0xae, 0x96, 0x3e, 0x6, 0x4e, 0x76},
+ {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a, 0xb7, 0x8e, 0xc5, 0xfc, 0x53, 0x6a, 0x21, 0x18, 0x62, 0x5b, 0x10, 0x29, 0x86, 0xbf, 0xf4, 0xcd, 0x73, 0x4a, 0x1, 0x38, 0x97, 0xae, 0xe5, 0xdc, 0xa6, 0x9f, 0xd4, 0xed, 0x42, 0x7b, 0x30, 0x9, 0xc4, 0xfd, 0xb6, 0x8f, 0x20, 0x19, 0x52, 0x6b, 0x11, 0x28, 0x63, 0x5a, 0xf5, 0xcc, 0x87, 0xbe, 0xe6, 0xdf, 0x94, 0xad, 0x2, 0x3b, 0x70, 0x49, 0x33, 0xa, 0x41, 0x78, 0xd7, 0xee, 0xa5, 0x9c, 0x51, 0x68, 0x23, 0x1a, 0xb5, 0x8c, 0xc7, 0xfe, 0x84, 0xbd, 0xf6, 0xcf, 0x60, 0x59, 0x12, 0x2b, 0x95, 0xac, 0xe7, 0xde, 0x71, 0x48, 0x3, 0x3a, 0x40, 0x79, 0x32, 0xb, 0xa4, 0x9d, 0xd6, 0xef, 0x22, 0x1b, 0x50, 0x69, 0xc6, 0xff, 0xb4, 0x8d, 0xf7, 0xce, 0x85, 0xbc, 0x13, 0x2a, 0x61, 0x58, 0xd1, 0xe8, 0xa3, 0x9a, 0x35, 0xc, 0x47, 0x7e, 0x4, 0x3d, 0x76, 0x4f, 0xe0, 0xd9, 0x92, 0xab, 0x66, 0x5f, 0x14, 0x2d, 0x82, 0xbb, 0xf0, 0xc9, 0xb3, 0x8a, 0xc1, 0xf8, 0x57, 0x6e, 0x25, 0x1c, 0xa2, 0x9b, 0xd0, 0xe9, 0x46, 0x7f, 0x34, 0xd, 0x77, 0x4e, 0x5, 0x3c, 0x93, 0xaa, 0xe1, 0xd8, 0x15, 0x2c, 0x67, 0x5e, 0xf1, 0xc8, 0x83, 0xba, 0xc0, 0xf9, 0xb2, 0x8b, 0x24, 0x1d, 0x56, 0x6f, 0x37, 0xe, 0x45, 0x7c, 0xd3, 0xea, 0xa1, 0x98, 0xe2, 0xdb, 0x90, 0xa9, 0x6, 0x3f, 0x74, 0x4d, 0x80, 0xb9, 0xf2, 0xcb, 0x64, 0x5d, 0x16, 0x2f, 0x55, 0x6c, 0x27, 0x1e, 0xb1, 0x88, 0xc3, 0xfa, 0x44, 0x7d, 0x36, 0xf, 0xa0, 0x99, 0xd2, 0xeb, 0x91, 0xa8, 0xe3, 0xda, 0x75, 0x4c, 0x7, 0x3e, 0xf3, 0xca, 0x81, 0xb8, 0x17, 0x2e, 0x65, 0x5c, 0x26, 0x1f, 0x54, 0x6d, 0xc2, 0xfb, 0xb0, 0x89},
+ {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b, 0x87, 0xbd, 0xf3, 0xc9, 0x6f, 0x55, 0x1b, 0x21, 0x4a, 0x70, 0x3e, 0x4, 0xa2, 0x98, 0xd6, 0xec, 0x13, 0x29, 0x67, 0x5d, 0xfb, 0xc1, 0x8f, 0xb5, 0xde, 0xe4, 0xaa, 0x90, 0x36, 0xc, 0x42, 0x78, 0x94, 0xae, 0xe0, 0xda, 0x7c, 0x46, 0x8, 0x32, 0x59, 0x63, 0x2d, 0x17, 0xb1, 0x8b, 0xc5, 0xff, 0x26, 0x1c, 0x52, 0x68, 0xce, 0xf4, 0xba, 0x80, 0xeb, 0xd1, 0x9f, 0xa5, 0x3, 0x39, 0x77, 0x4d, 0xa1, 0x9b, 0xd5, 0xef, 0x49, 0x73, 0x3d, 0x7, 0x6c, 0x56, 0x18, 0x22, 0x84, 0xbe, 0xf0, 0xca, 0x35, 0xf, 0x41, 0x7b, 0xdd, 0xe7, 0xa9, 0x93, 0xf8, 0xc2, 0x8c, 0xb6, 0x10, 0x2a, 0x64, 0x5e, 0xb2, 0x88, 0xc6, 0xfc, 0x5a, 0x60, 0x2e, 0x14, 0x7f, 0x45, 0xb, 0x31, 0x97, 0xad, 0xe3, 0xd9, 0x4c, 0x76, 0x38, 0x2, 0xa4, 0x9e, 0xd0, 0xea, 0x81, 0xbb, 0xf5, 0xcf, 0x69, 0x53, 0x1d, 0x27, 0xcb, 0xf1, 0xbf, 0x85, 0x23, 0x19, 0x57, 0x6d, 0x6, 0x3c, 0x72, 0x48, 0xee, 0xd4, 0x9a, 0xa0, 0x5f, 0x65, 0x2b, 0x11, 0xb7, 0x8d, 0xc3, 0xf9, 0x92, 0xa8, 0xe6, 0xdc, 0x7a, 0x40, 0xe, 0x34, 0xd8, 0xe2, 0xac, 0x96, 0x30, 0xa, 0x44, 0x7e, 0x15, 0x2f, 0x61, 0x5b, 0xfd, 0xc7, 0x89, 0xb3, 0x6a, 0x50, 0x1e, 0x24, 0x82, 0xb8, 0xf6, 0xcc, 0xa7, 0x9d, 0xd3, 0xe9, 0x4f, 0x75, 0x3b, 0x1, 0xed, 0xd7, 0x99, 0xa3, 0x5, 0x3f, 0x71, 0x4b, 0x20, 0x1a, 0x54, 0x6e, 0xc8, 0xf2, 0xbc, 0x86, 0x79, 0x43, 0xd, 0x37, 0x91, 0xab, 0xe5, 0xdf, 0xb4, 0x8e, 0xc0, 0xfa, 0x5c, 0x66, 0x28, 0x12, 0xfe, 0xc4, 0x8a, 0xb0, 0x16, 0x2c, 0x62, 0x58, 0x33, 0x9, 0x47, 0x7d, 0xdb, 0xe1, 0xaf, 0x95},
+ {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64, 0x97, 0xac, 0xe1, 0xda, 0x7b, 0x40, 0xd, 0x36, 0x52, 0x69, 0x24, 0x1f, 0xbe, 0x85, 0xc8, 0xf3, 0x33, 0x8, 0x45, 0x7e, 0xdf, 0xe4, 0xa9, 0x92, 0xf6, 0xcd, 0x80, 0xbb, 0x1a, 0x21, 0x6c, 0x57, 0xa4, 0x9f, 0xd2, 0xe9, 0x48, 0x73, 0x3e, 0x5, 0x61, 0x5a, 0x17, 0x2c, 0x8d, 0xb6, 0xfb, 0xc0, 0x66, 0x5d, 0x10, 0x2b, 0x8a, 0xb1, 0xfc, 0xc7, 0xa3, 0x98, 0xd5, 0xee, 0x4f, 0x74, 0x39, 0x2, 0xf1, 0xca, 0x87, 0xbc, 0x1d, 0x26, 0x6b, 0x50, 0x34, 0xf, 0x42, 0x79, 0xd8, 0xe3, 0xae, 0x95, 0x55, 0x6e, 0x23, 0x18, 0xb9, 0x82, 0xcf, 0xf4, 0x90, 0xab, 0xe6, 0xdd, 0x7c, 0x47, 0xa, 0x31, 0xc2, 0xf9, 0xb4, 0x8f, 0x2e, 0x15, 0x58, 0x63, 0x7, 0x3c, 0x71, 0x4a, 0xeb, 0xd0, 0x9d, 0xa6, 0xcc, 0xf7, 0xba, 0x81, 0x20, 0x1b, 0x56, 0x6d, 0x9, 0x32, 0x7f, 0x44, 0xe5, 0xde, 0x93, 0xa8, 0x5b, 0x60, 0x2d, 0x16, 0xb7, 0x8c, 0xc1, 0xfa, 0x9e, 0xa5, 0xe8, 0xd3, 0x72, 0x49, 0x4, 0x3f, 0xff, 0xc4, 0x89, 0xb2, 0x13, 0x28, 0x65, 0x5e, 0x3a, 0x1, 0x4c, 0x77, 0xd6, 0xed, 0xa0, 0x9b, 0x68, 0x53, 0x1e, 0x25, 0x84, 0xbf, 0xf2, 0xc9, 0xad, 0x96, 0xdb, 0xe0, 0x41, 0x7a, 0x37, 0xc, 0xaa, 0x91, 0xdc, 0xe7, 0x46, 0x7d, 0x30, 0xb, 0x6f, 0x54, 0x19, 0x22, 0x83, 0xb8, 0xf5, 0xce, 0x3d, 0x6, 0x4b, 0x70, 0xd1, 0xea, 0xa7, 0x9c, 0xf8, 0xc3, 0x8e, 0xb5, 0x14, 0x2f, 0x62, 0x59, 0x99, 0xa2, 0xef, 0xd4, 0x75, 0x4e, 0x3, 0x38, 0x5c, 0x67, 0x2a, 0x11, 0xb0, 0x8b, 0xc6, 0xfd, 0xe, 0x35, 0x78, 0x43, 0xe2, 0xd9, 0x94, 0xaf, 0xcb, 0xf0, 0xbd, 0x86, 0x27, 0x1c, 0x51, 0x6a},
+ {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49, 0xe7, 0xdb, 0x9f, 0xa3, 0x17, 0x2b, 0x6f, 0x53, 0x1a, 0x26, 0x62, 0x5e, 0xea, 0xd6, 0x92, 0xae, 0xd3, 0xef, 0xab, 0x97, 0x23, 0x1f, 0x5b, 0x67, 0x2e, 0x12, 0x56, 0x6a, 0xde, 0xe2, 0xa6, 0x9a, 0x34, 0x8, 0x4c, 0x70, 0xc4, 0xf8, 0xbc, 0x80, 0xc9, 0xf5, 0xb1, 0x8d, 0x39, 0x5, 0x41, 0x7d, 0xbb, 0x87, 0xc3, 0xff, 0x4b, 0x77, 0x33, 0xf, 0x46, 0x7a, 0x3e, 0x2, 0xb6, 0x8a, 0xce, 0xf2, 0x5c, 0x60, 0x24, 0x18, 0xac, 0x90, 0xd4, 0xe8, 0xa1, 0x9d, 0xd9, 0xe5, 0x51, 0x6d, 0x29, 0x15, 0x68, 0x54, 0x10, 0x2c, 0x98, 0xa4, 0xe0, 0xdc, 0x95, 0xa9, 0xed, 0xd1, 0x65, 0x59, 0x1d, 0x21, 0x8f, 0xb3, 0xf7, 0xcb, 0x7f, 0x43, 0x7, 0x3b, 0x72, 0x4e, 0xa, 0x36, 0x82, 0xbe, 0xfa, 0xc6, 0x6b, 0x57, 0x13, 0x2f, 0x9b, 0xa7, 0xe3, 0xdf, 0x96, 0xaa, 0xee, 0xd2, 0x66, 0x5a, 0x1e, 0x22, 0x8c, 0xb0, 0xf4, 0xc8, 0x7c, 0x40, 0x4, 0x38, 0x71, 0x4d, 0x9, 0x35, 0x81, 0xbd, 0xf9, 0xc5, 0xb8, 0x84, 0xc0, 0xfc, 0x48, 0x74, 0x30, 0xc, 0x45, 0x79, 0x3d, 0x1, 0xb5, 0x89, 0xcd, 0xf1, 0x5f, 0x63, 0x27, 0x1b, 0xaf, 0x93, 0xd7, 0xeb, 0xa2, 0x9e, 0xda, 0xe6, 0x52, 0x6e, 0x2a, 0x16, 0xd0, 0xec, 0xa8, 0x94, 0x20, 0x1c, 0x58, 0x64, 0x2d, 0x11, 0x55, 0x69, 0xdd, 0xe1, 0xa5, 0x99, 0x37, 0xb, 0x4f, 0x73, 0xc7, 0xfb, 0xbf, 0x83, 0xca, 0xf6, 0xb2, 0x8e, 0x3a, 0x6, 0x42, 0x7e, 0x3, 0x3f, 0x7b, 0x47, 0xf3, 0xcf, 0x8b, 0xb7, 0xfe, 0xc2, 0x86, 0xba, 0xe, 0x32, 0x76, 0x4a, 0xe4, 0xd8, 0x9c, 0xa0, 0x14, 0x28, 0x6c, 0x50, 0x19, 0x25, 0x61, 0x5d, 0xe9, 0xd5, 0x91, 0xad},
+ {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46, 0xf7, 0xca, 0x8d, 0xb0, 0x3, 0x3e, 0x79, 0x44, 0x2, 0x3f, 0x78, 0x45, 0xf6, 0xcb, 0x8c, 0xb1, 0xf3, 0xce, 0x89, 0xb4, 0x7, 0x3a, 0x7d, 0x40, 0x6, 0x3b, 0x7c, 0x41, 0xf2, 0xcf, 0x88, 0xb5, 0x4, 0x39, 0x7e, 0x43, 0xf0, 0xcd, 0x8a, 0xb7, 0xf1, 0xcc, 0x8b, 0xb6, 0x5, 0x38, 0x7f, 0x42, 0xfb, 0xc6, 0x81, 0xbc, 0xf, 0x32, 0x75, 0x48, 0xe, 0x33, 0x74, 0x49, 0xfa, 0xc7, 0x80, 0xbd, 0xc, 0x31, 0x76, 0x4b, 0xf8, 0xc5, 0x82, 0xbf, 0xf9, 0xc4, 0x83, 0xbe, 0xd, 0x30, 0x77, 0x4a, 0x8, 0x35, 0x72, 0x4f, 0xfc, 0xc1, 0x86, 0xbb, 0xfd, 0xc0, 0x87, 0xba, 0x9, 0x34, 0x73, 0x4e, 0xff, 0xc2, 0x85, 0xb8, 0xb, 0x36, 0x71, 0x4c, 0xa, 0x37, 0x70, 0x4d, 0xfe, 0xc3, 0x84, 0xb9, 0xeb, 0xd6, 0x91, 0xac, 0x1f, 0x22, 0x65, 0x58, 0x1e, 0x23, 0x64, 0x59, 0xea, 0xd7, 0x90, 0xad, 0x1c, 0x21, 0x66, 0x5b, 0xe8, 0xd5, 0x92, 0xaf, 0xe9, 0xd4, 0x93, 0xae, 0x1d, 0x20, 0x67, 0x5a, 0x18, 0x25, 0x62, 0x5f, 0xec, 0xd1, 0x96, 0xab, 0xed, 0xd0, 0x97, 0xaa, 0x19, 0x24, 0x63, 0x5e, 0xef, 0xd2, 0x95, 0xa8, 0x1b, 0x26, 0x61, 0x5c, 0x1a, 0x27, 0x60, 0x5d, 0xee, 0xd3, 0x94, 0xa9, 0x10, 0x2d, 0x6a, 0x57, 0xe4, 0xd9, 0x9e, 0xa3, 0xe5, 0xd8, 0x9f, 0xa2, 0x11, 0x2c, 0x6b, 0x56, 0xe7, 0xda, 0x9d, 0xa0, 0x13, 0x2e, 0x69, 0x54, 0x12, 0x2f, 0x68, 0x55, 0xe6, 0xdb, 0x9c, 0xa1, 0xe3, 0xde, 0x99, 0xa4, 0x17, 0x2a, 0x6d, 0x50, 0x16, 0x2b, 0x6c, 0x51, 0xe2, 0xdf, 0x98, 0xa5, 0x14, 0x29, 0x6e, 0x53, 0xe0, 0xdd, 0x9a, 0xa7, 0xe1, 0xdc, 0x9b, 0xa6, 0x15, 0x28, 0x6f, 0x52},
+ {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57, 0xc7, 0xf9, 0xbb, 0x85, 0x3f, 0x1, 0x43, 0x7d, 0x2a, 0x14, 0x56, 0x68, 0xd2, 0xec, 0xae, 0x90, 0x93, 0xad, 0xef, 0xd1, 0x6b, 0x55, 0x17, 0x29, 0x7e, 0x40, 0x2, 0x3c, 0x86, 0xb8, 0xfa, 0xc4, 0x54, 0x6a, 0x28, 0x16, 0xac, 0x92, 0xd0, 0xee, 0xb9, 0x87, 0xc5, 0xfb, 0x41, 0x7f, 0x3d, 0x3, 0x3b, 0x5, 0x47, 0x79, 0xc3, 0xfd, 0xbf, 0x81, 0xd6, 0xe8, 0xaa, 0x94, 0x2e, 0x10, 0x52, 0x6c, 0xfc, 0xc2, 0x80, 0xbe, 0x4, 0x3a, 0x78, 0x46, 0x11, 0x2f, 0x6d, 0x53, 0xe9, 0xd7, 0x95, 0xab, 0xa8, 0x96, 0xd4, 0xea, 0x50, 0x6e, 0x2c, 0x12, 0x45, 0x7b, 0x39, 0x7, 0xbd, 0x83, 0xc1, 0xff, 0x6f, 0x51, 0x13, 0x2d, 0x97, 0xa9, 0xeb, 0xd5, 0x82, 0xbc, 0xfe, 0xc0, 0x7a, 0x44, 0x6, 0x38, 0x76, 0x48, 0xa, 0x34, 0x8e, 0xb0, 0xf2, 0xcc, 0x9b, 0xa5, 0xe7, 0xd9, 0x63, 0x5d, 0x1f, 0x21, 0xb1, 0x8f, 0xcd, 0xf3, 0x49, 0x77, 0x35, 0xb, 0x5c, 0x62, 0x20, 0x1e, 0xa4, 0x9a, 0xd8, 0xe6, 0xe5, 0xdb, 0x99, 0xa7, 0x1d, 0x23, 0x61, 0x5f, 0x8, 0x36, 0x74, 0x4a, 0xf0, 0xce, 0x8c, 0xb2, 0x22, 0x1c, 0x5e, 0x60, 0xda, 0xe4, 0xa6, 0x98, 0xcf, 0xf1, 0xb3, 0x8d, 0x37, 0x9, 0x4b, 0x75, 0x4d, 0x73, 0x31, 0xf, 0xb5, 0x8b, 0xc9, 0xf7, 0xa0, 0x9e, 0xdc, 0xe2, 0x58, 0x66, 0x24, 0x1a, 0x8a, 0xb4, 0xf6, 0xc8, 0x72, 0x4c, 0xe, 0x30, 0x67, 0x59, 0x1b, 0x25, 0x9f, 0xa1, 0xe3, 0xdd, 0xde, 0xe0, 0xa2, 0x9c, 0x26, 0x18, 0x5a, 0x64, 0x33, 0xd, 0x4f, 0x71, 0xcb, 0xf5, 0xb7, 0x89, 0x19, 0x27, 0x65, 0x5b, 0xe1, 0xdf, 0x9d, 0xa3, 0xf4, 0xca, 0x88, 0xb6, 0xc, 0x32, 0x70, 0x4e},
+ {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58, 0xd7, 0xe8, 0xa9, 0x96, 0x2b, 0x14, 0x55, 0x6a, 0x32, 0xd, 0x4c, 0x73, 0xce, 0xf1, 0xb0, 0x8f, 0xb3, 0x8c, 0xcd, 0xf2, 0x4f, 0x70, 0x31, 0xe, 0x56, 0x69, 0x28, 0x17, 0xaa, 0x95, 0xd4, 0xeb, 0x64, 0x5b, 0x1a, 0x25, 0x98, 0xa7, 0xe6, 0xd9, 0x81, 0xbe, 0xff, 0xc0, 0x7d, 0x42, 0x3, 0x3c, 0x7b, 0x44, 0x5, 0x3a, 0x87, 0xb8, 0xf9, 0xc6, 0x9e, 0xa1, 0xe0, 0xdf, 0x62, 0x5d, 0x1c, 0x23, 0xac, 0x93, 0xd2, 0xed, 0x50, 0x6f, 0x2e, 0x11, 0x49, 0x76, 0x37, 0x8, 0xb5, 0x8a, 0xcb, 0xf4, 0xc8, 0xf7, 0xb6, 0x89, 0x34, 0xb, 0x4a, 0x75, 0x2d, 0x12, 0x53, 0x6c, 0xd1, 0xee, 0xaf, 0x90, 0x1f, 0x20, 0x61, 0x5e, 0xe3, 0xdc, 0x9d, 0xa2, 0xfa, 0xc5, 0x84, 0xbb, 0x6, 0x39, 0x78, 0x47, 0xf6, 0xc9, 0x88, 0xb7, 0xa, 0x35, 0x74, 0x4b, 0x13, 0x2c, 0x6d, 0x52, 0xef, 0xd0, 0x91, 0xae, 0x21, 0x1e, 0x5f, 0x60, 0xdd, 0xe2, 0xa3, 0x9c, 0xc4, 0xfb, 0xba, 0x85, 0x38, 0x7, 0x46, 0x79, 0x45, 0x7a, 0x3b, 0x4, 0xb9, 0x86, 0xc7, 0xf8, 0xa0, 0x9f, 0xde, 0xe1, 0x5c, 0x63, 0x22, 0x1d, 0x92, 0xad, 0xec, 0xd3, 0x6e, 0x51, 0x10, 0x2f, 0x77, 0x48, 0x9, 0x36, 0x8b, 0xb4, 0xf5, 0xca, 0x8d, 0xb2, 0xf3, 0xcc, 0x71, 0x4e, 0xf, 0x30, 0x68, 0x57, 0x16, 0x29, 0x94, 0xab, 0xea, 0xd5, 0x5a, 0x65, 0x24, 0x1b, 0xa6, 0x99, 0xd8, 0xe7, 0xbf, 0x80, 0xc1, 0xfe, 0x43, 0x7c, 0x3d, 0x2, 0x3e, 0x1, 0x40, 0x7f, 0xc2, 0xfd, 0xbc, 0x83, 0xdb, 0xe4, 0xa5, 0x9a, 0x27, 0x18, 0x59, 0x66, 0xe9, 0xd6, 0x97, 0xa8, 0x15, 0x2a, 0x6b, 0x54, 0xc, 0x33, 0x72, 0x4d, 0xf0, 0xcf, 0x8e, 0xb1},
+ {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7, 0x74, 0x34, 0xf4, 0xb4, 0x69, 0x29, 0xe9, 0xa9, 0x4e, 0xe, 0xce, 0x8e, 0x53, 0x13, 0xd3, 0x93, 0xe8, 0xa8, 0x68, 0x28, 0xf5, 0xb5, 0x75, 0x35, 0xd2, 0x92, 0x52, 0x12, 0xcf, 0x8f, 0x4f, 0xf, 0x9c, 0xdc, 0x1c, 0x5c, 0x81, 0xc1, 0x1, 0x41, 0xa6, 0xe6, 0x26, 0x66, 0xbb, 0xfb, 0x3b, 0x7b, 0xcd, 0x8d, 0x4d, 0xd, 0xd0, 0x90, 0x50, 0x10, 0xf7, 0xb7, 0x77, 0x37, 0xea, 0xaa, 0x6a, 0x2a, 0xb9, 0xf9, 0x39, 0x79, 0xa4, 0xe4, 0x24, 0x64, 0x83, 0xc3, 0x3, 0x43, 0x9e, 0xde, 0x1e, 0x5e, 0x25, 0x65, 0xa5, 0xe5, 0x38, 0x78, 0xb8, 0xf8, 0x1f, 0x5f, 0x9f, 0xdf, 0x2, 0x42, 0x82, 0xc2, 0x51, 0x11, 0xd1, 0x91, 0x4c, 0xc, 0xcc, 0x8c, 0x6b, 0x2b, 0xeb, 0xab, 0x76, 0x36, 0xf6, 0xb6, 0x87, 0xc7, 0x7, 0x47, 0x9a, 0xda, 0x1a, 0x5a, 0xbd, 0xfd, 0x3d, 0x7d, 0xa0, 0xe0, 0x20, 0x60, 0xf3, 0xb3, 0x73, 0x33, 0xee, 0xae, 0x6e, 0x2e, 0xc9, 0x89, 0x49, 0x9, 0xd4, 0x94, 0x54, 0x14, 0x6f, 0x2f, 0xef, 0xaf, 0x72, 0x32, 0xf2, 0xb2, 0x55, 0x15, 0xd5, 0x95, 0x48, 0x8, 0xc8, 0x88, 0x1b, 0x5b, 0x9b, 0xdb, 0x6, 0x46, 0x86, 0xc6, 0x21, 0x61, 0xa1, 0xe1, 0x3c, 0x7c, 0xbc, 0xfc, 0x4a, 0xa, 0xca, 0x8a, 0x57, 0x17, 0xd7, 0x97, 0x70, 0x30, 0xf0, 0xb0, 0x6d, 0x2d, 0xed, 0xad, 0x3e, 0x7e, 0xbe, 0xfe, 0x23, 0x63, 0xa3, 0xe3, 0x4, 0x44, 0x84, 0xc4, 0x19, 0x59, 0x99, 0xd9, 0xa2, 0xe2, 0x22, 0x62, 0xbf, 0xff, 0x3f, 0x7f, 0x98, 0xd8, 0x18, 0x58, 0x85, 0xc5, 0x5, 0x45, 0xd6, 0x96, 0x56, 0x16, 0xcb, 0x8b, 0x4b, 0xb, 0xec, 0xac, 0x6c, 0x2c, 0xf1, 0xb1, 0x71, 0x31},
+ {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8, 0x64, 0x25, 0xe6, 0xa7, 0x7d, 0x3c, 0xff, 0xbe, 0x56, 0x17, 0xd4, 0x95, 0x4f, 0xe, 0xcd, 0x8c, 0xc8, 0x89, 0x4a, 0xb, 0xd1, 0x90, 0x53, 0x12, 0xfa, 0xbb, 0x78, 0x39, 0xe3, 0xa2, 0x61, 0x20, 0xac, 0xed, 0x2e, 0x6f, 0xb5, 0xf4, 0x37, 0x76, 0x9e, 0xdf, 0x1c, 0x5d, 0x87, 0xc6, 0x5, 0x44, 0x8d, 0xcc, 0xf, 0x4e, 0x94, 0xd5, 0x16, 0x57, 0xbf, 0xfe, 0x3d, 0x7c, 0xa6, 0xe7, 0x24, 0x65, 0xe9, 0xa8, 0x6b, 0x2a, 0xf0, 0xb1, 0x72, 0x33, 0xdb, 0x9a, 0x59, 0x18, 0xc2, 0x83, 0x40, 0x1, 0x45, 0x4, 0xc7, 0x86, 0x5c, 0x1d, 0xde, 0x9f, 0x77, 0x36, 0xf5, 0xb4, 0x6e, 0x2f, 0xec, 0xad, 0x21, 0x60, 0xa3, 0xe2, 0x38, 0x79, 0xba, 0xfb, 0x13, 0x52, 0x91, 0xd0, 0xa, 0x4b, 0x88, 0xc9, 0x7, 0x46, 0x85, 0xc4, 0x1e, 0x5f, 0x9c, 0xdd, 0x35, 0x74, 0xb7, 0xf6, 0x2c, 0x6d, 0xae, 0xef, 0x63, 0x22, 0xe1, 0xa0, 0x7a, 0x3b, 0xf8, 0xb9, 0x51, 0x10, 0xd3, 0x92, 0x48, 0x9, 0xca, 0x8b, 0xcf, 0x8e, 0x4d, 0xc, 0xd6, 0x97, 0x54, 0x15, 0xfd, 0xbc, 0x7f, 0x3e, 0xe4, 0xa5, 0x66, 0x27, 0xab, 0xea, 0x29, 0x68, 0xb2, 0xf3, 0x30, 0x71, 0x99, 0xd8, 0x1b, 0x5a, 0x80, 0xc1, 0x2, 0x43, 0x8a, 0xcb, 0x8, 0x49, 0x93, 0xd2, 0x11, 0x50, 0xb8, 0xf9, 0x3a, 0x7b, 0xa1, 0xe0, 0x23, 0x62, 0xee, 0xaf, 0x6c, 0x2d, 0xf7, 0xb6, 0x75, 0x34, 0xdc, 0x9d, 0x5e, 0x1f, 0xc5, 0x84, 0x47, 0x6, 0x42, 0x3, 0xc0, 0x81, 0x5b, 0x1a, 0xd9, 0x98, 0x70, 0x31, 0xf2, 0xb3, 0x69, 0x28, 0xeb, 0xaa, 0x26, 0x67, 0xa4, 0xe5, 0x3f, 0x7e, 0xbd, 0xfc, 0x14, 0x55, 0x96, 0xd7, 0xd, 0x4c, 0x8f, 0xce},
+ {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9, 0x54, 0x16, 0xd0, 0x92, 0x41, 0x3, 0xc5, 0x87, 0x7e, 0x3c, 0xfa, 0xb8, 0x6b, 0x29, 0xef, 0xad, 0xa8, 0xea, 0x2c, 0x6e, 0xbd, 0xff, 0x39, 0x7b, 0x82, 0xc0, 0x6, 0x44, 0x97, 0xd5, 0x13, 0x51, 0xfc, 0xbe, 0x78, 0x3a, 0xe9, 0xab, 0x6d, 0x2f, 0xd6, 0x94, 0x52, 0x10, 0xc3, 0x81, 0x47, 0x5, 0x4d, 0xf, 0xc9, 0x8b, 0x58, 0x1a, 0xdc, 0x9e, 0x67, 0x25, 0xe3, 0xa1, 0x72, 0x30, 0xf6, 0xb4, 0x19, 0x5b, 0x9d, 0xdf, 0xc, 0x4e, 0x88, 0xca, 0x33, 0x71, 0xb7, 0xf5, 0x26, 0x64, 0xa2, 0xe0, 0xe5, 0xa7, 0x61, 0x23, 0xf0, 0xb2, 0x74, 0x36, 0xcf, 0x8d, 0x4b, 0x9, 0xda, 0x98, 0x5e, 0x1c, 0xb1, 0xf3, 0x35, 0x77, 0xa4, 0xe6, 0x20, 0x62, 0x9b, 0xd9, 0x1f, 0x5d, 0x8e, 0xcc, 0xa, 0x48, 0x9a, 0xd8, 0x1e, 0x5c, 0x8f, 0xcd, 0xb, 0x49, 0xb0, 0xf2, 0x34, 0x76, 0xa5, 0xe7, 0x21, 0x63, 0xce, 0x8c, 0x4a, 0x8, 0xdb, 0x99, 0x5f, 0x1d, 0xe4, 0xa6, 0x60, 0x22, 0xf1, 0xb3, 0x75, 0x37, 0x32, 0x70, 0xb6, 0xf4, 0x27, 0x65, 0xa3, 0xe1, 0x18, 0x5a, 0x9c, 0xde, 0xd, 0x4f, 0x89, 0xcb, 0x66, 0x24, 0xe2, 0xa0, 0x73, 0x31, 0xf7, 0xb5, 0x4c, 0xe, 0xc8, 0x8a, 0x59, 0x1b, 0xdd, 0x9f, 0xd7, 0x95, 0x53, 0x11, 0xc2, 0x80, 0x46, 0x4, 0xfd, 0xbf, 0x79, 0x3b, 0xe8, 0xaa, 0x6c, 0x2e, 0x83, 0xc1, 0x7, 0x45, 0x96, 0xd4, 0x12, 0x50, 0xa9, 0xeb, 0x2d, 0x6f, 0xbc, 0xfe, 0x38, 0x7a, 0x7f, 0x3d, 0xfb, 0xb9, 0x6a, 0x28, 0xee, 0xac, 0x55, 0x17, 0xd1, 0x93, 0x40, 0x2, 0xc4, 0x86, 0x2b, 0x69, 0xaf, 0xed, 0x3e, 0x7c, 0xba, 0xf8, 0x1, 0x43, 0x85, 0xc7, 0x14, 0x56, 0x90, 0xd2},
+ {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6, 0x44, 0x7, 0xc2, 0x81, 0x55, 0x16, 0xd3, 0x90, 0x66, 0x25, 0xe0, 0xa3, 0x77, 0x34, 0xf1, 0xb2, 0x88, 0xcb, 0xe, 0x4d, 0x99, 0xda, 0x1f, 0x5c, 0xaa, 0xe9, 0x2c, 0x6f, 0xbb, 0xf8, 0x3d, 0x7e, 0xcc, 0x8f, 0x4a, 0x9, 0xdd, 0x9e, 0x5b, 0x18, 0xee, 0xad, 0x68, 0x2b, 0xff, 0xbc, 0x79, 0x3a, 0xd, 0x4e, 0x8b, 0xc8, 0x1c, 0x5f, 0x9a, 0xd9, 0x2f, 0x6c, 0xa9, 0xea, 0x3e, 0x7d, 0xb8, 0xfb, 0x49, 0xa, 0xcf, 0x8c, 0x58, 0x1b, 0xde, 0x9d, 0x6b, 0x28, 0xed, 0xae, 0x7a, 0x39, 0xfc, 0xbf, 0x85, 0xc6, 0x3, 0x40, 0x94, 0xd7, 0x12, 0x51, 0xa7, 0xe4, 0x21, 0x62, 0xb6, 0xf5, 0x30, 0x73, 0xc1, 0x82, 0x47, 0x4, 0xd0, 0x93, 0x56, 0x15, 0xe3, 0xa0, 0x65, 0x26, 0xf2, 0xb1, 0x74, 0x37, 0x1a, 0x59, 0x9c, 0xdf, 0xb, 0x48, 0x8d, 0xce, 0x38, 0x7b, 0xbe, 0xfd, 0x29, 0x6a, 0xaf, 0xec, 0x5e, 0x1d, 0xd8, 0x9b, 0x4f, 0xc, 0xc9, 0x8a, 0x7c, 0x3f, 0xfa, 0xb9, 0x6d, 0x2e, 0xeb, 0xa8, 0x92, 0xd1, 0x14, 0x57, 0x83, 0xc0, 0x5, 0x46, 0xb0, 0xf3, 0x36, 0x75, 0xa1, 0xe2, 0x27, 0x64, 0xd6, 0x95, 0x50, 0x13, 0xc7, 0x84, 0x41, 0x2, 0xf4, 0xb7, 0x72, 0x31, 0xe5, 0xa6, 0x63, 0x20, 0x17, 0x54, 0x91, 0xd2, 0x6, 0x45, 0x80, 0xc3, 0x35, 0x76, 0xb3, 0xf0, 0x24, 0x67, 0xa2, 0xe1, 0x53, 0x10, 0xd5, 0x96, 0x42, 0x1, 0xc4, 0x87, 0x71, 0x32, 0xf7, 0xb4, 0x60, 0x23, 0xe6, 0xa5, 0x9f, 0xdc, 0x19, 0x5a, 0x8e, 0xcd, 0x8, 0x4b, 0xbd, 0xfe, 0x3b, 0x78, 0xac, 0xef, 0x2a, 0x69, 0xdb, 0x98, 0x5d, 0x1e, 0xca, 0x89, 0x4c, 0xf, 0xf9, 0xba, 0x7f, 0x3c, 0xe8, 0xab, 0x6e, 0x2d},
+ {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb, 0x34, 0x70, 0xbc, 0xf8, 0x39, 0x7d, 0xb1, 0xf5, 0x2e, 0x6a, 0xa6, 0xe2, 0x23, 0x67, 0xab, 0xef, 0x68, 0x2c, 0xe0, 0xa4, 0x65, 0x21, 0xed, 0xa9, 0x72, 0x36, 0xfa, 0xbe, 0x7f, 0x3b, 0xf7, 0xb3, 0x5c, 0x18, 0xd4, 0x90, 0x51, 0x15, 0xd9, 0x9d, 0x46, 0x2, 0xce, 0x8a, 0x4b, 0xf, 0xc3, 0x87, 0xd0, 0x94, 0x58, 0x1c, 0xdd, 0x99, 0x55, 0x11, 0xca, 0x8e, 0x42, 0x6, 0xc7, 0x83, 0x4f, 0xb, 0xe4, 0xa0, 0x6c, 0x28, 0xe9, 0xad, 0x61, 0x25, 0xfe, 0xba, 0x76, 0x32, 0xf3, 0xb7, 0x7b, 0x3f, 0xb8, 0xfc, 0x30, 0x74, 0xb5, 0xf1, 0x3d, 0x79, 0xa2, 0xe6, 0x2a, 0x6e, 0xaf, 0xeb, 0x27, 0x63, 0x8c, 0xc8, 0x4, 0x40, 0x81, 0xc5, 0x9, 0x4d, 0x96, 0xd2, 0x1e, 0x5a, 0x9b, 0xdf, 0x13, 0x57, 0xbd, 0xf9, 0x35, 0x71, 0xb0, 0xf4, 0x38, 0x7c, 0xa7, 0xe3, 0x2f, 0x6b, 0xaa, 0xee, 0x22, 0x66, 0x89, 0xcd, 0x1, 0x45, 0x84, 0xc0, 0xc, 0x48, 0x93, 0xd7, 0x1b, 0x5f, 0x9e, 0xda, 0x16, 0x52, 0xd5, 0x91, 0x5d, 0x19, 0xd8, 0x9c, 0x50, 0x14, 0xcf, 0x8b, 0x47, 0x3, 0xc2, 0x86, 0x4a, 0xe, 0xe1, 0xa5, 0x69, 0x2d, 0xec, 0xa8, 0x64, 0x20, 0xfb, 0xbf, 0x73, 0x37, 0xf6, 0xb2, 0x7e, 0x3a, 0x6d, 0x29, 0xe5, 0xa1, 0x60, 0x24, 0xe8, 0xac, 0x77, 0x33, 0xff, 0xbb, 0x7a, 0x3e, 0xf2, 0xb6, 0x59, 0x1d, 0xd1, 0x95, 0x54, 0x10, 0xdc, 0x98, 0x43, 0x7, 0xcb, 0x8f, 0x4e, 0xa, 0xc6, 0x82, 0x5, 0x41, 0x8d, 0xc9, 0x8, 0x4c, 0x80, 0xc4, 0x1f, 0x5b, 0x97, 0xd3, 0x12, 0x56, 0x9a, 0xde, 0x31, 0x75, 0xb9, 0xfd, 0x3c, 0x78, 0xb4, 0xf0, 0x2b, 0x6f, 0xa3, 0xe7, 0x26, 0x62, 0xae, 0xea},
+ {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4, 0x24, 0x61, 0xae, 0xeb, 0x2d, 0x68, 0xa7, 0xe2, 0x36, 0x73, 0xbc, 0xf9, 0x3f, 0x7a, 0xb5, 0xf0, 0x48, 0xd, 0xc2, 0x87, 0x41, 0x4, 0xcb, 0x8e, 0x5a, 0x1f, 0xd0, 0x95, 0x53, 0x16, 0xd9, 0x9c, 0x6c, 0x29, 0xe6, 0xa3, 0x65, 0x20, 0xef, 0xaa, 0x7e, 0x3b, 0xf4, 0xb1, 0x77, 0x32, 0xfd, 0xb8, 0x90, 0xd5, 0x1a, 0x5f, 0x99, 0xdc, 0x13, 0x56, 0x82, 0xc7, 0x8, 0x4d, 0x8b, 0xce, 0x1, 0x44, 0xb4, 0xf1, 0x3e, 0x7b, 0xbd, 0xf8, 0x37, 0x72, 0xa6, 0xe3, 0x2c, 0x69, 0xaf, 0xea, 0x25, 0x60, 0xd8, 0x9d, 0x52, 0x17, 0xd1, 0x94, 0x5b, 0x1e, 0xca, 0x8f, 0x40, 0x5, 0xc3, 0x86, 0x49, 0xc, 0xfc, 0xb9, 0x76, 0x33, 0xf5, 0xb0, 0x7f, 0x3a, 0xee, 0xab, 0x64, 0x21, 0xe7, 0xa2, 0x6d, 0x28, 0x3d, 0x78, 0xb7, 0xf2, 0x34, 0x71, 0xbe, 0xfb, 0x2f, 0x6a, 0xa5, 0xe0, 0x26, 0x63, 0xac, 0xe9, 0x19, 0x5c, 0x93, 0xd6, 0x10, 0x55, 0x9a, 0xdf, 0xb, 0x4e, 0x81, 0xc4, 0x2, 0x47, 0x88, 0xcd, 0x75, 0x30, 0xff, 0xba, 0x7c, 0x39, 0xf6, 0xb3, 0x67, 0x22, 0xed, 0xa8, 0x6e, 0x2b, 0xe4, 0xa1, 0x51, 0x14, 0xdb, 0x9e, 0x58, 0x1d, 0xd2, 0x97, 0x43, 0x6, 0xc9, 0x8c, 0x4a, 0xf, 0xc0, 0x85, 0xad, 0xe8, 0x27, 0x62, 0xa4, 0xe1, 0x2e, 0x6b, 0xbf, 0xfa, 0x35, 0x70, 0xb6, 0xf3, 0x3c, 0x79, 0x89, 0xcc, 0x3, 0x46, 0x80, 0xc5, 0xa, 0x4f, 0x9b, 0xde, 0x11, 0x54, 0x92, 0xd7, 0x18, 0x5d, 0xe5, 0xa0, 0x6f, 0x2a, 0xec, 0xa9, 0x66, 0x23, 0xf7, 0xb2, 0x7d, 0x38, 0xfe, 0xbb, 0x74, 0x31, 0xc1, 0x84, 0x4b, 0xe, 0xc8, 0x8d, 0x42, 0x7, 0xd3, 0x96, 0x59, 0x1c, 0xda, 0x9f, 0x50, 0x15},
+ {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5, 0x14, 0x52, 0x98, 0xde, 0x11, 0x57, 0x9d, 0xdb, 0x1e, 0x58, 0x92, 0xd4, 0x1b, 0x5d, 0x97, 0xd1, 0x28, 0x6e, 0xa4, 0xe2, 0x2d, 0x6b, 0xa1, 0xe7, 0x22, 0x64, 0xae, 0xe8, 0x27, 0x61, 0xab, 0xed, 0x3c, 0x7a, 0xb0, 0xf6, 0x39, 0x7f, 0xb5, 0xf3, 0x36, 0x70, 0xba, 0xfc, 0x33, 0x75, 0xbf, 0xf9, 0x50, 0x16, 0xdc, 0x9a, 0x55, 0x13, 0xd9, 0x9f, 0x5a, 0x1c, 0xd6, 0x90, 0x5f, 0x19, 0xd3, 0x95, 0x44, 0x2, 0xc8, 0x8e, 0x41, 0x7, 0xcd, 0x8b, 0x4e, 0x8, 0xc2, 0x84, 0x4b, 0xd, 0xc7, 0x81, 0x78, 0x3e, 0xf4, 0xb2, 0x7d, 0x3b, 0xf1, 0xb7, 0x72, 0x34, 0xfe, 0xb8, 0x77, 0x31, 0xfb, 0xbd, 0x6c, 0x2a, 0xe0, 0xa6, 0x69, 0x2f, 0xe5, 0xa3, 0x66, 0x20, 0xea, 0xac, 0x63, 0x25, 0xef, 0xa9, 0xa0, 0xe6, 0x2c, 0x6a, 0xa5, 0xe3, 0x29, 0x6f, 0xaa, 0xec, 0x26, 0x60, 0xaf, 0xe9, 0x23, 0x65, 0xb4, 0xf2, 0x38, 0x7e, 0xb1, 0xf7, 0x3d, 0x7b, 0xbe, 0xf8, 0x32, 0x74, 0xbb, 0xfd, 0x37, 0x71, 0x88, 0xce, 0x4, 0x42, 0x8d, 0xcb, 0x1, 0x47, 0x82, 0xc4, 0xe, 0x48, 0x87, 0xc1, 0xb, 0x4d, 0x9c, 0xda, 0x10, 0x56, 0x99, 0xdf, 0x15, 0x53, 0x96, 0xd0, 0x1a, 0x5c, 0x93, 0xd5, 0x1f, 0x59, 0xf0, 0xb6, 0x7c, 0x3a, 0xf5, 0xb3, 0x79, 0x3f, 0xfa, 0xbc, 0x76, 0x30, 0xff, 0xb9, 0x73, 0x35, 0xe4, 0xa2, 0x68, 0x2e, 0xe1, 0xa7, 0x6d, 0x2b, 0xee, 0xa8, 0x62, 0x24, 0xeb, 0xad, 0x67, 0x21, 0xd8, 0x9e, 0x54, 0x12, 0xdd, 0x9b, 0x51, 0x17, 0xd2, 0x94, 0x5e, 0x18, 0xd7, 0x91, 0x5b, 0x1d, 0xcc, 0x8a, 0x40, 0x6, 0xc9, 0x8f, 0x45, 0x3, 0xc6, 0x80, 0x4a, 0xc, 0xc3, 0x85, 0x4f, 0x9},
+ {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca, 0x4, 0x43, 0x8a, 0xcd, 0x5, 0x42, 0x8b, 0xcc, 0x6, 0x41, 0x88, 0xcf, 0x7, 0x40, 0x89, 0xce, 0x8, 0x4f, 0x86, 0xc1, 0x9, 0x4e, 0x87, 0xc0, 0xa, 0x4d, 0x84, 0xc3, 0xb, 0x4c, 0x85, 0xc2, 0xc, 0x4b, 0x82, 0xc5, 0xd, 0x4a, 0x83, 0xc4, 0xe, 0x49, 0x80, 0xc7, 0xf, 0x48, 0x81, 0xc6, 0x10, 0x57, 0x9e, 0xd9, 0x11, 0x56, 0x9f, 0xd8, 0x12, 0x55, 0x9c, 0xdb, 0x13, 0x54, 0x9d, 0xda, 0x14, 0x53, 0x9a, 0xdd, 0x15, 0x52, 0x9b, 0xdc, 0x16, 0x51, 0x98, 0xdf, 0x17, 0x50, 0x99, 0xde, 0x18, 0x5f, 0x96, 0xd1, 0x19, 0x5e, 0x97, 0xd0, 0x1a, 0x5d, 0x94, 0xd3, 0x1b, 0x5c, 0x95, 0xd2, 0x1c, 0x5b, 0x92, 0xd5, 0x1d, 0x5a, 0x93, 0xd4, 0x1e, 0x59, 0x90, 0xd7, 0x1f, 0x58, 0x91, 0xd6, 0x20, 0x67, 0xae, 0xe9, 0x21, 0x66, 0xaf, 0xe8, 0x22, 0x65, 0xac, 0xeb, 0x23, 0x64, 0xad, 0xea, 0x24, 0x63, 0xaa, 0xed, 0x25, 0x62, 0xab, 0xec, 0x26, 0x61, 0xa8, 0xef, 0x27, 0x60, 0xa9, 0xee, 0x28, 0x6f, 0xa6, 0xe1, 0x29, 0x6e, 0xa7, 0xe0, 0x2a, 0x6d, 0xa4, 0xe3, 0x2b, 0x6c, 0xa5, 0xe2, 0x2c, 0x6b, 0xa2, 0xe5, 0x2d, 0x6a, 0xa3, 0xe4, 0x2e, 0x69, 0xa0, 0xe7, 0x2f, 0x68, 0xa1, 0xe6, 0x30, 0x77, 0xbe, 0xf9, 0x31, 0x76, 0xbf, 0xf8, 0x32, 0x75, 0xbc, 0xfb, 0x33, 0x74, 0xbd, 0xfa, 0x34, 0x73, 0xba, 0xfd, 0x35, 0x72, 0xbb, 0xfc, 0x36, 0x71, 0xb8, 0xff, 0x37, 0x70, 0xb9, 0xfe, 0x38, 0x7f, 0xb6, 0xf1, 0x39, 0x7e, 0xb7, 0xf0, 0x3a, 0x7d, 0xb4, 0xf3, 0x3b, 0x7c, 0xb5, 0xf2, 0x3c, 0x7b, 0xb2, 0xf5, 0x3d, 0x7a, 0xb3, 0xf4, 0x3e, 0x79, 0xb0, 0xf7, 0x3f, 0x78, 0xb1, 0xf6},
+ {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f, 0xf4, 0xbc, 0x64, 0x2c, 0xc9, 0x81, 0x59, 0x11, 0x8e, 0xc6, 0x1e, 0x56, 0xb3, 0xfb, 0x23, 0x6b, 0xf5, 0xbd, 0x65, 0x2d, 0xc8, 0x80, 0x58, 0x10, 0x8f, 0xc7, 0x1f, 0x57, 0xb2, 0xfa, 0x22, 0x6a, 0x1, 0x49, 0x91, 0xd9, 0x3c, 0x74, 0xac, 0xe4, 0x7b, 0x33, 0xeb, 0xa3, 0x46, 0xe, 0xd6, 0x9e, 0xf7, 0xbf, 0x67, 0x2f, 0xca, 0x82, 0x5a, 0x12, 0x8d, 0xc5, 0x1d, 0x55, 0xb0, 0xf8, 0x20, 0x68, 0x3, 0x4b, 0x93, 0xdb, 0x3e, 0x76, 0xae, 0xe6, 0x79, 0x31, 0xe9, 0xa1, 0x44, 0xc, 0xd4, 0x9c, 0x2, 0x4a, 0x92, 0xda, 0x3f, 0x77, 0xaf, 0xe7, 0x78, 0x30, 0xe8, 0xa0, 0x45, 0xd, 0xd5, 0x9d, 0xf6, 0xbe, 0x66, 0x2e, 0xcb, 0x83, 0x5b, 0x13, 0x8c, 0xc4, 0x1c, 0x54, 0xb1, 0xf9, 0x21, 0x69, 0xf3, 0xbb, 0x63, 0x2b, 0xce, 0x86, 0x5e, 0x16, 0x89, 0xc1, 0x19, 0x51, 0xb4, 0xfc, 0x24, 0x6c, 0x7, 0x4f, 0x97, 0xdf, 0x3a, 0x72, 0xaa, 0xe2, 0x7d, 0x35, 0xed, 0xa5, 0x40, 0x8, 0xd0, 0x98, 0x6, 0x4e, 0x96, 0xde, 0x3b, 0x73, 0xab, 0xe3, 0x7c, 0x34, 0xec, 0xa4, 0x41, 0x9, 0xd1, 0x99, 0xf2, 0xba, 0x62, 0x2a, 0xcf, 0x87, 0x5f, 0x17, 0x88, 0xc0, 0x18, 0x50, 0xb5, 0xfd, 0x25, 0x6d, 0x4, 0x4c, 0x94, 0xdc, 0x39, 0x71, 0xa9, 0xe1, 0x7e, 0x36, 0xee, 0xa6, 0x43, 0xb, 0xd3, 0x9b, 0xf0, 0xb8, 0x60, 0x28, 0xcd, 0x85, 0x5d, 0x15, 0x8a, 0xc2, 0x1a, 0x52, 0xb7, 0xff, 0x27, 0x6f, 0xf1, 0xb9, 0x61, 0x29, 0xcc, 0x84, 0x5c, 0x14, 0x8b, 0xc3, 0x1b, 0x53, 0xb6, 0xfe, 0x26, 0x6e, 0x5, 0x4d, 0x95, 0xdd, 0x38, 0x70, 0xa8, 0xe0, 0x7f, 0x37, 0xef, 0xa7, 0x42, 0xa, 0xd2, 0x9a},
+ {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90, 0xe4, 0xad, 0x76, 0x3f, 0xdd, 0x94, 0x4f, 0x6, 0x96, 0xdf, 0x4, 0x4d, 0xaf, 0xe6, 0x3d, 0x74, 0xd5, 0x9c, 0x47, 0xe, 0xec, 0xa5, 0x7e, 0x37, 0xa7, 0xee, 0x35, 0x7c, 0x9e, 0xd7, 0xc, 0x45, 0x31, 0x78, 0xa3, 0xea, 0x8, 0x41, 0x9a, 0xd3, 0x43, 0xa, 0xd1, 0x98, 0x7a, 0x33, 0xe8, 0xa1, 0xb7, 0xfe, 0x25, 0x6c, 0x8e, 0xc7, 0x1c, 0x55, 0xc5, 0x8c, 0x57, 0x1e, 0xfc, 0xb5, 0x6e, 0x27, 0x53, 0x1a, 0xc1, 0x88, 0x6a, 0x23, 0xf8, 0xb1, 0x21, 0x68, 0xb3, 0xfa, 0x18, 0x51, 0x8a, 0xc3, 0x62, 0x2b, 0xf0, 0xb9, 0x5b, 0x12, 0xc9, 0x80, 0x10, 0x59, 0x82, 0xcb, 0x29, 0x60, 0xbb, 0xf2, 0x86, 0xcf, 0x14, 0x5d, 0xbf, 0xf6, 0x2d, 0x64, 0xf4, 0xbd, 0x66, 0x2f, 0xcd, 0x84, 0x5f, 0x16, 0x73, 0x3a, 0xe1, 0xa8, 0x4a, 0x3, 0xd8, 0x91, 0x1, 0x48, 0x93, 0xda, 0x38, 0x71, 0xaa, 0xe3, 0x97, 0xde, 0x5, 0x4c, 0xae, 0xe7, 0x3c, 0x75, 0xe5, 0xac, 0x77, 0x3e, 0xdc, 0x95, 0x4e, 0x7, 0xa6, 0xef, 0x34, 0x7d, 0x9f, 0xd6, 0xd, 0x44, 0xd4, 0x9d, 0x46, 0xf, 0xed, 0xa4, 0x7f, 0x36, 0x42, 0xb, 0xd0, 0x99, 0x7b, 0x32, 0xe9, 0xa0, 0x30, 0x79, 0xa2, 0xeb, 0x9, 0x40, 0x9b, 0xd2, 0xc4, 0x8d, 0x56, 0x1f, 0xfd, 0xb4, 0x6f, 0x26, 0xb6, 0xff, 0x24, 0x6d, 0x8f, 0xc6, 0x1d, 0x54, 0x20, 0x69, 0xb2, 0xfb, 0x19, 0x50, 0x8b, 0xc2, 0x52, 0x1b, 0xc0, 0x89, 0x6b, 0x22, 0xf9, 0xb0, 0x11, 0x58, 0x83, 0xca, 0x28, 0x61, 0xba, 0xf3, 0x63, 0x2a, 0xf1, 0xb8, 0x5a, 0x13, 0xc8, 0x81, 0xf5, 0xbc, 0x67, 0x2e, 0xcc, 0x85, 0x5e, 0x17, 0x87, 0xce, 0x15, 0x5c, 0xbe, 0xf7, 0x2c, 0x65},
+ {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81, 0xd4, 0x9e, 0x40, 0xa, 0xe1, 0xab, 0x75, 0x3f, 0xbe, 0xf4, 0x2a, 0x60, 0x8b, 0xc1, 0x1f, 0x55, 0xb5, 0xff, 0x21, 0x6b, 0x80, 0xca, 0x14, 0x5e, 0xdf, 0x95, 0x4b, 0x1, 0xea, 0xa0, 0x7e, 0x34, 0x61, 0x2b, 0xf5, 0xbf, 0x54, 0x1e, 0xc0, 0x8a, 0xb, 0x41, 0x9f, 0xd5, 0x3e, 0x74, 0xaa, 0xe0, 0x77, 0x3d, 0xe3, 0xa9, 0x42, 0x8, 0xd6, 0x9c, 0x1d, 0x57, 0x89, 0xc3, 0x28, 0x62, 0xbc, 0xf6, 0xa3, 0xe9, 0x37, 0x7d, 0x96, 0xdc, 0x2, 0x48, 0xc9, 0x83, 0x5d, 0x17, 0xfc, 0xb6, 0x68, 0x22, 0xc2, 0x88, 0x56, 0x1c, 0xf7, 0xbd, 0x63, 0x29, 0xa8, 0xe2, 0x3c, 0x76, 0x9d, 0xd7, 0x9, 0x43, 0x16, 0x5c, 0x82, 0xc8, 0x23, 0x69, 0xb7, 0xfd, 0x7c, 0x36, 0xe8, 0xa2, 0x49, 0x3, 0xdd, 0x97, 0xee, 0xa4, 0x7a, 0x30, 0xdb, 0x91, 0x4f, 0x5, 0x84, 0xce, 0x10, 0x5a, 0xb1, 0xfb, 0x25, 0x6f, 0x3a, 0x70, 0xae, 0xe4, 0xf, 0x45, 0x9b, 0xd1, 0x50, 0x1a, 0xc4, 0x8e, 0x65, 0x2f, 0xf1, 0xbb, 0x5b, 0x11, 0xcf, 0x85, 0x6e, 0x24, 0xfa, 0xb0, 0x31, 0x7b, 0xa5, 0xef, 0x4, 0x4e, 0x90, 0xda, 0x8f, 0xc5, 0x1b, 0x51, 0xba, 0xf0, 0x2e, 0x64, 0xe5, 0xaf, 0x71, 0x3b, 0xd0, 0x9a, 0x44, 0xe, 0x99, 0xd3, 0xd, 0x47, 0xac, 0xe6, 0x38, 0x72, 0xf3, 0xb9, 0x67, 0x2d, 0xc6, 0x8c, 0x52, 0x18, 0x4d, 0x7, 0xd9, 0x93, 0x78, 0x32, 0xec, 0xa6, 0x27, 0x6d, 0xb3, 0xf9, 0x12, 0x58, 0x86, 0xcc, 0x2c, 0x66, 0xb8, 0xf2, 0x19, 0x53, 0x8d, 0xc7, 0x46, 0xc, 0xd2, 0x98, 0x73, 0x39, 0xe7, 0xad, 0xf8, 0xb2, 0x6c, 0x26, 0xcd, 0x87, 0x59, 0x13, 0x92, 0xd8, 0x6, 0x4c, 0xa7, 0xed, 0x33, 0x79},
+ {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e, 0xc4, 0x8f, 0x52, 0x19, 0xf5, 0xbe, 0x63, 0x28, 0xa6, 0xed, 0x30, 0x7b, 0x97, 0xdc, 0x1, 0x4a, 0x95, 0xde, 0x3, 0x48, 0xa4, 0xef, 0x32, 0x79, 0xf7, 0xbc, 0x61, 0x2a, 0xc6, 0x8d, 0x50, 0x1b, 0x51, 0x1a, 0xc7, 0x8c, 0x60, 0x2b, 0xf6, 0xbd, 0x33, 0x78, 0xa5, 0xee, 0x2, 0x49, 0x94, 0xdf, 0x37, 0x7c, 0xa1, 0xea, 0x6, 0x4d, 0x90, 0xdb, 0x55, 0x1e, 0xc3, 0x88, 0x64, 0x2f, 0xf2, 0xb9, 0xf3, 0xb8, 0x65, 0x2e, 0xc2, 0x89, 0x54, 0x1f, 0x91, 0xda, 0x7, 0x4c, 0xa0, 0xeb, 0x36, 0x7d, 0xa2, 0xe9, 0x34, 0x7f, 0x93, 0xd8, 0x5, 0x4e, 0xc0, 0x8b, 0x56, 0x1d, 0xf1, 0xba, 0x67, 0x2c, 0x66, 0x2d, 0xf0, 0xbb, 0x57, 0x1c, 0xc1, 0x8a, 0x4, 0x4f, 0x92, 0xd9, 0x35, 0x7e, 0xa3, 0xe8, 0x6e, 0x25, 0xf8, 0xb3, 0x5f, 0x14, 0xc9, 0x82, 0xc, 0x47, 0x9a, 0xd1, 0x3d, 0x76, 0xab, 0xe0, 0xaa, 0xe1, 0x3c, 0x77, 0x9b, 0xd0, 0xd, 0x46, 0xc8, 0x83, 0x5e, 0x15, 0xf9, 0xb2, 0x6f, 0x24, 0xfb, 0xb0, 0x6d, 0x26, 0xca, 0x81, 0x5c, 0x17, 0x99, 0xd2, 0xf, 0x44, 0xa8, 0xe3, 0x3e, 0x75, 0x3f, 0x74, 0xa9, 0xe2, 0xe, 0x45, 0x98, 0xd3, 0x5d, 0x16, 0xcb, 0x80, 0x6c, 0x27, 0xfa, 0xb1, 0x59, 0x12, 0xcf, 0x84, 0x68, 0x23, 0xfe, 0xb5, 0x3b, 0x70, 0xad, 0xe6, 0xa, 0x41, 0x9c, 0xd7, 0x9d, 0xd6, 0xb, 0x40, 0xac, 0xe7, 0x3a, 0x71, 0xff, 0xb4, 0x69, 0x22, 0xce, 0x85, 0x58, 0x13, 0xcc, 0x87, 0x5a, 0x11, 0xfd, 0xb6, 0x6b, 0x20, 0xae, 0xe5, 0x38, 0x73, 0x9f, 0xd4, 0x9, 0x42, 0x8, 0x43, 0x9e, 0xd5, 0x39, 0x72, 0xaf, 0xe4, 0x6a, 0x21, 0xfc, 0xb7, 0x5b, 0x10, 0xcd, 0x86},
+ {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3, 0xb4, 0xf8, 0x2c, 0x60, 0x99, 0xd5, 0x1, 0x4d, 0xee, 0xa2, 0x76, 0x3a, 0xc3, 0x8f, 0x5b, 0x17, 0x75, 0x39, 0xed, 0xa1, 0x58, 0x14, 0xc0, 0x8c, 0x2f, 0x63, 0xb7, 0xfb, 0x2, 0x4e, 0x9a, 0xd6, 0xc1, 0x8d, 0x59, 0x15, 0xec, 0xa0, 0x74, 0x38, 0x9b, 0xd7, 0x3, 0x4f, 0xb6, 0xfa, 0x2e, 0x62, 0xea, 0xa6, 0x72, 0x3e, 0xc7, 0x8b, 0x5f, 0x13, 0xb0, 0xfc, 0x28, 0x64, 0x9d, 0xd1, 0x5, 0x49, 0x5e, 0x12, 0xc6, 0x8a, 0x73, 0x3f, 0xeb, 0xa7, 0x4, 0x48, 0x9c, 0xd0, 0x29, 0x65, 0xb1, 0xfd, 0x9f, 0xd3, 0x7, 0x4b, 0xb2, 0xfe, 0x2a, 0x66, 0xc5, 0x89, 0x5d, 0x11, 0xe8, 0xa4, 0x70, 0x3c, 0x2b, 0x67, 0xb3, 0xff, 0x6, 0x4a, 0x9e, 0xd2, 0x71, 0x3d, 0xe9, 0xa5, 0x5c, 0x10, 0xc4, 0x88, 0xc9, 0x85, 0x51, 0x1d, 0xe4, 0xa8, 0x7c, 0x30, 0x93, 0xdf, 0xb, 0x47, 0xbe, 0xf2, 0x26, 0x6a, 0x7d, 0x31, 0xe5, 0xa9, 0x50, 0x1c, 0xc8, 0x84, 0x27, 0x6b, 0xbf, 0xf3, 0xa, 0x46, 0x92, 0xde, 0xbc, 0xf0, 0x24, 0x68, 0x91, 0xdd, 0x9, 0x45, 0xe6, 0xaa, 0x7e, 0x32, 0xcb, 0x87, 0x53, 0x1f, 0x8, 0x44, 0x90, 0xdc, 0x25, 0x69, 0xbd, 0xf1, 0x52, 0x1e, 0xca, 0x86, 0x7f, 0x33, 0xe7, 0xab, 0x23, 0x6f, 0xbb, 0xf7, 0xe, 0x42, 0x96, 0xda, 0x79, 0x35, 0xe1, 0xad, 0x54, 0x18, 0xcc, 0x80, 0x97, 0xdb, 0xf, 0x43, 0xba, 0xf6, 0x22, 0x6e, 0xcd, 0x81, 0x55, 0x19, 0xe0, 0xac, 0x78, 0x34, 0x56, 0x1a, 0xce, 0x82, 0x7b, 0x37, 0xe3, 0xaf, 0xc, 0x40, 0x94, 0xd8, 0x21, 0x6d, 0xb9, 0xf5, 0xe2, 0xae, 0x7a, 0x36, 0xcf, 0x83, 0x57, 0x1b, 0xb8, 0xf4, 0x20, 0x6c, 0x95, 0xd9, 0xd, 0x41},
+ {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac, 0xa4, 0xe9, 0x3e, 0x73, 0x8d, 0xc0, 0x17, 0x5a, 0xf6, 0xbb, 0x6c, 0x21, 0xdf, 0x92, 0x45, 0x8, 0x55, 0x18, 0xcf, 0x82, 0x7c, 0x31, 0xe6, 0xab, 0x7, 0x4a, 0x9d, 0xd0, 0x2e, 0x63, 0xb4, 0xf9, 0xf1, 0xbc, 0x6b, 0x26, 0xd8, 0x95, 0x42, 0xf, 0xa3, 0xee, 0x39, 0x74, 0x8a, 0xc7, 0x10, 0x5d, 0xaa, 0xe7, 0x30, 0x7d, 0x83, 0xce, 0x19, 0x54, 0xf8, 0xb5, 0x62, 0x2f, 0xd1, 0x9c, 0x4b, 0x6, 0xe, 0x43, 0x94, 0xd9, 0x27, 0x6a, 0xbd, 0xf0, 0x5c, 0x11, 0xc6, 0x8b, 0x75, 0x38, 0xef, 0xa2, 0xff, 0xb2, 0x65, 0x28, 0xd6, 0x9b, 0x4c, 0x1, 0xad, 0xe0, 0x37, 0x7a, 0x84, 0xc9, 0x1e, 0x53, 0x5b, 0x16, 0xc1, 0x8c, 0x72, 0x3f, 0xe8, 0xa5, 0x9, 0x44, 0x93, 0xde, 0x20, 0x6d, 0xba, 0xf7, 0x49, 0x4, 0xd3, 0x9e, 0x60, 0x2d, 0xfa, 0xb7, 0x1b, 0x56, 0x81, 0xcc, 0x32, 0x7f, 0xa8, 0xe5, 0xed, 0xa0, 0x77, 0x3a, 0xc4, 0x89, 0x5e, 0x13, 0xbf, 0xf2, 0x25, 0x68, 0x96, 0xdb, 0xc, 0x41, 0x1c, 0x51, 0x86, 0xcb, 0x35, 0x78, 0xaf, 0xe2, 0x4e, 0x3, 0xd4, 0x99, 0x67, 0x2a, 0xfd, 0xb0, 0xb8, 0xf5, 0x22, 0x6f, 0x91, 0xdc, 0xb, 0x46, 0xea, 0xa7, 0x70, 0x3d, 0xc3, 0x8e, 0x59, 0x14, 0xe3, 0xae, 0x79, 0x34, 0xca, 0x87, 0x50, 0x1d, 0xb1, 0xfc, 0x2b, 0x66, 0x98, 0xd5, 0x2, 0x4f, 0x47, 0xa, 0xdd, 0x90, 0x6e, 0x23, 0xf4, 0xb9, 0x15, 0x58, 0x8f, 0xc2, 0x3c, 0x71, 0xa6, 0xeb, 0xb6, 0xfb, 0x2c, 0x61, 0x9f, 0xd2, 0x5, 0x48, 0xe4, 0xa9, 0x7e, 0x33, 0xcd, 0x80, 0x57, 0x1a, 0x12, 0x5f, 0x88, 0xc5, 0x3b, 0x76, 0xa1, 0xec, 0x40, 0xd, 0xda, 0x97, 0x69, 0x24, 0xf3, 0xbe},
+ {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd, 0x94, 0xda, 0x8, 0x46, 0xb1, 0xff, 0x2d, 0x63, 0xde, 0x90, 0x42, 0xc, 0xfb, 0xb5, 0x67, 0x29, 0x35, 0x7b, 0xa9, 0xe7, 0x10, 0x5e, 0x8c, 0xc2, 0x7f, 0x31, 0xe3, 0xad, 0x5a, 0x14, 0xc6, 0x88, 0xa1, 0xef, 0x3d, 0x73, 0x84, 0xca, 0x18, 0x56, 0xeb, 0xa5, 0x77, 0x39, 0xce, 0x80, 0x52, 0x1c, 0x6a, 0x24, 0xf6, 0xb8, 0x4f, 0x1, 0xd3, 0x9d, 0x20, 0x6e, 0xbc, 0xf2, 0x5, 0x4b, 0x99, 0xd7, 0xfe, 0xb0, 0x62, 0x2c, 0xdb, 0x95, 0x47, 0x9, 0xb4, 0xfa, 0x28, 0x66, 0x91, 0xdf, 0xd, 0x43, 0x5f, 0x11, 0xc3, 0x8d, 0x7a, 0x34, 0xe6, 0xa8, 0x15, 0x5b, 0x89, 0xc7, 0x30, 0x7e, 0xac, 0xe2, 0xcb, 0x85, 0x57, 0x19, 0xee, 0xa0, 0x72, 0x3c, 0x81, 0xcf, 0x1d, 0x53, 0xa4, 0xea, 0x38, 0x76, 0xd4, 0x9a, 0x48, 0x6, 0xf1, 0xbf, 0x6d, 0x23, 0x9e, 0xd0, 0x2, 0x4c, 0xbb, 0xf5, 0x27, 0x69, 0x40, 0xe, 0xdc, 0x92, 0x65, 0x2b, 0xf9, 0xb7, 0xa, 0x44, 0x96, 0xd8, 0x2f, 0x61, 0xb3, 0xfd, 0xe1, 0xaf, 0x7d, 0x33, 0xc4, 0x8a, 0x58, 0x16, 0xab, 0xe5, 0x37, 0x79, 0x8e, 0xc0, 0x12, 0x5c, 0x75, 0x3b, 0xe9, 0xa7, 0x50, 0x1e, 0xcc, 0x82, 0x3f, 0x71, 0xa3, 0xed, 0x1a, 0x54, 0x86, 0xc8, 0xbe, 0xf0, 0x22, 0x6c, 0x9b, 0xd5, 0x7, 0x49, 0xf4, 0xba, 0x68, 0x26, 0xd1, 0x9f, 0x4d, 0x3, 0x2a, 0x64, 0xb6, 0xf8, 0xf, 0x41, 0x93, 0xdd, 0x60, 0x2e, 0xfc, 0xb2, 0x45, 0xb, 0xd9, 0x97, 0x8b, 0xc5, 0x17, 0x59, 0xae, 0xe0, 0x32, 0x7c, 0xc1, 0x8f, 0x5d, 0x13, 0xe4, 0xaa, 0x78, 0x36, 0x1f, 0x51, 0x83, 0xcd, 0x3a, 0x74, 0xa6, 0xe8, 0x55, 0x1b, 0xc9, 0x87, 0x70, 0x3e, 0xec, 0xa2},
+ {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2, 0x84, 0xcb, 0x1a, 0x55, 0xa5, 0xea, 0x3b, 0x74, 0xc6, 0x89, 0x58, 0x17, 0xe7, 0xa8, 0x79, 0x36, 0x15, 0x5a, 0x8b, 0xc4, 0x34, 0x7b, 0xaa, 0xe5, 0x57, 0x18, 0xc9, 0x86, 0x76, 0x39, 0xe8, 0xa7, 0x91, 0xde, 0xf, 0x40, 0xb0, 0xff, 0x2e, 0x61, 0xd3, 0x9c, 0x4d, 0x2, 0xf2, 0xbd, 0x6c, 0x23, 0x2a, 0x65, 0xb4, 0xfb, 0xb, 0x44, 0x95, 0xda, 0x68, 0x27, 0xf6, 0xb9, 0x49, 0x6, 0xd7, 0x98, 0xae, 0xe1, 0x30, 0x7f, 0x8f, 0xc0, 0x11, 0x5e, 0xec, 0xa3, 0x72, 0x3d, 0xcd, 0x82, 0x53, 0x1c, 0x3f, 0x70, 0xa1, 0xee, 0x1e, 0x51, 0x80, 0xcf, 0x7d, 0x32, 0xe3, 0xac, 0x5c, 0x13, 0xc2, 0x8d, 0xbb, 0xf4, 0x25, 0x6a, 0x9a, 0xd5, 0x4, 0x4b, 0xf9, 0xb6, 0x67, 0x28, 0xd8, 0x97, 0x46, 0x9, 0x54, 0x1b, 0xca, 0x85, 0x75, 0x3a, 0xeb, 0xa4, 0x16, 0x59, 0x88, 0xc7, 0x37, 0x78, 0xa9, 0xe6, 0xd0, 0x9f, 0x4e, 0x1, 0xf1, 0xbe, 0x6f, 0x20, 0x92, 0xdd, 0xc, 0x43, 0xb3, 0xfc, 0x2d, 0x62, 0x41, 0xe, 0xdf, 0x90, 0x60, 0x2f, 0xfe, 0xb1, 0x3, 0x4c, 0x9d, 0xd2, 0x22, 0x6d, 0xbc, 0xf3, 0xc5, 0x8a, 0x5b, 0x14, 0xe4, 0xab, 0x7a, 0x35, 0x87, 0xc8, 0x19, 0x56, 0xa6, 0xe9, 0x38, 0x77, 0x7e, 0x31, 0xe0, 0xaf, 0x5f, 0x10, 0xc1, 0x8e, 0x3c, 0x73, 0xa2, 0xed, 0x1d, 0x52, 0x83, 0xcc, 0xfa, 0xb5, 0x64, 0x2b, 0xdb, 0x94, 0x45, 0xa, 0xb8, 0xf7, 0x26, 0x69, 0x99, 0xd6, 0x7, 0x48, 0x6b, 0x24, 0xf5, 0xba, 0x4a, 0x5, 0xd4, 0x9b, 0x29, 0x66, 0xb7, 0xf8, 0x8, 0x47, 0x96, 0xd9, 0xef, 0xa0, 0x71, 0x3e, 0xce, 0x81, 0x50, 0x1f, 0xad, 0xe2, 0x33, 0x7c, 0x8c, 0xc3, 0x12, 0x5d},
+ {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17, 0x69, 0x39, 0xc9, 0x99, 0x34, 0x64, 0x94, 0xc4, 0xd3, 0x83, 0x73, 0x23, 0x8e, 0xde, 0x2e, 0x7e, 0xd2, 0x82, 0x72, 0x22, 0x8f, 0xdf, 0x2f, 0x7f, 0x68, 0x38, 0xc8, 0x98, 0x35, 0x65, 0x95, 0xc5, 0xbb, 0xeb, 0x1b, 0x4b, 0xe6, 0xb6, 0x46, 0x16, 0x1, 0x51, 0xa1, 0xf1, 0x5c, 0xc, 0xfc, 0xac, 0xb9, 0xe9, 0x19, 0x49, 0xe4, 0xb4, 0x44, 0x14, 0x3, 0x53, 0xa3, 0xf3, 0x5e, 0xe, 0xfe, 0xae, 0xd0, 0x80, 0x70, 0x20, 0x8d, 0xdd, 0x2d, 0x7d, 0x6a, 0x3a, 0xca, 0x9a, 0x37, 0x67, 0x97, 0xc7, 0x6b, 0x3b, 0xcb, 0x9b, 0x36, 0x66, 0x96, 0xc6, 0xd1, 0x81, 0x71, 0x21, 0x8c, 0xdc, 0x2c, 0x7c, 0x2, 0x52, 0xa2, 0xf2, 0x5f, 0xf, 0xff, 0xaf, 0xb8, 0xe8, 0x18, 0x48, 0xe5, 0xb5, 0x45, 0x15, 0x6f, 0x3f, 0xcf, 0x9f, 0x32, 0x62, 0x92, 0xc2, 0xd5, 0x85, 0x75, 0x25, 0x88, 0xd8, 0x28, 0x78, 0x6, 0x56, 0xa6, 0xf6, 0x5b, 0xb, 0xfb, 0xab, 0xbc, 0xec, 0x1c, 0x4c, 0xe1, 0xb1, 0x41, 0x11, 0xbd, 0xed, 0x1d, 0x4d, 0xe0, 0xb0, 0x40, 0x10, 0x7, 0x57, 0xa7, 0xf7, 0x5a, 0xa, 0xfa, 0xaa, 0xd4, 0x84, 0x74, 0x24, 0x89, 0xd9, 0x29, 0x79, 0x6e, 0x3e, 0xce, 0x9e, 0x33, 0x63, 0x93, 0xc3, 0xd6, 0x86, 0x76, 0x26, 0x8b, 0xdb, 0x2b, 0x7b, 0x6c, 0x3c, 0xcc, 0x9c, 0x31, 0x61, 0x91, 0xc1, 0xbf, 0xef, 0x1f, 0x4f, 0xe2, 0xb2, 0x42, 0x12, 0x5, 0x55, 0xa5, 0xf5, 0x58, 0x8, 0xf8, 0xa8, 0x4, 0x54, 0xa4, 0xf4, 0x59, 0x9, 0xf9, 0xa9, 0xbe, 0xee, 0x1e, 0x4e, 0xe3, 0xb3, 0x43, 0x13, 0x6d, 0x3d, 0xcd, 0x9d, 0x30, 0x60, 0x90, 0xc0, 0xd7, 0x87, 0x77, 0x27, 0x8a, 0xda, 0x2a, 0x7a},
+ {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18, 0x79, 0x28, 0xdb, 0x8a, 0x20, 0x71, 0x82, 0xd3, 0xcb, 0x9a, 0x69, 0x38, 0x92, 0xc3, 0x30, 0x61, 0xf2, 0xa3, 0x50, 0x1, 0xab, 0xfa, 0x9, 0x58, 0x40, 0x11, 0xe2, 0xb3, 0x19, 0x48, 0xbb, 0xea, 0x8b, 0xda, 0x29, 0x78, 0xd2, 0x83, 0x70, 0x21, 0x39, 0x68, 0x9b, 0xca, 0x60, 0x31, 0xc2, 0x93, 0xf9, 0xa8, 0x5b, 0xa, 0xa0, 0xf1, 0x2, 0x53, 0x4b, 0x1a, 0xe9, 0xb8, 0x12, 0x43, 0xb0, 0xe1, 0x80, 0xd1, 0x22, 0x73, 0xd9, 0x88, 0x7b, 0x2a, 0x32, 0x63, 0x90, 0xc1, 0x6b, 0x3a, 0xc9, 0x98, 0xb, 0x5a, 0xa9, 0xf8, 0x52, 0x3, 0xf0, 0xa1, 0xb9, 0xe8, 0x1b, 0x4a, 0xe0, 0xb1, 0x42, 0x13, 0x72, 0x23, 0xd0, 0x81, 0x2b, 0x7a, 0x89, 0xd8, 0xc0, 0x91, 0x62, 0x33, 0x99, 0xc8, 0x3b, 0x6a, 0xef, 0xbe, 0x4d, 0x1c, 0xb6, 0xe7, 0x14, 0x45, 0x5d, 0xc, 0xff, 0xae, 0x4, 0x55, 0xa6, 0xf7, 0x96, 0xc7, 0x34, 0x65, 0xcf, 0x9e, 0x6d, 0x3c, 0x24, 0x75, 0x86, 0xd7, 0x7d, 0x2c, 0xdf, 0x8e, 0x1d, 0x4c, 0xbf, 0xee, 0x44, 0x15, 0xe6, 0xb7, 0xaf, 0xfe, 0xd, 0x5c, 0xf6, 0xa7, 0x54, 0x5, 0x64, 0x35, 0xc6, 0x97, 0x3d, 0x6c, 0x9f, 0xce, 0xd6, 0x87, 0x74, 0x25, 0x8f, 0xde, 0x2d, 0x7c, 0x16, 0x47, 0xb4, 0xe5, 0x4f, 0x1e, 0xed, 0xbc, 0xa4, 0xf5, 0x6, 0x57, 0xfd, 0xac, 0x5f, 0xe, 0x6f, 0x3e, 0xcd, 0x9c, 0x36, 0x67, 0x94, 0xc5, 0xdd, 0x8c, 0x7f, 0x2e, 0x84, 0xd5, 0x26, 0x77, 0xe4, 0xb5, 0x46, 0x17, 0xbd, 0xec, 0x1f, 0x4e, 0x56, 0x7, 0xf4, 0xa5, 0xf, 0x5e, 0xad, 0xfc, 0x9d, 0xcc, 0x3f, 0x6e, 0xc4, 0x95, 0x66, 0x37, 0x2f, 0x7e, 0x8d, 0xdc, 0x76, 0x27, 0xd4, 0x85},
+ {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9, 0x49, 0x1b, 0xed, 0xbf, 0x1c, 0x4e, 0xb8, 0xea, 0xe3, 0xb1, 0x47, 0x15, 0xb6, 0xe4, 0x12, 0x40, 0x92, 0xc0, 0x36, 0x64, 0xc7, 0x95, 0x63, 0x31, 0x38, 0x6a, 0x9c, 0xce, 0x6d, 0x3f, 0xc9, 0x9b, 0xdb, 0x89, 0x7f, 0x2d, 0x8e, 0xdc, 0x2a, 0x78, 0x71, 0x23, 0xd5, 0x87, 0x24, 0x76, 0x80, 0xd2, 0x39, 0x6b, 0x9d, 0xcf, 0x6c, 0x3e, 0xc8, 0x9a, 0x93, 0xc1, 0x37, 0x65, 0xc6, 0x94, 0x62, 0x30, 0x70, 0x22, 0xd4, 0x86, 0x25, 0x77, 0x81, 0xd3, 0xda, 0x88, 0x7e, 0x2c, 0x8f, 0xdd, 0x2b, 0x79, 0xab, 0xf9, 0xf, 0x5d, 0xfe, 0xac, 0x5a, 0x8, 0x1, 0x53, 0xa5, 0xf7, 0x54, 0x6, 0xf0, 0xa2, 0xe2, 0xb0, 0x46, 0x14, 0xb7, 0xe5, 0x13, 0x41, 0x48, 0x1a, 0xec, 0xbe, 0x1d, 0x4f, 0xb9, 0xeb, 0x72, 0x20, 0xd6, 0x84, 0x27, 0x75, 0x83, 0xd1, 0xd8, 0x8a, 0x7c, 0x2e, 0x8d, 0xdf, 0x29, 0x7b, 0x3b, 0x69, 0x9f, 0xcd, 0x6e, 0x3c, 0xca, 0x98, 0x91, 0xc3, 0x35, 0x67, 0xc4, 0x96, 0x60, 0x32, 0xe0, 0xb2, 0x44, 0x16, 0xb5, 0xe7, 0x11, 0x43, 0x4a, 0x18, 0xee, 0xbc, 0x1f, 0x4d, 0xbb, 0xe9, 0xa9, 0xfb, 0xd, 0x5f, 0xfc, 0xae, 0x58, 0xa, 0x3, 0x51, 0xa7, 0xf5, 0x56, 0x4, 0xf2, 0xa0, 0x4b, 0x19, 0xef, 0xbd, 0x1e, 0x4c, 0xba, 0xe8, 0xe1, 0xb3, 0x45, 0x17, 0xb4, 0xe6, 0x10, 0x42, 0x2, 0x50, 0xa6, 0xf4, 0x57, 0x5, 0xf3, 0xa1, 0xa8, 0xfa, 0xc, 0x5e, 0xfd, 0xaf, 0x59, 0xb, 0xd9, 0x8b, 0x7d, 0x2f, 0x8c, 0xde, 0x28, 0x7a, 0x73, 0x21, 0xd7, 0x85, 0x26, 0x74, 0x82, 0xd0, 0x90, 0xc2, 0x34, 0x66, 0xc5, 0x97, 0x61, 0x33, 0x3a, 0x68, 0x9e, 0xcc, 0x6f, 0x3d, 0xcb, 0x99},
+ {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6, 0x59, 0xa, 0xff, 0xac, 0x8, 0x5b, 0xae, 0xfd, 0xfb, 0xa8, 0x5d, 0xe, 0xaa, 0xf9, 0xc, 0x5f, 0xb2, 0xe1, 0x14, 0x47, 0xe3, 0xb0, 0x45, 0x16, 0x10, 0x43, 0xb6, 0xe5, 0x41, 0x12, 0xe7, 0xb4, 0xeb, 0xb8, 0x4d, 0x1e, 0xba, 0xe9, 0x1c, 0x4f, 0x49, 0x1a, 0xef, 0xbc, 0x18, 0x4b, 0xbe, 0xed, 0x79, 0x2a, 0xdf, 0x8c, 0x28, 0x7b, 0x8e, 0xdd, 0xdb, 0x88, 0x7d, 0x2e, 0x8a, 0xd9, 0x2c, 0x7f, 0x20, 0x73, 0x86, 0xd5, 0x71, 0x22, 0xd7, 0x84, 0x82, 0xd1, 0x24, 0x77, 0xd3, 0x80, 0x75, 0x26, 0xcb, 0x98, 0x6d, 0x3e, 0x9a, 0xc9, 0x3c, 0x6f, 0x69, 0x3a, 0xcf, 0x9c, 0x38, 0x6b, 0x9e, 0xcd, 0x92, 0xc1, 0x34, 0x67, 0xc3, 0x90, 0x65, 0x36, 0x30, 0x63, 0x96, 0xc5, 0x61, 0x32, 0xc7, 0x94, 0xf2, 0xa1, 0x54, 0x7, 0xa3, 0xf0, 0x5, 0x56, 0x50, 0x3, 0xf6, 0xa5, 0x1, 0x52, 0xa7, 0xf4, 0xab, 0xf8, 0xd, 0x5e, 0xfa, 0xa9, 0x5c, 0xf, 0x9, 0x5a, 0xaf, 0xfc, 0x58, 0xb, 0xfe, 0xad, 0x40, 0x13, 0xe6, 0xb5, 0x11, 0x42, 0xb7, 0xe4, 0xe2, 0xb1, 0x44, 0x17, 0xb3, 0xe0, 0x15, 0x46, 0x19, 0x4a, 0xbf, 0xec, 0x48, 0x1b, 0xee, 0xbd, 0xbb, 0xe8, 0x1d, 0x4e, 0xea, 0xb9, 0x4c, 0x1f, 0x8b, 0xd8, 0x2d, 0x7e, 0xda, 0x89, 0x7c, 0x2f, 0x29, 0x7a, 0x8f, 0xdc, 0x78, 0x2b, 0xde, 0x8d, 0xd2, 0x81, 0x74, 0x27, 0x83, 0xd0, 0x25, 0x76, 0x70, 0x23, 0xd6, 0x85, 0x21, 0x72, 0x87, 0xd4, 0x39, 0x6a, 0x9f, 0xcc, 0x68, 0x3b, 0xce, 0x9d, 0x9b, 0xc8, 0x3d, 0x6e, 0xca, 0x99, 0x6c, 0x3f, 0x60, 0x33, 0xc6, 0x95, 0x31, 0x62, 0x97, 0xc4, 0xc2, 0x91, 0x64, 0x37, 0x93, 0xc0, 0x35, 0x66},
+ {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b, 0x29, 0x7d, 0x81, 0xd5, 0x64, 0x30, 0xcc, 0x98, 0xb3, 0xe7, 0x1b, 0x4f, 0xfe, 0xaa, 0x56, 0x2, 0x52, 0x6, 0xfa, 0xae, 0x1f, 0x4b, 0xb7, 0xe3, 0xc8, 0x9c, 0x60, 0x34, 0x85, 0xd1, 0x2d, 0x79, 0x7b, 0x2f, 0xd3, 0x87, 0x36, 0x62, 0x9e, 0xca, 0xe1, 0xb5, 0x49, 0x1d, 0xac, 0xf8, 0x4, 0x50, 0xa4, 0xf0, 0xc, 0x58, 0xe9, 0xbd, 0x41, 0x15, 0x3e, 0x6a, 0x96, 0xc2, 0x73, 0x27, 0xdb, 0x8f, 0x8d, 0xd9, 0x25, 0x71, 0xc0, 0x94, 0x68, 0x3c, 0x17, 0x43, 0xbf, 0xeb, 0x5a, 0xe, 0xf2, 0xa6, 0xf6, 0xa2, 0x5e, 0xa, 0xbb, 0xef, 0x13, 0x47, 0x6c, 0x38, 0xc4, 0x90, 0x21, 0x75, 0x89, 0xdd, 0xdf, 0x8b, 0x77, 0x23, 0x92, 0xc6, 0x3a, 0x6e, 0x45, 0x11, 0xed, 0xb9, 0x8, 0x5c, 0xa0, 0xf4, 0x55, 0x1, 0xfd, 0xa9, 0x18, 0x4c, 0xb0, 0xe4, 0xcf, 0x9b, 0x67, 0x33, 0x82, 0xd6, 0x2a, 0x7e, 0x7c, 0x28, 0xd4, 0x80, 0x31, 0x65, 0x99, 0xcd, 0xe6, 0xb2, 0x4e, 0x1a, 0xab, 0xff, 0x3, 0x57, 0x7, 0x53, 0xaf, 0xfb, 0x4a, 0x1e, 0xe2, 0xb6, 0x9d, 0xc9, 0x35, 0x61, 0xd0, 0x84, 0x78, 0x2c, 0x2e, 0x7a, 0x86, 0xd2, 0x63, 0x37, 0xcb, 0x9f, 0xb4, 0xe0, 0x1c, 0x48, 0xf9, 0xad, 0x51, 0x5, 0xf1, 0xa5, 0x59, 0xd, 0xbc, 0xe8, 0x14, 0x40, 0x6b, 0x3f, 0xc3, 0x97, 0x26, 0x72, 0x8e, 0xda, 0xd8, 0x8c, 0x70, 0x24, 0x95, 0xc1, 0x3d, 0x69, 0x42, 0x16, 0xea, 0xbe, 0xf, 0x5b, 0xa7, 0xf3, 0xa3, 0xf7, 0xb, 0x5f, 0xee, 0xba, 0x46, 0x12, 0x39, 0x6d, 0x91, 0xc5, 0x74, 0x20, 0xdc, 0x88, 0x8a, 0xde, 0x22, 0x76, 0xc7, 0x93, 0x6f, 0x3b, 0x10, 0x44, 0xb8, 0xec, 0x5d, 0x9, 0xf5, 0xa1},
+ {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24, 0x39, 0x6c, 0x93, 0xc6, 0x70, 0x25, 0xda, 0x8f, 0xab, 0xfe, 0x1, 0x54, 0xe2, 0xb7, 0x48, 0x1d, 0x72, 0x27, 0xd8, 0x8d, 0x3b, 0x6e, 0x91, 0xc4, 0xe0, 0xb5, 0x4a, 0x1f, 0xa9, 0xfc, 0x3, 0x56, 0x4b, 0x1e, 0xe1, 0xb4, 0x2, 0x57, 0xa8, 0xfd, 0xd9, 0x8c, 0x73, 0x26, 0x90, 0xc5, 0x3a, 0x6f, 0xe4, 0xb1, 0x4e, 0x1b, 0xad, 0xf8, 0x7, 0x52, 0x76, 0x23, 0xdc, 0x89, 0x3f, 0x6a, 0x95, 0xc0, 0xdd, 0x88, 0x77, 0x22, 0x94, 0xc1, 0x3e, 0x6b, 0x4f, 0x1a, 0xe5, 0xb0, 0x6, 0x53, 0xac, 0xf9, 0x96, 0xc3, 0x3c, 0x69, 0xdf, 0x8a, 0x75, 0x20, 0x4, 0x51, 0xae, 0xfb, 0x4d, 0x18, 0xe7, 0xb2, 0xaf, 0xfa, 0x5, 0x50, 0xe6, 0xb3, 0x4c, 0x19, 0x3d, 0x68, 0x97, 0xc2, 0x74, 0x21, 0xde, 0x8b, 0xd5, 0x80, 0x7f, 0x2a, 0x9c, 0xc9, 0x36, 0x63, 0x47, 0x12, 0xed, 0xb8, 0xe, 0x5b, 0xa4, 0xf1, 0xec, 0xb9, 0x46, 0x13, 0xa5, 0xf0, 0xf, 0x5a, 0x7e, 0x2b, 0xd4, 0x81, 0x37, 0x62, 0x9d, 0xc8, 0xa7, 0xf2, 0xd, 0x58, 0xee, 0xbb, 0x44, 0x11, 0x35, 0x60, 0x9f, 0xca, 0x7c, 0x29, 0xd6, 0x83, 0x9e, 0xcb, 0x34, 0x61, 0xd7, 0x82, 0x7d, 0x28, 0xc, 0x59, 0xa6, 0xf3, 0x45, 0x10, 0xef, 0xba, 0x31, 0x64, 0x9b, 0xce, 0x78, 0x2d, 0xd2, 0x87, 0xa3, 0xf6, 0x9, 0x5c, 0xea, 0xbf, 0x40, 0x15, 0x8, 0x5d, 0xa2, 0xf7, 0x41, 0x14, 0xeb, 0xbe, 0x9a, 0xcf, 0x30, 0x65, 0xd3, 0x86, 0x79, 0x2c, 0x43, 0x16, 0xe9, 0xbc, 0xa, 0x5f, 0xa0, 0xf5, 0xd1, 0x84, 0x7b, 0x2e, 0x98, 0xcd, 0x32, 0x67, 0x7a, 0x2f, 0xd0, 0x85, 0x33, 0x66, 0x99, 0xcc, 0xe8, 0xbd, 0x42, 0x17, 0xa1, 0xf4, 0xb, 0x5e},
+ {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35, 0x9, 0x5f, 0xa5, 0xf3, 0x4c, 0x1a, 0xe0, 0xb6, 0x83, 0xd5, 0x2f, 0x79, 0xc6, 0x90, 0x6a, 0x3c, 0x12, 0x44, 0xbe, 0xe8, 0x57, 0x1, 0xfb, 0xad, 0x98, 0xce, 0x34, 0x62, 0xdd, 0x8b, 0x71, 0x27, 0x1b, 0x4d, 0xb7, 0xe1, 0x5e, 0x8, 0xf2, 0xa4, 0x91, 0xc7, 0x3d, 0x6b, 0xd4, 0x82, 0x78, 0x2e, 0x24, 0x72, 0x88, 0xde, 0x61, 0x37, 0xcd, 0x9b, 0xae, 0xf8, 0x2, 0x54, 0xeb, 0xbd, 0x47, 0x11, 0x2d, 0x7b, 0x81, 0xd7, 0x68, 0x3e, 0xc4, 0x92, 0xa7, 0xf1, 0xb, 0x5d, 0xe2, 0xb4, 0x4e, 0x18, 0x36, 0x60, 0x9a, 0xcc, 0x73, 0x25, 0xdf, 0x89, 0xbc, 0xea, 0x10, 0x46, 0xf9, 0xaf, 0x55, 0x3, 0x3f, 0x69, 0x93, 0xc5, 0x7a, 0x2c, 0xd6, 0x80, 0xb5, 0xe3, 0x19, 0x4f, 0xf0, 0xa6, 0x5c, 0xa, 0x48, 0x1e, 0xe4, 0xb2, 0xd, 0x5b, 0xa1, 0xf7, 0xc2, 0x94, 0x6e, 0x38, 0x87, 0xd1, 0x2b, 0x7d, 0x41, 0x17, 0xed, 0xbb, 0x4, 0x52, 0xa8, 0xfe, 0xcb, 0x9d, 0x67, 0x31, 0x8e, 0xd8, 0x22, 0x74, 0x5a, 0xc, 0xf6, 0xa0, 0x1f, 0x49, 0xb3, 0xe5, 0xd0, 0x86, 0x7c, 0x2a, 0x95, 0xc3, 0x39, 0x6f, 0x53, 0x5, 0xff, 0xa9, 0x16, 0x40, 0xba, 0xec, 0xd9, 0x8f, 0x75, 0x23, 0x9c, 0xca, 0x30, 0x66, 0x6c, 0x3a, 0xc0, 0x96, 0x29, 0x7f, 0x85, 0xd3, 0xe6, 0xb0, 0x4a, 0x1c, 0xa3, 0xf5, 0xf, 0x59, 0x65, 0x33, 0xc9, 0x9f, 0x20, 0x76, 0x8c, 0xda, 0xef, 0xb9, 0x43, 0x15, 0xaa, 0xfc, 0x6, 0x50, 0x7e, 0x28, 0xd2, 0x84, 0x3b, 0x6d, 0x97, 0xc1, 0xf4, 0xa2, 0x58, 0xe, 0xb1, 0xe7, 0x1d, 0x4b, 0x77, 0x21, 0xdb, 0x8d, 0x32, 0x64, 0x9e, 0xc8, 0xfd, 0xab, 0x51, 0x7, 0xb8, 0xee, 0x14, 0x42},
+ {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a, 0x19, 0x4e, 0xb7, 0xe0, 0x58, 0xf, 0xf6, 0xa1, 0x9b, 0xcc, 0x35, 0x62, 0xda, 0x8d, 0x74, 0x23, 0x32, 0x65, 0x9c, 0xcb, 0x73, 0x24, 0xdd, 0x8a, 0xb0, 0xe7, 0x1e, 0x49, 0xf1, 0xa6, 0x5f, 0x8, 0x2b, 0x7c, 0x85, 0xd2, 0x6a, 0x3d, 0xc4, 0x93, 0xa9, 0xfe, 0x7, 0x50, 0xe8, 0xbf, 0x46, 0x11, 0x64, 0x33, 0xca, 0x9d, 0x25, 0x72, 0x8b, 0xdc, 0xe6, 0xb1, 0x48, 0x1f, 0xa7, 0xf0, 0x9, 0x5e, 0x7d, 0x2a, 0xd3, 0x84, 0x3c, 0x6b, 0x92, 0xc5, 0xff, 0xa8, 0x51, 0x6, 0xbe, 0xe9, 0x10, 0x47, 0x56, 0x1, 0xf8, 0xaf, 0x17, 0x40, 0xb9, 0xee, 0xd4, 0x83, 0x7a, 0x2d, 0x95, 0xc2, 0x3b, 0x6c, 0x4f, 0x18, 0xe1, 0xb6, 0xe, 0x59, 0xa0, 0xf7, 0xcd, 0x9a, 0x63, 0x34, 0x8c, 0xdb, 0x22, 0x75, 0xc8, 0x9f, 0x66, 0x31, 0x89, 0xde, 0x27, 0x70, 0x4a, 0x1d, 0xe4, 0xb3, 0xb, 0x5c, 0xa5, 0xf2, 0xd1, 0x86, 0x7f, 0x28, 0x90, 0xc7, 0x3e, 0x69, 0x53, 0x4, 0xfd, 0xaa, 0x12, 0x45, 0xbc, 0xeb, 0xfa, 0xad, 0x54, 0x3, 0xbb, 0xec, 0x15, 0x42, 0x78, 0x2f, 0xd6, 0x81, 0x39, 0x6e, 0x97, 0xc0, 0xe3, 0xb4, 0x4d, 0x1a, 0xa2, 0xf5, 0xc, 0x5b, 0x61, 0x36, 0xcf, 0x98, 0x20, 0x77, 0x8e, 0xd9, 0xac, 0xfb, 0x2, 0x55, 0xed, 0xba, 0x43, 0x14, 0x2e, 0x79, 0x80, 0xd7, 0x6f, 0x38, 0xc1, 0x96, 0xb5, 0xe2, 0x1b, 0x4c, 0xf4, 0xa3, 0x5a, 0xd, 0x37, 0x60, 0x99, 0xce, 0x76, 0x21, 0xd8, 0x8f, 0x9e, 0xc9, 0x30, 0x67, 0xdf, 0x88, 0x71, 0x26, 0x1c, 0x4b, 0xb2, 0xe5, 0x5d, 0xa, 0xf3, 0xa4, 0x87, 0xd0, 0x29, 0x7e, 0xc6, 0x91, 0x68, 0x3f, 0x5, 0x52, 0xab, 0xfc, 0x44, 0x13, 0xea, 0xbd},
+ {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f, 0xe9, 0xb1, 0x59, 0x1, 0x94, 0xcc, 0x24, 0x7c, 0x13, 0x4b, 0xa3, 0xfb, 0x6e, 0x36, 0xde, 0x86, 0xcf, 0x97, 0x7f, 0x27, 0xb2, 0xea, 0x2, 0x5a, 0x35, 0x6d, 0x85, 0xdd, 0x48, 0x10, 0xf8, 0xa0, 0x26, 0x7e, 0x96, 0xce, 0x5b, 0x3, 0xeb, 0xb3, 0xdc, 0x84, 0x6c, 0x34, 0xa1, 0xf9, 0x11, 0x49, 0x83, 0xdb, 0x33, 0x6b, 0xfe, 0xa6, 0x4e, 0x16, 0x79, 0x21, 0xc9, 0x91, 0x4, 0x5c, 0xb4, 0xec, 0x6a, 0x32, 0xda, 0x82, 0x17, 0x4f, 0xa7, 0xff, 0x90, 0xc8, 0x20, 0x78, 0xed, 0xb5, 0x5d, 0x5, 0x4c, 0x14, 0xfc, 0xa4, 0x31, 0x69, 0x81, 0xd9, 0xb6, 0xee, 0x6, 0x5e, 0xcb, 0x93, 0x7b, 0x23, 0xa5, 0xfd, 0x15, 0x4d, 0xd8, 0x80, 0x68, 0x30, 0x5f, 0x7, 0xef, 0xb7, 0x22, 0x7a, 0x92, 0xca, 0x1b, 0x43, 0xab, 0xf3, 0x66, 0x3e, 0xd6, 0x8e, 0xe1, 0xb9, 0x51, 0x9, 0x9c, 0xc4, 0x2c, 0x74, 0xf2, 0xaa, 0x42, 0x1a, 0x8f, 0xd7, 0x3f, 0x67, 0x8, 0x50, 0xb8, 0xe0, 0x75, 0x2d, 0xc5, 0x9d, 0xd4, 0x8c, 0x64, 0x3c, 0xa9, 0xf1, 0x19, 0x41, 0x2e, 0x76, 0x9e, 0xc6, 0x53, 0xb, 0xe3, 0xbb, 0x3d, 0x65, 0x8d, 0xd5, 0x40, 0x18, 0xf0, 0xa8, 0xc7, 0x9f, 0x77, 0x2f, 0xba, 0xe2, 0xa, 0x52, 0x98, 0xc0, 0x28, 0x70, 0xe5, 0xbd, 0x55, 0xd, 0x62, 0x3a, 0xd2, 0x8a, 0x1f, 0x47, 0xaf, 0xf7, 0x71, 0x29, 0xc1, 0x99, 0xc, 0x54, 0xbc, 0xe4, 0x8b, 0xd3, 0x3b, 0x63, 0xf6, 0xae, 0x46, 0x1e, 0x57, 0xf, 0xe7, 0xbf, 0x2a, 0x72, 0x9a, 0xc2, 0xad, 0xf5, 0x1d, 0x45, 0xd0, 0x88, 0x60, 0x38, 0xbe, 0xe6, 0xe, 0x56, 0xc3, 0x9b, 0x73, 0x2b, 0x44, 0x1c, 0xf4, 0xac, 0x39, 0x61, 0x89, 0xd1},
+ {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60, 0xf9, 0xa0, 0x4b, 0x12, 0x80, 0xd9, 0x32, 0x6b, 0xb, 0x52, 0xb9, 0xe0, 0x72, 0x2b, 0xc0, 0x99, 0xef, 0xb6, 0x5d, 0x4, 0x96, 0xcf, 0x24, 0x7d, 0x1d, 0x44, 0xaf, 0xf6, 0x64, 0x3d, 0xd6, 0x8f, 0x16, 0x4f, 0xa4, 0xfd, 0x6f, 0x36, 0xdd, 0x84, 0xe4, 0xbd, 0x56, 0xf, 0x9d, 0xc4, 0x2f, 0x76, 0xc3, 0x9a, 0x71, 0x28, 0xba, 0xe3, 0x8, 0x51, 0x31, 0x68, 0x83, 0xda, 0x48, 0x11, 0xfa, 0xa3, 0x3a, 0x63, 0x88, 0xd1, 0x43, 0x1a, 0xf1, 0xa8, 0xc8, 0x91, 0x7a, 0x23, 0xb1, 0xe8, 0x3, 0x5a, 0x2c, 0x75, 0x9e, 0xc7, 0x55, 0xc, 0xe7, 0xbe, 0xde, 0x87, 0x6c, 0x35, 0xa7, 0xfe, 0x15, 0x4c, 0xd5, 0x8c, 0x67, 0x3e, 0xac, 0xf5, 0x1e, 0x47, 0x27, 0x7e, 0x95, 0xcc, 0x5e, 0x7, 0xec, 0xb5, 0x9b, 0xc2, 0x29, 0x70, 0xe2, 0xbb, 0x50, 0x9, 0x69, 0x30, 0xdb, 0x82, 0x10, 0x49, 0xa2, 0xfb, 0x62, 0x3b, 0xd0, 0x89, 0x1b, 0x42, 0xa9, 0xf0, 0x90, 0xc9, 0x22, 0x7b, 0xe9, 0xb0, 0x5b, 0x2, 0x74, 0x2d, 0xc6, 0x9f, 0xd, 0x54, 0xbf, 0xe6, 0x86, 0xdf, 0x34, 0x6d, 0xff, 0xa6, 0x4d, 0x14, 0x8d, 0xd4, 0x3f, 0x66, 0xf4, 0xad, 0x46, 0x1f, 0x7f, 0x26, 0xcd, 0x94, 0x6, 0x5f, 0xb4, 0xed, 0x58, 0x1, 0xea, 0xb3, 0x21, 0x78, 0x93, 0xca, 0xaa, 0xf3, 0x18, 0x41, 0xd3, 0x8a, 0x61, 0x38, 0xa1, 0xf8, 0x13, 0x4a, 0xd8, 0x81, 0x6a, 0x33, 0x53, 0xa, 0xe1, 0xb8, 0x2a, 0x73, 0x98, 0xc1, 0xb7, 0xee, 0x5, 0x5c, 0xce, 0x97, 0x7c, 0x25, 0x45, 0x1c, 0xf7, 0xae, 0x3c, 0x65, 0x8e, 0xd7, 0x4e, 0x17, 0xfc, 0xa5, 0x37, 0x6e, 0x85, 0xdc, 0xbc, 0xe5, 0xe, 0x57, 0xc5, 0x9c, 0x77, 0x2e},
+ {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71, 0xc9, 0x93, 0x7d, 0x27, 0xbc, 0xe6, 0x8, 0x52, 0x23, 0x79, 0x97, 0xcd, 0x56, 0xc, 0xe2, 0xb8, 0x8f, 0xd5, 0x3b, 0x61, 0xfa, 0xa0, 0x4e, 0x14, 0x65, 0x3f, 0xd1, 0x8b, 0x10, 0x4a, 0xa4, 0xfe, 0x46, 0x1c, 0xf2, 0xa8, 0x33, 0x69, 0x87, 0xdd, 0xac, 0xf6, 0x18, 0x42, 0xd9, 0x83, 0x6d, 0x37, 0x3, 0x59, 0xb7, 0xed, 0x76, 0x2c, 0xc2, 0x98, 0xe9, 0xb3, 0x5d, 0x7, 0x9c, 0xc6, 0x28, 0x72, 0xca, 0x90, 0x7e, 0x24, 0xbf, 0xe5, 0xb, 0x51, 0x20, 0x7a, 0x94, 0xce, 0x55, 0xf, 0xe1, 0xbb, 0x8c, 0xd6, 0x38, 0x62, 0xf9, 0xa3, 0x4d, 0x17, 0x66, 0x3c, 0xd2, 0x88, 0x13, 0x49, 0xa7, 0xfd, 0x45, 0x1f, 0xf1, 0xab, 0x30, 0x6a, 0x84, 0xde, 0xaf, 0xf5, 0x1b, 0x41, 0xda, 0x80, 0x6e, 0x34, 0x6, 0x5c, 0xb2, 0xe8, 0x73, 0x29, 0xc7, 0x9d, 0xec, 0xb6, 0x58, 0x2, 0x99, 0xc3, 0x2d, 0x77, 0xcf, 0x95, 0x7b, 0x21, 0xba, 0xe0, 0xe, 0x54, 0x25, 0x7f, 0x91, 0xcb, 0x50, 0xa, 0xe4, 0xbe, 0x89, 0xd3, 0x3d, 0x67, 0xfc, 0xa6, 0x48, 0x12, 0x63, 0x39, 0xd7, 0x8d, 0x16, 0x4c, 0xa2, 0xf8, 0x40, 0x1a, 0xf4, 0xae, 0x35, 0x6f, 0x81, 0xdb, 0xaa, 0xf0, 0x1e, 0x44, 0xdf, 0x85, 0x6b, 0x31, 0x5, 0x5f, 0xb1, 0xeb, 0x70, 0x2a, 0xc4, 0x9e, 0xef, 0xb5, 0x5b, 0x1, 0x9a, 0xc0, 0x2e, 0x74, 0xcc, 0x96, 0x78, 0x22, 0xb9, 0xe3, 0xd, 0x57, 0x26, 0x7c, 0x92, 0xc8, 0x53, 0x9, 0xe7, 0xbd, 0x8a, 0xd0, 0x3e, 0x64, 0xff, 0xa5, 0x4b, 0x11, 0x60, 0x3a, 0xd4, 0x8e, 0x15, 0x4f, 0xa1, 0xfb, 0x43, 0x19, 0xf7, 0xad, 0x36, 0x6c, 0x82, 0xd8, 0xa9, 0xf3, 0x1d, 0x47, 0xdc, 0x86, 0x68, 0x32},
+ {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e, 0xd9, 0x82, 0x6f, 0x34, 0xa8, 0xf3, 0x1e, 0x45, 0x3b, 0x60, 0x8d, 0xd6, 0x4a, 0x11, 0xfc, 0xa7, 0xaf, 0xf4, 0x19, 0x42, 0xde, 0x85, 0x68, 0x33, 0x4d, 0x16, 0xfb, 0xa0, 0x3c, 0x67, 0x8a, 0xd1, 0x76, 0x2d, 0xc0, 0x9b, 0x7, 0x5c, 0xb1, 0xea, 0x94, 0xcf, 0x22, 0x79, 0xe5, 0xbe, 0x53, 0x8, 0x43, 0x18, 0xf5, 0xae, 0x32, 0x69, 0x84, 0xdf, 0xa1, 0xfa, 0x17, 0x4c, 0xd0, 0x8b, 0x66, 0x3d, 0x9a, 0xc1, 0x2c, 0x77, 0xeb, 0xb0, 0x5d, 0x6, 0x78, 0x23, 0xce, 0x95, 0x9, 0x52, 0xbf, 0xe4, 0xec, 0xb7, 0x5a, 0x1, 0x9d, 0xc6, 0x2b, 0x70, 0xe, 0x55, 0xb8, 0xe3, 0x7f, 0x24, 0xc9, 0x92, 0x35, 0x6e, 0x83, 0xd8, 0x44, 0x1f, 0xf2, 0xa9, 0xd7, 0x8c, 0x61, 0x3a, 0xa6, 0xfd, 0x10, 0x4b, 0x86, 0xdd, 0x30, 0x6b, 0xf7, 0xac, 0x41, 0x1a, 0x64, 0x3f, 0xd2, 0x89, 0x15, 0x4e, 0xa3, 0xf8, 0x5f, 0x4, 0xe9, 0xb2, 0x2e, 0x75, 0x98, 0xc3, 0xbd, 0xe6, 0xb, 0x50, 0xcc, 0x97, 0x7a, 0x21, 0x29, 0x72, 0x9f, 0xc4, 0x58, 0x3, 0xee, 0xb5, 0xcb, 0x90, 0x7d, 0x26, 0xba, 0xe1, 0xc, 0x57, 0xf0, 0xab, 0x46, 0x1d, 0x81, 0xda, 0x37, 0x6c, 0x12, 0x49, 0xa4, 0xff, 0x63, 0x38, 0xd5, 0x8e, 0xc5, 0x9e, 0x73, 0x28, 0xb4, 0xef, 0x2, 0x59, 0x27, 0x7c, 0x91, 0xca, 0x56, 0xd, 0xe0, 0xbb, 0x1c, 0x47, 0xaa, 0xf1, 0x6d, 0x36, 0xdb, 0x80, 0xfe, 0xa5, 0x48, 0x13, 0x8f, 0xd4, 0x39, 0x62, 0x6a, 0x31, 0xdc, 0x87, 0x1b, 0x40, 0xad, 0xf6, 0x88, 0xd3, 0x3e, 0x65, 0xf9, 0xa2, 0x4f, 0x14, 0xb3, 0xe8, 0x5, 0x5e, 0xc2, 0x99, 0x74, 0x2f, 0x51, 0xa, 0xe7, 0xbc, 0x20, 0x7b, 0x96, 0xcd},
+ {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53, 0xa9, 0xf5, 0x11, 0x4d, 0xc4, 0x98, 0x7c, 0x20, 0x73, 0x2f, 0xcb, 0x97, 0x1e, 0x42, 0xa6, 0xfa, 0x4f, 0x13, 0xf7, 0xab, 0x22, 0x7e, 0x9a, 0xc6, 0x95, 0xc9, 0x2d, 0x71, 0xf8, 0xa4, 0x40, 0x1c, 0xe6, 0xba, 0x5e, 0x2, 0x8b, 0xd7, 0x33, 0x6f, 0x3c, 0x60, 0x84, 0xd8, 0x51, 0xd, 0xe9, 0xb5, 0x9e, 0xc2, 0x26, 0x7a, 0xf3, 0xaf, 0x4b, 0x17, 0x44, 0x18, 0xfc, 0xa0, 0x29, 0x75, 0x91, 0xcd, 0x37, 0x6b, 0x8f, 0xd3, 0x5a, 0x6, 0xe2, 0xbe, 0xed, 0xb1, 0x55, 0x9, 0x80, 0xdc, 0x38, 0x64, 0xd1, 0x8d, 0x69, 0x35, 0xbc, 0xe0, 0x4, 0x58, 0xb, 0x57, 0xb3, 0xef, 0x66, 0x3a, 0xde, 0x82, 0x78, 0x24, 0xc0, 0x9c, 0x15, 0x49, 0xad, 0xf1, 0xa2, 0xfe, 0x1a, 0x46, 0xcf, 0x93, 0x77, 0x2b, 0x21, 0x7d, 0x99, 0xc5, 0x4c, 0x10, 0xf4, 0xa8, 0xfb, 0xa7, 0x43, 0x1f, 0x96, 0xca, 0x2e, 0x72, 0x88, 0xd4, 0x30, 0x6c, 0xe5, 0xb9, 0x5d, 0x1, 0x52, 0xe, 0xea, 0xb6, 0x3f, 0x63, 0x87, 0xdb, 0x6e, 0x32, 0xd6, 0x8a, 0x3, 0x5f, 0xbb, 0xe7, 0xb4, 0xe8, 0xc, 0x50, 0xd9, 0x85, 0x61, 0x3d, 0xc7, 0x9b, 0x7f, 0x23, 0xaa, 0xf6, 0x12, 0x4e, 0x1d, 0x41, 0xa5, 0xf9, 0x70, 0x2c, 0xc8, 0x94, 0xbf, 0xe3, 0x7, 0x5b, 0xd2, 0x8e, 0x6a, 0x36, 0x65, 0x39, 0xdd, 0x81, 0x8, 0x54, 0xb0, 0xec, 0x16, 0x4a, 0xae, 0xf2, 0x7b, 0x27, 0xc3, 0x9f, 0xcc, 0x90, 0x74, 0x28, 0xa1, 0xfd, 0x19, 0x45, 0xf0, 0xac, 0x48, 0x14, 0x9d, 0xc1, 0x25, 0x79, 0x2a, 0x76, 0x92, 0xce, 0x47, 0x1b, 0xff, 0xa3, 0x59, 0x5, 0xe1, 0xbd, 0x34, 0x68, 0x8c, 0xd0, 0x83, 0xdf, 0x3b, 0x67, 0xee, 0xb2, 0x56, 0xa},
+ {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c, 0xb9, 0xe4, 0x3, 0x5e, 0xd0, 0x8d, 0x6a, 0x37, 0x6b, 0x36, 0xd1, 0x8c, 0x2, 0x5f, 0xb8, 0xe5, 0x6f, 0x32, 0xd5, 0x88, 0x6, 0x5b, 0xbc, 0xe1, 0xbd, 0xe0, 0x7, 0x5a, 0xd4, 0x89, 0x6e, 0x33, 0xd6, 0x8b, 0x6c, 0x31, 0xbf, 0xe2, 0x5, 0x58, 0x4, 0x59, 0xbe, 0xe3, 0x6d, 0x30, 0xd7, 0x8a, 0xde, 0x83, 0x64, 0x39, 0xb7, 0xea, 0xd, 0x50, 0xc, 0x51, 0xb6, 0xeb, 0x65, 0x38, 0xdf, 0x82, 0x67, 0x3a, 0xdd, 0x80, 0xe, 0x53, 0xb4, 0xe9, 0xb5, 0xe8, 0xf, 0x52, 0xdc, 0x81, 0x66, 0x3b, 0xb1, 0xec, 0xb, 0x56, 0xd8, 0x85, 0x62, 0x3f, 0x63, 0x3e, 0xd9, 0x84, 0xa, 0x57, 0xb0, 0xed, 0x8, 0x55, 0xb2, 0xef, 0x61, 0x3c, 0xdb, 0x86, 0xda, 0x87, 0x60, 0x3d, 0xb3, 0xee, 0x9, 0x54, 0xa1, 0xfc, 0x1b, 0x46, 0xc8, 0x95, 0x72, 0x2f, 0x73, 0x2e, 0xc9, 0x94, 0x1a, 0x47, 0xa0, 0xfd, 0x18, 0x45, 0xa2, 0xff, 0x71, 0x2c, 0xcb, 0x96, 0xca, 0x97, 0x70, 0x2d, 0xa3, 0xfe, 0x19, 0x44, 0xce, 0x93, 0x74, 0x29, 0xa7, 0xfa, 0x1d, 0x40, 0x1c, 0x41, 0xa6, 0xfb, 0x75, 0x28, 0xcf, 0x92, 0x77, 0x2a, 0xcd, 0x90, 0x1e, 0x43, 0xa4, 0xf9, 0xa5, 0xf8, 0x1f, 0x42, 0xcc, 0x91, 0x76, 0x2b, 0x7f, 0x22, 0xc5, 0x98, 0x16, 0x4b, 0xac, 0xf1, 0xad, 0xf0, 0x17, 0x4a, 0xc4, 0x99, 0x7e, 0x23, 0xc6, 0x9b, 0x7c, 0x21, 0xaf, 0xf2, 0x15, 0x48, 0x14, 0x49, 0xae, 0xf3, 0x7d, 0x20, 0xc7, 0x9a, 0x10, 0x4d, 0xaa, 0xf7, 0x79, 0x24, 0xc3, 0x9e, 0xc2, 0x9f, 0x78, 0x25, 0xab, 0xf6, 0x11, 0x4c, 0xa9, 0xf4, 0x13, 0x4e, 0xc0, 0x9d, 0x7a, 0x27, 0x7b, 0x26, 0xc1, 0x9c, 0x12, 0x4f, 0xa8, 0xf5},
+ {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d, 0x89, 0xd7, 0x35, 0x6b, 0xec, 0xb2, 0x50, 0xe, 0x43, 0x1d, 0xff, 0xa1, 0x26, 0x78, 0x9a, 0xc4, 0xf, 0x51, 0xb3, 0xed, 0x6a, 0x34, 0xd6, 0x88, 0xc5, 0x9b, 0x79, 0x27, 0xa0, 0xfe, 0x1c, 0x42, 0x86, 0xd8, 0x3a, 0x64, 0xe3, 0xbd, 0x5f, 0x1, 0x4c, 0x12, 0xf0, 0xae, 0x29, 0x77, 0x95, 0xcb, 0x1e, 0x40, 0xa2, 0xfc, 0x7b, 0x25, 0xc7, 0x99, 0xd4, 0x8a, 0x68, 0x36, 0xb1, 0xef, 0xd, 0x53, 0x97, 0xc9, 0x2b, 0x75, 0xf2, 0xac, 0x4e, 0x10, 0x5d, 0x3, 0xe1, 0xbf, 0x38, 0x66, 0x84, 0xda, 0x11, 0x4f, 0xad, 0xf3, 0x74, 0x2a, 0xc8, 0x96, 0xdb, 0x85, 0x67, 0x39, 0xbe, 0xe0, 0x2, 0x5c, 0x98, 0xc6, 0x24, 0x7a, 0xfd, 0xa3, 0x41, 0x1f, 0x52, 0xc, 0xee, 0xb0, 0x37, 0x69, 0x8b, 0xd5, 0x3c, 0x62, 0x80, 0xde, 0x59, 0x7, 0xe5, 0xbb, 0xf6, 0xa8, 0x4a, 0x14, 0x93, 0xcd, 0x2f, 0x71, 0xb5, 0xeb, 0x9, 0x57, 0xd0, 0x8e, 0x6c, 0x32, 0x7f, 0x21, 0xc3, 0x9d, 0x1a, 0x44, 0xa6, 0xf8, 0x33, 0x6d, 0x8f, 0xd1, 0x56, 0x8, 0xea, 0xb4, 0xf9, 0xa7, 0x45, 0x1b, 0x9c, 0xc2, 0x20, 0x7e, 0xba, 0xe4, 0x6, 0x58, 0xdf, 0x81, 0x63, 0x3d, 0x70, 0x2e, 0xcc, 0x92, 0x15, 0x4b, 0xa9, 0xf7, 0x22, 0x7c, 0x9e, 0xc0, 0x47, 0x19, 0xfb, 0xa5, 0xe8, 0xb6, 0x54, 0xa, 0x8d, 0xd3, 0x31, 0x6f, 0xab, 0xf5, 0x17, 0x49, 0xce, 0x90, 0x72, 0x2c, 0x61, 0x3f, 0xdd, 0x83, 0x4, 0x5a, 0xb8, 0xe6, 0x2d, 0x73, 0x91, 0xcf, 0x48, 0x16, 0xf4, 0xaa, 0xe7, 0xb9, 0x5b, 0x5, 0x82, 0xdc, 0x3e, 0x60, 0xa4, 0xfa, 0x18, 0x46, 0xc1, 0x9f, 0x7d, 0x23, 0x6e, 0x30, 0xd2, 0x8c, 0xb, 0x55, 0xb7, 0xe9},
+ {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42, 0x99, 0xc6, 0x27, 0x78, 0xf8, 0xa7, 0x46, 0x19, 0x5b, 0x4, 0xe5, 0xba, 0x3a, 0x65, 0x84, 0xdb, 0x2f, 0x70, 0x91, 0xce, 0x4e, 0x11, 0xf0, 0xaf, 0xed, 0xb2, 0x53, 0xc, 0x8c, 0xd3, 0x32, 0x6d, 0xb6, 0xe9, 0x8, 0x57, 0xd7, 0x88, 0x69, 0x36, 0x74, 0x2b, 0xca, 0x95, 0x15, 0x4a, 0xab, 0xf4, 0x5e, 0x1, 0xe0, 0xbf, 0x3f, 0x60, 0x81, 0xde, 0x9c, 0xc3, 0x22, 0x7d, 0xfd, 0xa2, 0x43, 0x1c, 0xc7, 0x98, 0x79, 0x26, 0xa6, 0xf9, 0x18, 0x47, 0x5, 0x5a, 0xbb, 0xe4, 0x64, 0x3b, 0xda, 0x85, 0x71, 0x2e, 0xcf, 0x90, 0x10, 0x4f, 0xae, 0xf1, 0xb3, 0xec, 0xd, 0x52, 0xd2, 0x8d, 0x6c, 0x33, 0xe8, 0xb7, 0x56, 0x9, 0x89, 0xd6, 0x37, 0x68, 0x2a, 0x75, 0x94, 0xcb, 0x4b, 0x14, 0xf5, 0xaa, 0xbc, 0xe3, 0x2, 0x5d, 0xdd, 0x82, 0x63, 0x3c, 0x7e, 0x21, 0xc0, 0x9f, 0x1f, 0x40, 0xa1, 0xfe, 0x25, 0x7a, 0x9b, 0xc4, 0x44, 0x1b, 0xfa, 0xa5, 0xe7, 0xb8, 0x59, 0x6, 0x86, 0xd9, 0x38, 0x67, 0x93, 0xcc, 0x2d, 0x72, 0xf2, 0xad, 0x4c, 0x13, 0x51, 0xe, 0xef, 0xb0, 0x30, 0x6f, 0x8e, 0xd1, 0xa, 0x55, 0xb4, 0xeb, 0x6b, 0x34, 0xd5, 0x8a, 0xc8, 0x97, 0x76, 0x29, 0xa9, 0xf6, 0x17, 0x48, 0xe2, 0xbd, 0x5c, 0x3, 0x83, 0xdc, 0x3d, 0x62, 0x20, 0x7f, 0x9e, 0xc1, 0x41, 0x1e, 0xff, 0xa0, 0x7b, 0x24, 0xc5, 0x9a, 0x1a, 0x45, 0xa4, 0xfb, 0xb9, 0xe6, 0x7, 0x58, 0xd8, 0x87, 0x66, 0x39, 0xcd, 0x92, 0x73, 0x2c, 0xac, 0xf3, 0x12, 0x4d, 0xf, 0x50, 0xb1, 0xee, 0x6e, 0x31, 0xd0, 0x8f, 0x54, 0xb, 0xea, 0xb5, 0x35, 0x6a, 0x8b, 0xd4, 0x96, 0xc9, 0x28, 0x77, 0xf7, 0xa8, 0x49, 0x16},
+ {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a, 0x4e, 0x2e, 0x8e, 0xee, 0xd3, 0xb3, 0x13, 0x73, 0x69, 0x9, 0xa9, 0xc9, 0xf4, 0x94, 0x34, 0x54, 0x9c, 0xfc, 0x5c, 0x3c, 0x1, 0x61, 0xc1, 0xa1, 0xbb, 0xdb, 0x7b, 0x1b, 0x26, 0x46, 0xe6, 0x86, 0xd2, 0xb2, 0x12, 0x72, 0x4f, 0x2f, 0x8f, 0xef, 0xf5, 0x95, 0x35, 0x55, 0x68, 0x8, 0xa8, 0xc8, 0x25, 0x45, 0xe5, 0x85, 0xb8, 0xd8, 0x78, 0x18, 0x2, 0x62, 0xc2, 0xa2, 0x9f, 0xff, 0x5f, 0x3f, 0x6b, 0xb, 0xab, 0xcb, 0xf6, 0x96, 0x36, 0x56, 0x4c, 0x2c, 0x8c, 0xec, 0xd1, 0xb1, 0x11, 0x71, 0xb9, 0xd9, 0x79, 0x19, 0x24, 0x44, 0xe4, 0x84, 0x9e, 0xfe, 0x5e, 0x3e, 0x3, 0x63, 0xc3, 0xa3, 0xf7, 0x97, 0x37, 0x57, 0x6a, 0xa, 0xaa, 0xca, 0xd0, 0xb0, 0x10, 0x70, 0x4d, 0x2d, 0x8d, 0xed, 0x4a, 0x2a, 0x8a, 0xea, 0xd7, 0xb7, 0x17, 0x77, 0x6d, 0xd, 0xad, 0xcd, 0xf0, 0x90, 0x30, 0x50, 0x4, 0x64, 0xc4, 0xa4, 0x99, 0xf9, 0x59, 0x39, 0x23, 0x43, 0xe3, 0x83, 0xbe, 0xde, 0x7e, 0x1e, 0xd6, 0xb6, 0x16, 0x76, 0x4b, 0x2b, 0x8b, 0xeb, 0xf1, 0x91, 0x31, 0x51, 0x6c, 0xc, 0xac, 0xcc, 0x98, 0xf8, 0x58, 0x38, 0x5, 0x65, 0xc5, 0xa5, 0xbf, 0xdf, 0x7f, 0x1f, 0x22, 0x42, 0xe2, 0x82, 0x6f, 0xf, 0xaf, 0xcf, 0xf2, 0x92, 0x32, 0x52, 0x48, 0x28, 0x88, 0xe8, 0xd5, 0xb5, 0x15, 0x75, 0x21, 0x41, 0xe1, 0x81, 0xbc, 0xdc, 0x7c, 0x1c, 0x6, 0x66, 0xc6, 0xa6, 0x9b, 0xfb, 0x5b, 0x3b, 0xf3, 0x93, 0x33, 0x53, 0x6e, 0xe, 0xae, 0xce, 0xd4, 0xb4, 0x14, 0x74, 0x49, 0x29, 0x89, 0xe9, 0xbd, 0xdd, 0x7d, 0x1d, 0x20, 0x40, 0xe0, 0x80, 0x9a, 0xfa, 0x5a, 0x3a, 0x7, 0x67, 0xc7, 0xa7},
+ {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15, 0x5e, 0x3f, 0x9c, 0xfd, 0xc7, 0xa6, 0x5, 0x64, 0x71, 0x10, 0xb3, 0xd2, 0xe8, 0x89, 0x2a, 0x4b, 0xbc, 0xdd, 0x7e, 0x1f, 0x25, 0x44, 0xe7, 0x86, 0x93, 0xf2, 0x51, 0x30, 0xa, 0x6b, 0xc8, 0xa9, 0xe2, 0x83, 0x20, 0x41, 0x7b, 0x1a, 0xb9, 0xd8, 0xcd, 0xac, 0xf, 0x6e, 0x54, 0x35, 0x96, 0xf7, 0x65, 0x4, 0xa7, 0xc6, 0xfc, 0x9d, 0x3e, 0x5f, 0x4a, 0x2b, 0x88, 0xe9, 0xd3, 0xb2, 0x11, 0x70, 0x3b, 0x5a, 0xf9, 0x98, 0xa2, 0xc3, 0x60, 0x1, 0x14, 0x75, 0xd6, 0xb7, 0x8d, 0xec, 0x4f, 0x2e, 0xd9, 0xb8, 0x1b, 0x7a, 0x40, 0x21, 0x82, 0xe3, 0xf6, 0x97, 0x34, 0x55, 0x6f, 0xe, 0xad, 0xcc, 0x87, 0xe6, 0x45, 0x24, 0x1e, 0x7f, 0xdc, 0xbd, 0xa8, 0xc9, 0x6a, 0xb, 0x31, 0x50, 0xf3, 0x92, 0xca, 0xab, 0x8, 0x69, 0x53, 0x32, 0x91, 0xf0, 0xe5, 0x84, 0x27, 0x46, 0x7c, 0x1d, 0xbe, 0xdf, 0x94, 0xf5, 0x56, 0x37, 0xd, 0x6c, 0xcf, 0xae, 0xbb, 0xda, 0x79, 0x18, 0x22, 0x43, 0xe0, 0x81, 0x76, 0x17, 0xb4, 0xd5, 0xef, 0x8e, 0x2d, 0x4c, 0x59, 0x38, 0x9b, 0xfa, 0xc0, 0xa1, 0x2, 0x63, 0x28, 0x49, 0xea, 0x8b, 0xb1, 0xd0, 0x73, 0x12, 0x7, 0x66, 0xc5, 0xa4, 0x9e, 0xff, 0x5c, 0x3d, 0xaf, 0xce, 0x6d, 0xc, 0x36, 0x57, 0xf4, 0x95, 0x80, 0xe1, 0x42, 0x23, 0x19, 0x78, 0xdb, 0xba, 0xf1, 0x90, 0x33, 0x52, 0x68, 0x9, 0xaa, 0xcb, 0xde, 0xbf, 0x1c, 0x7d, 0x47, 0x26, 0x85, 0xe4, 0x13, 0x72, 0xd1, 0xb0, 0x8a, 0xeb, 0x48, 0x29, 0x3c, 0x5d, 0xfe, 0x9f, 0xa5, 0xc4, 0x67, 0x6, 0x4d, 0x2c, 0x8f, 0xee, 0xd4, 0xb5, 0x16, 0x77, 0x62, 0x3, 0xa0, 0xc1, 0xfb, 0x9a, 0x39, 0x58},
+ {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4, 0x6e, 0xc, 0xaa, 0xc8, 0xfb, 0x99, 0x3f, 0x5d, 0x59, 0x3b, 0x9d, 0xff, 0xcc, 0xae, 0x8, 0x6a, 0xdc, 0xbe, 0x18, 0x7a, 0x49, 0x2b, 0x8d, 0xef, 0xeb, 0x89, 0x2f, 0x4d, 0x7e, 0x1c, 0xba, 0xd8, 0xb2, 0xd0, 0x76, 0x14, 0x27, 0x45, 0xe3, 0x81, 0x85, 0xe7, 0x41, 0x23, 0x10, 0x72, 0xd4, 0xb6, 0xa5, 0xc7, 0x61, 0x3, 0x30, 0x52, 0xf4, 0x96, 0x92, 0xf0, 0x56, 0x34, 0x7, 0x65, 0xc3, 0xa1, 0xcb, 0xa9, 0xf, 0x6d, 0x5e, 0x3c, 0x9a, 0xf8, 0xfc, 0x9e, 0x38, 0x5a, 0x69, 0xb, 0xad, 0xcf, 0x79, 0x1b, 0xbd, 0xdf, 0xec, 0x8e, 0x28, 0x4a, 0x4e, 0x2c, 0x8a, 0xe8, 0xdb, 0xb9, 0x1f, 0x7d, 0x17, 0x75, 0xd3, 0xb1, 0x82, 0xe0, 0x46, 0x24, 0x20, 0x42, 0xe4, 0x86, 0xb5, 0xd7, 0x71, 0x13, 0x57, 0x35, 0x93, 0xf1, 0xc2, 0xa0, 0x6, 0x64, 0x60, 0x2, 0xa4, 0xc6, 0xf5, 0x97, 0x31, 0x53, 0x39, 0x5b, 0xfd, 0x9f, 0xac, 0xce, 0x68, 0xa, 0xe, 0x6c, 0xca, 0xa8, 0x9b, 0xf9, 0x5f, 0x3d, 0x8b, 0xe9, 0x4f, 0x2d, 0x1e, 0x7c, 0xda, 0xb8, 0xbc, 0xde, 0x78, 0x1a, 0x29, 0x4b, 0xed, 0x8f, 0xe5, 0x87, 0x21, 0x43, 0x70, 0x12, 0xb4, 0xd6, 0xd2, 0xb0, 0x16, 0x74, 0x47, 0x25, 0x83, 0xe1, 0xf2, 0x90, 0x36, 0x54, 0x67, 0x5, 0xa3, 0xc1, 0xc5, 0xa7, 0x1, 0x63, 0x50, 0x32, 0x94, 0xf6, 0x9c, 0xfe, 0x58, 0x3a, 0x9, 0x6b, 0xcd, 0xaf, 0xab, 0xc9, 0x6f, 0xd, 0x3e, 0x5c, 0xfa, 0x98, 0x2e, 0x4c, 0xea, 0x88, 0xbb, 0xd9, 0x7f, 0x1d, 0x19, 0x7b, 0xdd, 0xbf, 0x8c, 0xee, 0x48, 0x2a, 0x40, 0x22, 0x84, 0xe6, 0xd5, 0xb7, 0x11, 0x73, 0x77, 0x15, 0xb3, 0xd1, 0xe2, 0x80, 0x26, 0x44},
+ {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb, 0x7e, 0x1d, 0xb8, 0xdb, 0xef, 0x8c, 0x29, 0x4a, 0x41, 0x22, 0x87, 0xe4, 0xd0, 0xb3, 0x16, 0x75, 0xfc, 0x9f, 0x3a, 0x59, 0x6d, 0xe, 0xab, 0xc8, 0xc3, 0xa0, 0x5, 0x66, 0x52, 0x31, 0x94, 0xf7, 0x82, 0xe1, 0x44, 0x27, 0x13, 0x70, 0xd5, 0xb6, 0xbd, 0xde, 0x7b, 0x18, 0x2c, 0x4f, 0xea, 0x89, 0xe5, 0x86, 0x23, 0x40, 0x74, 0x17, 0xb2, 0xd1, 0xda, 0xb9, 0x1c, 0x7f, 0x4b, 0x28, 0x8d, 0xee, 0x9b, 0xf8, 0x5d, 0x3e, 0xa, 0x69, 0xcc, 0xaf, 0xa4, 0xc7, 0x62, 0x1, 0x35, 0x56, 0xf3, 0x90, 0x19, 0x7a, 0xdf, 0xbc, 0x88, 0xeb, 0x4e, 0x2d, 0x26, 0x45, 0xe0, 0x83, 0xb7, 0xd4, 0x71, 0x12, 0x67, 0x4, 0xa1, 0xc2, 0xf6, 0x95, 0x30, 0x53, 0x58, 0x3b, 0x9e, 0xfd, 0xc9, 0xaa, 0xf, 0x6c, 0xd7, 0xb4, 0x11, 0x72, 0x46, 0x25, 0x80, 0xe3, 0xe8, 0x8b, 0x2e, 0x4d, 0x79, 0x1a, 0xbf, 0xdc, 0xa9, 0xca, 0x6f, 0xc, 0x38, 0x5b, 0xfe, 0x9d, 0x96, 0xf5, 0x50, 0x33, 0x7, 0x64, 0xc1, 0xa2, 0x2b, 0x48, 0xed, 0x8e, 0xba, 0xd9, 0x7c, 0x1f, 0x14, 0x77, 0xd2, 0xb1, 0x85, 0xe6, 0x43, 0x20, 0x55, 0x36, 0x93, 0xf0, 0xc4, 0xa7, 0x2, 0x61, 0x6a, 0x9, 0xac, 0xcf, 0xfb, 0x98, 0x3d, 0x5e, 0x32, 0x51, 0xf4, 0x97, 0xa3, 0xc0, 0x65, 0x6, 0xd, 0x6e, 0xcb, 0xa8, 0x9c, 0xff, 0x5a, 0x39, 0x4c, 0x2f, 0x8a, 0xe9, 0xdd, 0xbe, 0x1b, 0x78, 0x73, 0x10, 0xb5, 0xd6, 0xe2, 0x81, 0x24, 0x47, 0xce, 0xad, 0x8, 0x6b, 0x5f, 0x3c, 0x99, 0xfa, 0xf1, 0x92, 0x37, 0x54, 0x60, 0x3, 0xa6, 0xc5, 0xb0, 0xd3, 0x76, 0x15, 0x21, 0x42, 0xe7, 0x84, 0x8f, 0xec, 0x49, 0x2a, 0x1e, 0x7d, 0xd8, 0xbb},
+ {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26, 0xe, 0x6a, 0xc6, 0xa2, 0x83, 0xe7, 0x4b, 0x2f, 0x9, 0x6d, 0xc1, 0xa5, 0x84, 0xe0, 0x4c, 0x28, 0x1c, 0x78, 0xd4, 0xb0, 0x91, 0xf5, 0x59, 0x3d, 0x1b, 0x7f, 0xd3, 0xb7, 0x96, 0xf2, 0x5e, 0x3a, 0x12, 0x76, 0xda, 0xbe, 0x9f, 0xfb, 0x57, 0x33, 0x15, 0x71, 0xdd, 0xb9, 0x98, 0xfc, 0x50, 0x34, 0x38, 0x5c, 0xf0, 0x94, 0xb5, 0xd1, 0x7d, 0x19, 0x3f, 0x5b, 0xf7, 0x93, 0xb2, 0xd6, 0x7a, 0x1e, 0x36, 0x52, 0xfe, 0x9a, 0xbb, 0xdf, 0x73, 0x17, 0x31, 0x55, 0xf9, 0x9d, 0xbc, 0xd8, 0x74, 0x10, 0x24, 0x40, 0xec, 0x88, 0xa9, 0xcd, 0x61, 0x5, 0x23, 0x47, 0xeb, 0x8f, 0xae, 0xca, 0x66, 0x2, 0x2a, 0x4e, 0xe2, 0x86, 0xa7, 0xc3, 0x6f, 0xb, 0x2d, 0x49, 0xe5, 0x81, 0xa0, 0xc4, 0x68, 0xc, 0x70, 0x14, 0xb8, 0xdc, 0xfd, 0x99, 0x35, 0x51, 0x77, 0x13, 0xbf, 0xdb, 0xfa, 0x9e, 0x32, 0x56, 0x7e, 0x1a, 0xb6, 0xd2, 0xf3, 0x97, 0x3b, 0x5f, 0x79, 0x1d, 0xb1, 0xd5, 0xf4, 0x90, 0x3c, 0x58, 0x6c, 0x8, 0xa4, 0xc0, 0xe1, 0x85, 0x29, 0x4d, 0x6b, 0xf, 0xa3, 0xc7, 0xe6, 0x82, 0x2e, 0x4a, 0x62, 0x6, 0xaa, 0xce, 0xef, 0x8b, 0x27, 0x43, 0x65, 0x1, 0xad, 0xc9, 0xe8, 0x8c, 0x20, 0x44, 0x48, 0x2c, 0x80, 0xe4, 0xc5, 0xa1, 0xd, 0x69, 0x4f, 0x2b, 0x87, 0xe3, 0xc2, 0xa6, 0xa, 0x6e, 0x46, 0x22, 0x8e, 0xea, 0xcb, 0xaf, 0x3, 0x67, 0x41, 0x25, 0x89, 0xed, 0xcc, 0xa8, 0x4, 0x60, 0x54, 0x30, 0x9c, 0xf8, 0xd9, 0xbd, 0x11, 0x75, 0x53, 0x37, 0x9b, 0xff, 0xde, 0xba, 0x16, 0x72, 0x5a, 0x3e, 0x92, 0xf6, 0xd7, 0xb3, 0x1f, 0x7b, 0x5d, 0x39, 0x95, 0xf1, 0xd0, 0xb4, 0x18, 0x7c},
+ {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29, 0x1e, 0x7b, 0xd4, 0xb1, 0x97, 0xf2, 0x5d, 0x38, 0x11, 0x74, 0xdb, 0xbe, 0x98, 0xfd, 0x52, 0x37, 0x3c, 0x59, 0xf6, 0x93, 0xb5, 0xd0, 0x7f, 0x1a, 0x33, 0x56, 0xf9, 0x9c, 0xba, 0xdf, 0x70, 0x15, 0x22, 0x47, 0xe8, 0x8d, 0xab, 0xce, 0x61, 0x4, 0x2d, 0x48, 0xe7, 0x82, 0xa4, 0xc1, 0x6e, 0xb, 0x78, 0x1d, 0xb2, 0xd7, 0xf1, 0x94, 0x3b, 0x5e, 0x77, 0x12, 0xbd, 0xd8, 0xfe, 0x9b, 0x34, 0x51, 0x66, 0x3, 0xac, 0xc9, 0xef, 0x8a, 0x25, 0x40, 0x69, 0xc, 0xa3, 0xc6, 0xe0, 0x85, 0x2a, 0x4f, 0x44, 0x21, 0x8e, 0xeb, 0xcd, 0xa8, 0x7, 0x62, 0x4b, 0x2e, 0x81, 0xe4, 0xc2, 0xa7, 0x8, 0x6d, 0x5a, 0x3f, 0x90, 0xf5, 0xd3, 0xb6, 0x19, 0x7c, 0x55, 0x30, 0x9f, 0xfa, 0xdc, 0xb9, 0x16, 0x73, 0xf0, 0x95, 0x3a, 0x5f, 0x79, 0x1c, 0xb3, 0xd6, 0xff, 0x9a, 0x35, 0x50, 0x76, 0x13, 0xbc, 0xd9, 0xee, 0x8b, 0x24, 0x41, 0x67, 0x2, 0xad, 0xc8, 0xe1, 0x84, 0x2b, 0x4e, 0x68, 0xd, 0xa2, 0xc7, 0xcc, 0xa9, 0x6, 0x63, 0x45, 0x20, 0x8f, 0xea, 0xc3, 0xa6, 0x9, 0x6c, 0x4a, 0x2f, 0x80, 0xe5, 0xd2, 0xb7, 0x18, 0x7d, 0x5b, 0x3e, 0x91, 0xf4, 0xdd, 0xb8, 0x17, 0x72, 0x54, 0x31, 0x9e, 0xfb, 0x88, 0xed, 0x42, 0x27, 0x1, 0x64, 0xcb, 0xae, 0x87, 0xe2, 0x4d, 0x28, 0xe, 0x6b, 0xc4, 0xa1, 0x96, 0xf3, 0x5c, 0x39, 0x1f, 0x7a, 0xd5, 0xb0, 0x99, 0xfc, 0x53, 0x36, 0x10, 0x75, 0xda, 0xbf, 0xb4, 0xd1, 0x7e, 0x1b, 0x3d, 0x58, 0xf7, 0x92, 0xbb, 0xde, 0x71, 0x14, 0x32, 0x57, 0xf8, 0x9d, 0xaa, 0xcf, 0x60, 0x5, 0x23, 0x46, 0xe9, 0x8c, 0xa5, 0xc0, 0x6f, 0xa, 0x2c, 0x49, 0xe6, 0x83},
+ {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38, 0x2e, 0x48, 0xe2, 0x84, 0xab, 0xcd, 0x67, 0x1, 0x39, 0x5f, 0xf5, 0x93, 0xbc, 0xda, 0x70, 0x16, 0x5c, 0x3a, 0x90, 0xf6, 0xd9, 0xbf, 0x15, 0x73, 0x4b, 0x2d, 0x87, 0xe1, 0xce, 0xa8, 0x2, 0x64, 0x72, 0x14, 0xbe, 0xd8, 0xf7, 0x91, 0x3b, 0x5d, 0x65, 0x3, 0xa9, 0xcf, 0xe0, 0x86, 0x2c, 0x4a, 0xb8, 0xde, 0x74, 0x12, 0x3d, 0x5b, 0xf1, 0x97, 0xaf, 0xc9, 0x63, 0x5, 0x2a, 0x4c, 0xe6, 0x80, 0x96, 0xf0, 0x5a, 0x3c, 0x13, 0x75, 0xdf, 0xb9, 0x81, 0xe7, 0x4d, 0x2b, 0x4, 0x62, 0xc8, 0xae, 0xe4, 0x82, 0x28, 0x4e, 0x61, 0x7, 0xad, 0xcb, 0xf3, 0x95, 0x3f, 0x59, 0x76, 0x10, 0xba, 0xdc, 0xca, 0xac, 0x6, 0x60, 0x4f, 0x29, 0x83, 0xe5, 0xdd, 0xbb, 0x11, 0x77, 0x58, 0x3e, 0x94, 0xf2, 0x6d, 0xb, 0xa1, 0xc7, 0xe8, 0x8e, 0x24, 0x42, 0x7a, 0x1c, 0xb6, 0xd0, 0xff, 0x99, 0x33, 0x55, 0x43, 0x25, 0x8f, 0xe9, 0xc6, 0xa0, 0xa, 0x6c, 0x54, 0x32, 0x98, 0xfe, 0xd1, 0xb7, 0x1d, 0x7b, 0x31, 0x57, 0xfd, 0x9b, 0xb4, 0xd2, 0x78, 0x1e, 0x26, 0x40, 0xea, 0x8c, 0xa3, 0xc5, 0x6f, 0x9, 0x1f, 0x79, 0xd3, 0xb5, 0x9a, 0xfc, 0x56, 0x30, 0x8, 0x6e, 0xc4, 0xa2, 0x8d, 0xeb, 0x41, 0x27, 0xd5, 0xb3, 0x19, 0x7f, 0x50, 0x36, 0x9c, 0xfa, 0xc2, 0xa4, 0xe, 0x68, 0x47, 0x21, 0x8b, 0xed, 0xfb, 0x9d, 0x37, 0x51, 0x7e, 0x18, 0xb2, 0xd4, 0xec, 0x8a, 0x20, 0x46, 0x69, 0xf, 0xa5, 0xc3, 0x89, 0xef, 0x45, 0x23, 0xc, 0x6a, 0xc0, 0xa6, 0x9e, 0xf8, 0x52, 0x34, 0x1b, 0x7d, 0xd7, 0xb1, 0xa7, 0xc1, 0x6b, 0xd, 0x22, 0x44, 0xee, 0x88, 0xb0, 0xd6, 0x7c, 0x1a, 0x35, 0x53, 0xf9, 0x9f},
+ {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37, 0x3e, 0x59, 0xf0, 0x97, 0xbf, 0xd8, 0x71, 0x16, 0x21, 0x46, 0xef, 0x88, 0xa0, 0xc7, 0x6e, 0x9, 0x7c, 0x1b, 0xb2, 0xd5, 0xfd, 0x9a, 0x33, 0x54, 0x63, 0x4, 0xad, 0xca, 0xe2, 0x85, 0x2c, 0x4b, 0x42, 0x25, 0x8c, 0xeb, 0xc3, 0xa4, 0xd, 0x6a, 0x5d, 0x3a, 0x93, 0xf4, 0xdc, 0xbb, 0x12, 0x75, 0xf8, 0x9f, 0x36, 0x51, 0x79, 0x1e, 0xb7, 0xd0, 0xe7, 0x80, 0x29, 0x4e, 0x66, 0x1, 0xa8, 0xcf, 0xc6, 0xa1, 0x8, 0x6f, 0x47, 0x20, 0x89, 0xee, 0xd9, 0xbe, 0x17, 0x70, 0x58, 0x3f, 0x96, 0xf1, 0x84, 0xe3, 0x4a, 0x2d, 0x5, 0x62, 0xcb, 0xac, 0x9b, 0xfc, 0x55, 0x32, 0x1a, 0x7d, 0xd4, 0xb3, 0xba, 0xdd, 0x74, 0x13, 0x3b, 0x5c, 0xf5, 0x92, 0xa5, 0xc2, 0x6b, 0xc, 0x24, 0x43, 0xea, 0x8d, 0xed, 0x8a, 0x23, 0x44, 0x6c, 0xb, 0xa2, 0xc5, 0xf2, 0x95, 0x3c, 0x5b, 0x73, 0x14, 0xbd, 0xda, 0xd3, 0xb4, 0x1d, 0x7a, 0x52, 0x35, 0x9c, 0xfb, 0xcc, 0xab, 0x2, 0x65, 0x4d, 0x2a, 0x83, 0xe4, 0x91, 0xf6, 0x5f, 0x38, 0x10, 0x77, 0xde, 0xb9, 0x8e, 0xe9, 0x40, 0x27, 0xf, 0x68, 0xc1, 0xa6, 0xaf, 0xc8, 0x61, 0x6, 0x2e, 0x49, 0xe0, 0x87, 0xb0, 0xd7, 0x7e, 0x19, 0x31, 0x56, 0xff, 0x98, 0x15, 0x72, 0xdb, 0xbc, 0x94, 0xf3, 0x5a, 0x3d, 0xa, 0x6d, 0xc4, 0xa3, 0x8b, 0xec, 0x45, 0x22, 0x2b, 0x4c, 0xe5, 0x82, 0xaa, 0xcd, 0x64, 0x3, 0x34, 0x53, 0xfa, 0x9d, 0xb5, 0xd2, 0x7b, 0x1c, 0x69, 0xe, 0xa7, 0xc0, 0xe8, 0x8f, 0x26, 0x41, 0x76, 0x11, 0xb8, 0xdf, 0xf7, 0x90, 0x39, 0x5e, 0x57, 0x30, 0x99, 0xfe, 0xd6, 0xb1, 0x18, 0x7f, 0x48, 0x2f, 0x86, 0xe1, 0xc9, 0xae, 0x7, 0x60},
+ {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62, 0xce, 0xa6, 0x1e, 0x76, 0x73, 0x1b, 0xa3, 0xcb, 0xa9, 0xc1, 0x79, 0x11, 0x14, 0x7c, 0xc4, 0xac, 0x81, 0xe9, 0x51, 0x39, 0x3c, 0x54, 0xec, 0x84, 0xe6, 0x8e, 0x36, 0x5e, 0x5b, 0x33, 0x8b, 0xe3, 0x4f, 0x27, 0x9f, 0xf7, 0xf2, 0x9a, 0x22, 0x4a, 0x28, 0x40, 0xf8, 0x90, 0x95, 0xfd, 0x45, 0x2d, 0x1f, 0x77, 0xcf, 0xa7, 0xa2, 0xca, 0x72, 0x1a, 0x78, 0x10, 0xa8, 0xc0, 0xc5, 0xad, 0x15, 0x7d, 0xd1, 0xb9, 0x1, 0x69, 0x6c, 0x4, 0xbc, 0xd4, 0xb6, 0xde, 0x66, 0xe, 0xb, 0x63, 0xdb, 0xb3, 0x9e, 0xf6, 0x4e, 0x26, 0x23, 0x4b, 0xf3, 0x9b, 0xf9, 0x91, 0x29, 0x41, 0x44, 0x2c, 0x94, 0xfc, 0x50, 0x38, 0x80, 0xe8, 0xed, 0x85, 0x3d, 0x55, 0x37, 0x5f, 0xe7, 0x8f, 0x8a, 0xe2, 0x5a, 0x32, 0x3e, 0x56, 0xee, 0x86, 0x83, 0xeb, 0x53, 0x3b, 0x59, 0x31, 0x89, 0xe1, 0xe4, 0x8c, 0x34, 0x5c, 0xf0, 0x98, 0x20, 0x48, 0x4d, 0x25, 0x9d, 0xf5, 0x97, 0xff, 0x47, 0x2f, 0x2a, 0x42, 0xfa, 0x92, 0xbf, 0xd7, 0x6f, 0x7, 0x2, 0x6a, 0xd2, 0xba, 0xd8, 0xb0, 0x8, 0x60, 0x65, 0xd, 0xb5, 0xdd, 0x71, 0x19, 0xa1, 0xc9, 0xcc, 0xa4, 0x1c, 0x74, 0x16, 0x7e, 0xc6, 0xae, 0xab, 0xc3, 0x7b, 0x13, 0x21, 0x49, 0xf1, 0x99, 0x9c, 0xf4, 0x4c, 0x24, 0x46, 0x2e, 0x96, 0xfe, 0xfb, 0x93, 0x2b, 0x43, 0xef, 0x87, 0x3f, 0x57, 0x52, 0x3a, 0x82, 0xea, 0x88, 0xe0, 0x58, 0x30, 0x35, 0x5d, 0xe5, 0x8d, 0xa0, 0xc8, 0x70, 0x18, 0x1d, 0x75, 0xcd, 0xa5, 0xc7, 0xaf, 0x17, 0x7f, 0x7a, 0x12, 0xaa, 0xc2, 0x6e, 0x6, 0xbe, 0xd6, 0xd3, 0xbb, 0x3, 0x6b, 0x9, 0x61, 0xd9, 0xb1, 0xb4, 0xdc, 0x64, 0xc},
+ {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d, 0xde, 0xb7, 0xc, 0x65, 0x67, 0xe, 0xb5, 0xdc, 0xb1, 0xd8, 0x63, 0xa, 0x8, 0x61, 0xda, 0xb3, 0xa1, 0xc8, 0x73, 0x1a, 0x18, 0x71, 0xca, 0xa3, 0xce, 0xa7, 0x1c, 0x75, 0x77, 0x1e, 0xa5, 0xcc, 0x7f, 0x16, 0xad, 0xc4, 0xc6, 0xaf, 0x14, 0x7d, 0x10, 0x79, 0xc2, 0xab, 0xa9, 0xc0, 0x7b, 0x12, 0x5f, 0x36, 0x8d, 0xe4, 0xe6, 0x8f, 0x34, 0x5d, 0x30, 0x59, 0xe2, 0x8b, 0x89, 0xe0, 0x5b, 0x32, 0x81, 0xe8, 0x53, 0x3a, 0x38, 0x51, 0xea, 0x83, 0xee, 0x87, 0x3c, 0x55, 0x57, 0x3e, 0x85, 0xec, 0xfe, 0x97, 0x2c, 0x45, 0x47, 0x2e, 0x95, 0xfc, 0x91, 0xf8, 0x43, 0x2a, 0x28, 0x41, 0xfa, 0x93, 0x20, 0x49, 0xf2, 0x9b, 0x99, 0xf0, 0x4b, 0x22, 0x4f, 0x26, 0x9d, 0xf4, 0xf6, 0x9f, 0x24, 0x4d, 0xbe, 0xd7, 0x6c, 0x5, 0x7, 0x6e, 0xd5, 0xbc, 0xd1, 0xb8, 0x3, 0x6a, 0x68, 0x1, 0xba, 0xd3, 0x60, 0x9, 0xb2, 0xdb, 0xd9, 0xb0, 0xb, 0x62, 0xf, 0x66, 0xdd, 0xb4, 0xb6, 0xdf, 0x64, 0xd, 0x1f, 0x76, 0xcd, 0xa4, 0xa6, 0xcf, 0x74, 0x1d, 0x70, 0x19, 0xa2, 0xcb, 0xc9, 0xa0, 0x1b, 0x72, 0xc1, 0xa8, 0x13, 0x7a, 0x78, 0x11, 0xaa, 0xc3, 0xae, 0xc7, 0x7c, 0x15, 0x17, 0x7e, 0xc5, 0xac, 0xe1, 0x88, 0x33, 0x5a, 0x58, 0x31, 0x8a, 0xe3, 0x8e, 0xe7, 0x5c, 0x35, 0x37, 0x5e, 0xe5, 0x8c, 0x3f, 0x56, 0xed, 0x84, 0x86, 0xef, 0x54, 0x3d, 0x50, 0x39, 0x82, 0xeb, 0xe9, 0x80, 0x3b, 0x52, 0x40, 0x29, 0x92, 0xfb, 0xf9, 0x90, 0x2b, 0x42, 0x2f, 0x46, 0xfd, 0x94, 0x96, 0xff, 0x44, 0x2d, 0x9e, 0xf7, 0x4c, 0x25, 0x27, 0x4e, 0xf5, 0x9c, 0xf1, 0x98, 0x23, 0x4a, 0x48, 0x21, 0x9a, 0xf3},
+ {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c, 0xee, 0x84, 0x3a, 0x50, 0x5b, 0x31, 0x8f, 0xe5, 0x99, 0xf3, 0x4d, 0x27, 0x2c, 0x46, 0xf8, 0x92, 0xc1, 0xab, 0x15, 0x7f, 0x74, 0x1e, 0xa0, 0xca, 0xb6, 0xdc, 0x62, 0x8, 0x3, 0x69, 0xd7, 0xbd, 0x2f, 0x45, 0xfb, 0x91, 0x9a, 0xf0, 0x4e, 0x24, 0x58, 0x32, 0x8c, 0xe6, 0xed, 0x87, 0x39, 0x53, 0x9f, 0xf5, 0x4b, 0x21, 0x2a, 0x40, 0xfe, 0x94, 0xe8, 0x82, 0x3c, 0x56, 0x5d, 0x37, 0x89, 0xe3, 0x71, 0x1b, 0xa5, 0xcf, 0xc4, 0xae, 0x10, 0x7a, 0x6, 0x6c, 0xd2, 0xb8, 0xb3, 0xd9, 0x67, 0xd, 0x5e, 0x34, 0x8a, 0xe0, 0xeb, 0x81, 0x3f, 0x55, 0x29, 0x43, 0xfd, 0x97, 0x9c, 0xf6, 0x48, 0x22, 0xb0, 0xda, 0x64, 0xe, 0x5, 0x6f, 0xd1, 0xbb, 0xc7, 0xad, 0x13, 0x79, 0x72, 0x18, 0xa6, 0xcc, 0x23, 0x49, 0xf7, 0x9d, 0x96, 0xfc, 0x42, 0x28, 0x54, 0x3e, 0x80, 0xea, 0xe1, 0x8b, 0x35, 0x5f, 0xcd, 0xa7, 0x19, 0x73, 0x78, 0x12, 0xac, 0xc6, 0xba, 0xd0, 0x6e, 0x4, 0xf, 0x65, 0xdb, 0xb1, 0xe2, 0x88, 0x36, 0x5c, 0x57, 0x3d, 0x83, 0xe9, 0x95, 0xff, 0x41, 0x2b, 0x20, 0x4a, 0xf4, 0x9e, 0xc, 0x66, 0xd8, 0xb2, 0xb9, 0xd3, 0x6d, 0x7, 0x7b, 0x11, 0xaf, 0xc5, 0xce, 0xa4, 0x1a, 0x70, 0xbc, 0xd6, 0x68, 0x2, 0x9, 0x63, 0xdd, 0xb7, 0xcb, 0xa1, 0x1f, 0x75, 0x7e, 0x14, 0xaa, 0xc0, 0x52, 0x38, 0x86, 0xec, 0xe7, 0x8d, 0x33, 0x59, 0x25, 0x4f, 0xf1, 0x9b, 0x90, 0xfa, 0x44, 0x2e, 0x7d, 0x17, 0xa9, 0xc3, 0xc8, 0xa2, 0x1c, 0x76, 0xa, 0x60, 0xde, 0xb4, 0xbf, 0xd5, 0x6b, 0x1, 0x93, 0xf9, 0x47, 0x2d, 0x26, 0x4c, 0xf2, 0x98, 0xe4, 0x8e, 0x30, 0x5a, 0x51, 0x3b, 0x85, 0xef},
+ {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73, 0xfe, 0x95, 0x28, 0x43, 0x4f, 0x24, 0x99, 0xf2, 0x81, 0xea, 0x57, 0x3c, 0x30, 0x5b, 0xe6, 0x8d, 0xe1, 0x8a, 0x37, 0x5c, 0x50, 0x3b, 0x86, 0xed, 0x9e, 0xf5, 0x48, 0x23, 0x2f, 0x44, 0xf9, 0x92, 0x1f, 0x74, 0xc9, 0xa2, 0xae, 0xc5, 0x78, 0x13, 0x60, 0xb, 0xb6, 0xdd, 0xd1, 0xba, 0x7, 0x6c, 0xdf, 0xb4, 0x9, 0x62, 0x6e, 0x5, 0xb8, 0xd3, 0xa0, 0xcb, 0x76, 0x1d, 0x11, 0x7a, 0xc7, 0xac, 0x21, 0x4a, 0xf7, 0x9c, 0x90, 0xfb, 0x46, 0x2d, 0x5e, 0x35, 0x88, 0xe3, 0xef, 0x84, 0x39, 0x52, 0x3e, 0x55, 0xe8, 0x83, 0x8f, 0xe4, 0x59, 0x32, 0x41, 0x2a, 0x97, 0xfc, 0xf0, 0x9b, 0x26, 0x4d, 0xc0, 0xab, 0x16, 0x7d, 0x71, 0x1a, 0xa7, 0xcc, 0xbf, 0xd4, 0x69, 0x2, 0xe, 0x65, 0xd8, 0xb3, 0xa3, 0xc8, 0x75, 0x1e, 0x12, 0x79, 0xc4, 0xaf, 0xdc, 0xb7, 0xa, 0x61, 0x6d, 0x6, 0xbb, 0xd0, 0x5d, 0x36, 0x8b, 0xe0, 0xec, 0x87, 0x3a, 0x51, 0x22, 0x49, 0xf4, 0x9f, 0x93, 0xf8, 0x45, 0x2e, 0x42, 0x29, 0x94, 0xff, 0xf3, 0x98, 0x25, 0x4e, 0x3d, 0x56, 0xeb, 0x80, 0x8c, 0xe7, 0x5a, 0x31, 0xbc, 0xd7, 0x6a, 0x1, 0xd, 0x66, 0xdb, 0xb0, 0xc3, 0xa8, 0x15, 0x7e, 0x72, 0x19, 0xa4, 0xcf, 0x7c, 0x17, 0xaa, 0xc1, 0xcd, 0xa6, 0x1b, 0x70, 0x3, 0x68, 0xd5, 0xbe, 0xb2, 0xd9, 0x64, 0xf, 0x82, 0xe9, 0x54, 0x3f, 0x33, 0x58, 0xe5, 0x8e, 0xfd, 0x96, 0x2b, 0x40, 0x4c, 0x27, 0x9a, 0xf1, 0x9d, 0xf6, 0x4b, 0x20, 0x2c, 0x47, 0xfa, 0x91, 0xe2, 0x89, 0x34, 0x5f, 0x53, 0x38, 0x85, 0xee, 0x63, 0x8, 0xb5, 0xde, 0xd2, 0xb9, 0x4, 0x6f, 0x1c, 0x77, 0xca, 0xa1, 0xad, 0xc6, 0x7b, 0x10},
+ {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e, 0x8e, 0xe2, 0x56, 0x3a, 0x23, 0x4f, 0xfb, 0x97, 0xc9, 0xa5, 0x11, 0x7d, 0x64, 0x8, 0xbc, 0xd0, 0x1, 0x6d, 0xd9, 0xb5, 0xac, 0xc0, 0x74, 0x18, 0x46, 0x2a, 0x9e, 0xf2, 0xeb, 0x87, 0x33, 0x5f, 0x8f, 0xe3, 0x57, 0x3b, 0x22, 0x4e, 0xfa, 0x96, 0xc8, 0xa4, 0x10, 0x7c, 0x65, 0x9, 0xbd, 0xd1, 0x2, 0x6e, 0xda, 0xb6, 0xaf, 0xc3, 0x77, 0x1b, 0x45, 0x29, 0x9d, 0xf1, 0xe8, 0x84, 0x30, 0x5c, 0x8c, 0xe0, 0x54, 0x38, 0x21, 0x4d, 0xf9, 0x95, 0xcb, 0xa7, 0x13, 0x7f, 0x66, 0xa, 0xbe, 0xd2, 0x3, 0x6f, 0xdb, 0xb7, 0xae, 0xc2, 0x76, 0x1a, 0x44, 0x28, 0x9c, 0xf0, 0xe9, 0x85, 0x31, 0x5d, 0x8d, 0xe1, 0x55, 0x39, 0x20, 0x4c, 0xf8, 0x94, 0xca, 0xa6, 0x12, 0x7e, 0x67, 0xb, 0xbf, 0xd3, 0x4, 0x68, 0xdc, 0xb0, 0xa9, 0xc5, 0x71, 0x1d, 0x43, 0x2f, 0x9b, 0xf7, 0xee, 0x82, 0x36, 0x5a, 0x8a, 0xe6, 0x52, 0x3e, 0x27, 0x4b, 0xff, 0x93, 0xcd, 0xa1, 0x15, 0x79, 0x60, 0xc, 0xb8, 0xd4, 0x5, 0x69, 0xdd, 0xb1, 0xa8, 0xc4, 0x70, 0x1c, 0x42, 0x2e, 0x9a, 0xf6, 0xef, 0x83, 0x37, 0x5b, 0x8b, 0xe7, 0x53, 0x3f, 0x26, 0x4a, 0xfe, 0x92, 0xcc, 0xa0, 0x14, 0x78, 0x61, 0xd, 0xb9, 0xd5, 0x6, 0x6a, 0xde, 0xb2, 0xab, 0xc7, 0x73, 0x1f, 0x41, 0x2d, 0x99, 0xf5, 0xec, 0x80, 0x34, 0x58, 0x88, 0xe4, 0x50, 0x3c, 0x25, 0x49, 0xfd, 0x91, 0xcf, 0xa3, 0x17, 0x7b, 0x62, 0xe, 0xba, 0xd6, 0x7, 0x6b, 0xdf, 0xb3, 0xaa, 0xc6, 0x72, 0x1e, 0x40, 0x2c, 0x98, 0xf4, 0xed, 0x81, 0x35, 0x59, 0x89, 0xe5, 0x51, 0x3d, 0x24, 0x48, 0xfc, 0x90, 0xce, 0xa2, 0x16, 0x7a, 0x63, 0xf, 0xbb, 0xd7},
+ {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51, 0x9e, 0xf3, 0x44, 0x29, 0x37, 0x5a, 0xed, 0x80, 0xd1, 0xbc, 0xb, 0x66, 0x78, 0x15, 0xa2, 0xcf, 0x21, 0x4c, 0xfb, 0x96, 0x88, 0xe5, 0x52, 0x3f, 0x6e, 0x3, 0xb4, 0xd9, 0xc7, 0xaa, 0x1d, 0x70, 0xbf, 0xd2, 0x65, 0x8, 0x16, 0x7b, 0xcc, 0xa1, 0xf0, 0x9d, 0x2a, 0x47, 0x59, 0x34, 0x83, 0xee, 0x42, 0x2f, 0x98, 0xf5, 0xeb, 0x86, 0x31, 0x5c, 0xd, 0x60, 0xd7, 0xba, 0xa4, 0xc9, 0x7e, 0x13, 0xdc, 0xb1, 0x6, 0x6b, 0x75, 0x18, 0xaf, 0xc2, 0x93, 0xfe, 0x49, 0x24, 0x3a, 0x57, 0xe0, 0x8d, 0x63, 0xe, 0xb9, 0xd4, 0xca, 0xa7, 0x10, 0x7d, 0x2c, 0x41, 0xf6, 0x9b, 0x85, 0xe8, 0x5f, 0x32, 0xfd, 0x90, 0x27, 0x4a, 0x54, 0x39, 0x8e, 0xe3, 0xb2, 0xdf, 0x68, 0x5, 0x1b, 0x76, 0xc1, 0xac, 0x84, 0xe9, 0x5e, 0x33, 0x2d, 0x40, 0xf7, 0x9a, 0xcb, 0xa6, 0x11, 0x7c, 0x62, 0xf, 0xb8, 0xd5, 0x1a, 0x77, 0xc0, 0xad, 0xb3, 0xde, 0x69, 0x4, 0x55, 0x38, 0x8f, 0xe2, 0xfc, 0x91, 0x26, 0x4b, 0xa5, 0xc8, 0x7f, 0x12, 0xc, 0x61, 0xd6, 0xbb, 0xea, 0x87, 0x30, 0x5d, 0x43, 0x2e, 0x99, 0xf4, 0x3b, 0x56, 0xe1, 0x8c, 0x92, 0xff, 0x48, 0x25, 0x74, 0x19, 0xae, 0xc3, 0xdd, 0xb0, 0x7, 0x6a, 0xc6, 0xab, 0x1c, 0x71, 0x6f, 0x2, 0xb5, 0xd8, 0x89, 0xe4, 0x53, 0x3e, 0x20, 0x4d, 0xfa, 0x97, 0x58, 0x35, 0x82, 0xef, 0xf1, 0x9c, 0x2b, 0x46, 0x17, 0x7a, 0xcd, 0xa0, 0xbe, 0xd3, 0x64, 0x9, 0xe7, 0x8a, 0x3d, 0x50, 0x4e, 0x23, 0x94, 0xf9, 0xa8, 0xc5, 0x72, 0x1f, 0x1, 0x6c, 0xdb, 0xb6, 0x79, 0x14, 0xa3, 0xce, 0xd0, 0xbd, 0xa, 0x67, 0x36, 0x5b, 0xec, 0x81, 0x9f, 0xf2, 0x45, 0x28},
+ {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40, 0xae, 0xc0, 0x72, 0x1c, 0xb, 0x65, 0xd7, 0xb9, 0xf9, 0x97, 0x25, 0x4b, 0x5c, 0x32, 0x80, 0xee, 0x41, 0x2f, 0x9d, 0xf3, 0xe4, 0x8a, 0x38, 0x56, 0x16, 0x78, 0xca, 0xa4, 0xb3, 0xdd, 0x6f, 0x1, 0xef, 0x81, 0x33, 0x5d, 0x4a, 0x24, 0x96, 0xf8, 0xb8, 0xd6, 0x64, 0xa, 0x1d, 0x73, 0xc1, 0xaf, 0x82, 0xec, 0x5e, 0x30, 0x27, 0x49, 0xfb, 0x95, 0xd5, 0xbb, 0x9, 0x67, 0x70, 0x1e, 0xac, 0xc2, 0x2c, 0x42, 0xf0, 0x9e, 0x89, 0xe7, 0x55, 0x3b, 0x7b, 0x15, 0xa7, 0xc9, 0xde, 0xb0, 0x2, 0x6c, 0xc3, 0xad, 0x1f, 0x71, 0x66, 0x8, 0xba, 0xd4, 0x94, 0xfa, 0x48, 0x26, 0x31, 0x5f, 0xed, 0x83, 0x6d, 0x3, 0xb1, 0xdf, 0xc8, 0xa6, 0x14, 0x7a, 0x3a, 0x54, 0xe6, 0x88, 0x9f, 0xf1, 0x43, 0x2d, 0x19, 0x77, 0xc5, 0xab, 0xbc, 0xd2, 0x60, 0xe, 0x4e, 0x20, 0x92, 0xfc, 0xeb, 0x85, 0x37, 0x59, 0xb7, 0xd9, 0x6b, 0x5, 0x12, 0x7c, 0xce, 0xa0, 0xe0, 0x8e, 0x3c, 0x52, 0x45, 0x2b, 0x99, 0xf7, 0x58, 0x36, 0x84, 0xea, 0xfd, 0x93, 0x21, 0x4f, 0xf, 0x61, 0xd3, 0xbd, 0xaa, 0xc4, 0x76, 0x18, 0xf6, 0x98, 0x2a, 0x44, 0x53, 0x3d, 0x8f, 0xe1, 0xa1, 0xcf, 0x7d, 0x13, 0x4, 0x6a, 0xd8, 0xb6, 0x9b, 0xf5, 0x47, 0x29, 0x3e, 0x50, 0xe2, 0x8c, 0xcc, 0xa2, 0x10, 0x7e, 0x69, 0x7, 0xb5, 0xdb, 0x35, 0x5b, 0xe9, 0x87, 0x90, 0xfe, 0x4c, 0x22, 0x62, 0xc, 0xbe, 0xd0, 0xc7, 0xa9, 0x1b, 0x75, 0xda, 0xb4, 0x6, 0x68, 0x7f, 0x11, 0xa3, 0xcd, 0x8d, 0xe3, 0x51, 0x3f, 0x28, 0x46, 0xf4, 0x9a, 0x74, 0x1a, 0xa8, 0xc6, 0xd1, 0xbf, 0xd, 0x63, 0x23, 0x4d, 0xff, 0x91, 0x86, 0xe8, 0x5a, 0x34},
+ {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f, 0xbe, 0xd1, 0x60, 0xf, 0x1f, 0x70, 0xc1, 0xae, 0xe1, 0x8e, 0x3f, 0x50, 0x40, 0x2f, 0x9e, 0xf1, 0x61, 0xe, 0xbf, 0xd0, 0xc0, 0xaf, 0x1e, 0x71, 0x3e, 0x51, 0xe0, 0x8f, 0x9f, 0xf0, 0x41, 0x2e, 0xdf, 0xb0, 0x1, 0x6e, 0x7e, 0x11, 0xa0, 0xcf, 0x80, 0xef, 0x5e, 0x31, 0x21, 0x4e, 0xff, 0x90, 0xc2, 0xad, 0x1c, 0x73, 0x63, 0xc, 0xbd, 0xd2, 0x9d, 0xf2, 0x43, 0x2c, 0x3c, 0x53, 0xe2, 0x8d, 0x7c, 0x13, 0xa2, 0xcd, 0xdd, 0xb2, 0x3, 0x6c, 0x23, 0x4c, 0xfd, 0x92, 0x82, 0xed, 0x5c, 0x33, 0xa3, 0xcc, 0x7d, 0x12, 0x2, 0x6d, 0xdc, 0xb3, 0xfc, 0x93, 0x22, 0x4d, 0x5d, 0x32, 0x83, 0xec, 0x1d, 0x72, 0xc3, 0xac, 0xbc, 0xd3, 0x62, 0xd, 0x42, 0x2d, 0x9c, 0xf3, 0xe3, 0x8c, 0x3d, 0x52, 0x99, 0xf6, 0x47, 0x28, 0x38, 0x57, 0xe6, 0x89, 0xc6, 0xa9, 0x18, 0x77, 0x67, 0x8, 0xb9, 0xd6, 0x27, 0x48, 0xf9, 0x96, 0x86, 0xe9, 0x58, 0x37, 0x78, 0x17, 0xa6, 0xc9, 0xd9, 0xb6, 0x7, 0x68, 0xf8, 0x97, 0x26, 0x49, 0x59, 0x36, 0x87, 0xe8, 0xa7, 0xc8, 0x79, 0x16, 0x6, 0x69, 0xd8, 0xb7, 0x46, 0x29, 0x98, 0xf7, 0xe7, 0x88, 0x39, 0x56, 0x19, 0x76, 0xc7, 0xa8, 0xb8, 0xd7, 0x66, 0x9, 0x5b, 0x34, 0x85, 0xea, 0xfa, 0x95, 0x24, 0x4b, 0x4, 0x6b, 0xda, 0xb5, 0xa5, 0xca, 0x7b, 0x14, 0xe5, 0x8a, 0x3b, 0x54, 0x44, 0x2b, 0x9a, 0xf5, 0xba, 0xd5, 0x64, 0xb, 0x1b, 0x74, 0xc5, 0xaa, 0x3a, 0x55, 0xe4, 0x8b, 0x9b, 0xf4, 0x45, 0x2a, 0x65, 0xa, 0xbb, 0xd4, 0xc4, 0xab, 0x1a, 0x75, 0x84, 0xeb, 0x5a, 0x35, 0x25, 0x4a, 0xfb, 0x94, 0xdb, 0xb4, 0x5, 0x6a, 0x7a, 0x15, 0xa4, 0xcb},
+ {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea, 0x53, 0x23, 0xb3, 0xc3, 0x8e, 0xfe, 0x6e, 0x1e, 0xf4, 0x84, 0x14, 0x64, 0x29, 0x59, 0xc9, 0xb9, 0xa6, 0xd6, 0x46, 0x36, 0x7b, 0xb, 0x9b, 0xeb, 0x1, 0x71, 0xe1, 0x91, 0xdc, 0xac, 0x3c, 0x4c, 0xf5, 0x85, 0x15, 0x65, 0x28, 0x58, 0xc8, 0xb8, 0x52, 0x22, 0xb2, 0xc2, 0x8f, 0xff, 0x6f, 0x1f, 0x51, 0x21, 0xb1, 0xc1, 0x8c, 0xfc, 0x6c, 0x1c, 0xf6, 0x86, 0x16, 0x66, 0x2b, 0x5b, 0xcb, 0xbb, 0x2, 0x72, 0xe2, 0x92, 0xdf, 0xaf, 0x3f, 0x4f, 0xa5, 0xd5, 0x45, 0x35, 0x78, 0x8, 0x98, 0xe8, 0xf7, 0x87, 0x17, 0x67, 0x2a, 0x5a, 0xca, 0xba, 0x50, 0x20, 0xb0, 0xc0, 0x8d, 0xfd, 0x6d, 0x1d, 0xa4, 0xd4, 0x44, 0x34, 0x79, 0x9, 0x99, 0xe9, 0x3, 0x73, 0xe3, 0x93, 0xde, 0xae, 0x3e, 0x4e, 0xa2, 0xd2, 0x42, 0x32, 0x7f, 0xf, 0x9f, 0xef, 0x5, 0x75, 0xe5, 0x95, 0xd8, 0xa8, 0x38, 0x48, 0xf1, 0x81, 0x11, 0x61, 0x2c, 0x5c, 0xcc, 0xbc, 0x56, 0x26, 0xb6, 0xc6, 0x8b, 0xfb, 0x6b, 0x1b, 0x4, 0x74, 0xe4, 0x94, 0xd9, 0xa9, 0x39, 0x49, 0xa3, 0xd3, 0x43, 0x33, 0x7e, 0xe, 0x9e, 0xee, 0x57, 0x27, 0xb7, 0xc7, 0x8a, 0xfa, 0x6a, 0x1a, 0xf0, 0x80, 0x10, 0x60, 0x2d, 0x5d, 0xcd, 0xbd, 0xf3, 0x83, 0x13, 0x63, 0x2e, 0x5e, 0xce, 0xbe, 0x54, 0x24, 0xb4, 0xc4, 0x89, 0xf9, 0x69, 0x19, 0xa0, 0xd0, 0x40, 0x30, 0x7d, 0xd, 0x9d, 0xed, 0x7, 0x77, 0xe7, 0x97, 0xda, 0xaa, 0x3a, 0x4a, 0x55, 0x25, 0xb5, 0xc5, 0x88, 0xf8, 0x68, 0x18, 0xf2, 0x82, 0x12, 0x62, 0x2f, 0x5f, 0xcf, 0xbf, 0x6, 0x76, 0xe6, 0x96, 0xdb, 0xab, 0x3b, 0x4b, 0xa1, 0xd1, 0x41, 0x31, 0x7c, 0xc, 0x9c, 0xec},
+ {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5, 0x43, 0x32, 0xa1, 0xd0, 0x9a, 0xeb, 0x78, 0x9, 0xec, 0x9d, 0xe, 0x7f, 0x35, 0x44, 0xd7, 0xa6, 0x86, 0xf7, 0x64, 0x15, 0x5f, 0x2e, 0xbd, 0xcc, 0x29, 0x58, 0xcb, 0xba, 0xf0, 0x81, 0x12, 0x63, 0xc5, 0xb4, 0x27, 0x56, 0x1c, 0x6d, 0xfe, 0x8f, 0x6a, 0x1b, 0x88, 0xf9, 0xb3, 0xc2, 0x51, 0x20, 0x11, 0x60, 0xf3, 0x82, 0xc8, 0xb9, 0x2a, 0x5b, 0xbe, 0xcf, 0x5c, 0x2d, 0x67, 0x16, 0x85, 0xf4, 0x52, 0x23, 0xb0, 0xc1, 0x8b, 0xfa, 0x69, 0x18, 0xfd, 0x8c, 0x1f, 0x6e, 0x24, 0x55, 0xc6, 0xb7, 0x97, 0xe6, 0x75, 0x4, 0x4e, 0x3f, 0xac, 0xdd, 0x38, 0x49, 0xda, 0xab, 0xe1, 0x90, 0x3, 0x72, 0xd4, 0xa5, 0x36, 0x47, 0xd, 0x7c, 0xef, 0x9e, 0x7b, 0xa, 0x99, 0xe8, 0xa2, 0xd3, 0x40, 0x31, 0x22, 0x53, 0xc0, 0xb1, 0xfb, 0x8a, 0x19, 0x68, 0x8d, 0xfc, 0x6f, 0x1e, 0x54, 0x25, 0xb6, 0xc7, 0x61, 0x10, 0x83, 0xf2, 0xb8, 0xc9, 0x5a, 0x2b, 0xce, 0xbf, 0x2c, 0x5d, 0x17, 0x66, 0xf5, 0x84, 0xa4, 0xd5, 0x46, 0x37, 0x7d, 0xc, 0x9f, 0xee, 0xb, 0x7a, 0xe9, 0x98, 0xd2, 0xa3, 0x30, 0x41, 0xe7, 0x96, 0x5, 0x74, 0x3e, 0x4f, 0xdc, 0xad, 0x48, 0x39, 0xaa, 0xdb, 0x91, 0xe0, 0x73, 0x2, 0x33, 0x42, 0xd1, 0xa0, 0xea, 0x9b, 0x8, 0x79, 0x9c, 0xed, 0x7e, 0xf, 0x45, 0x34, 0xa7, 0xd6, 0x70, 0x1, 0x92, 0xe3, 0xa9, 0xd8, 0x4b, 0x3a, 0xdf, 0xae, 0x3d, 0x4c, 0x6, 0x77, 0xe4, 0x95, 0xb5, 0xc4, 0x57, 0x26, 0x6c, 0x1d, 0x8e, 0xff, 0x1a, 0x6b, 0xf8, 0x89, 0xc3, 0xb2, 0x21, 0x50, 0xf6, 0x87, 0x14, 0x65, 0x2f, 0x5e, 0xcd, 0xbc, 0x59, 0x28, 0xbb, 0xca, 0x80, 0xf1, 0x62, 0x13},
+ {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4, 0x73, 0x1, 0x97, 0xe5, 0xa6, 0xd4, 0x42, 0x30, 0xc4, 0xb6, 0x20, 0x52, 0x11, 0x63, 0xf5, 0x87, 0xe6, 0x94, 0x2, 0x70, 0x33, 0x41, 0xd7, 0xa5, 0x51, 0x23, 0xb5, 0xc7, 0x84, 0xf6, 0x60, 0x12, 0x95, 0xe7, 0x71, 0x3, 0x40, 0x32, 0xa4, 0xd6, 0x22, 0x50, 0xc6, 0xb4, 0xf7, 0x85, 0x13, 0x61, 0xd1, 0xa3, 0x35, 0x47, 0x4, 0x76, 0xe0, 0x92, 0x66, 0x14, 0x82, 0xf0, 0xb3, 0xc1, 0x57, 0x25, 0xa2, 0xd0, 0x46, 0x34, 0x77, 0x5, 0x93, 0xe1, 0x15, 0x67, 0xf1, 0x83, 0xc0, 0xb2, 0x24, 0x56, 0x37, 0x45, 0xd3, 0xa1, 0xe2, 0x90, 0x6, 0x74, 0x80, 0xf2, 0x64, 0x16, 0x55, 0x27, 0xb1, 0xc3, 0x44, 0x36, 0xa0, 0xd2, 0x91, 0xe3, 0x75, 0x7, 0xf3, 0x81, 0x17, 0x65, 0x26, 0x54, 0xc2, 0xb0, 0xbf, 0xcd, 0x5b, 0x29, 0x6a, 0x18, 0x8e, 0xfc, 0x8, 0x7a, 0xec, 0x9e, 0xdd, 0xaf, 0x39, 0x4b, 0xcc, 0xbe, 0x28, 0x5a, 0x19, 0x6b, 0xfd, 0x8f, 0x7b, 0x9, 0x9f, 0xed, 0xae, 0xdc, 0x4a, 0x38, 0x59, 0x2b, 0xbd, 0xcf, 0x8c, 0xfe, 0x68, 0x1a, 0xee, 0x9c, 0xa, 0x78, 0x3b, 0x49, 0xdf, 0xad, 0x2a, 0x58, 0xce, 0xbc, 0xff, 0x8d, 0x1b, 0x69, 0x9d, 0xef, 0x79, 0xb, 0x48, 0x3a, 0xac, 0xde, 0x6e, 0x1c, 0x8a, 0xf8, 0xbb, 0xc9, 0x5f, 0x2d, 0xd9, 0xab, 0x3d, 0x4f, 0xc, 0x7e, 0xe8, 0x9a, 0x1d, 0x6f, 0xf9, 0x8b, 0xc8, 0xba, 0x2c, 0x5e, 0xaa, 0xd8, 0x4e, 0x3c, 0x7f, 0xd, 0x9b, 0xe9, 0x88, 0xfa, 0x6c, 0x1e, 0x5d, 0x2f, 0xb9, 0xcb, 0x3f, 0x4d, 0xdb, 0xa9, 0xea, 0x98, 0xe, 0x7c, 0xfb, 0x89, 0x1f, 0x6d, 0x2e, 0x5c, 0xca, 0xb8, 0x4c, 0x3e, 0xa8, 0xda, 0x99, 0xeb, 0x7d, 0xf},
+ {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb, 0x63, 0x10, 0x85, 0xf6, 0xb2, 0xc1, 0x54, 0x27, 0xdc, 0xaf, 0x3a, 0x49, 0xd, 0x7e, 0xeb, 0x98, 0xc6, 0xb5, 0x20, 0x53, 0x17, 0x64, 0xf1, 0x82, 0x79, 0xa, 0x9f, 0xec, 0xa8, 0xdb, 0x4e, 0x3d, 0xa5, 0xd6, 0x43, 0x30, 0x74, 0x7, 0x92, 0xe1, 0x1a, 0x69, 0xfc, 0x8f, 0xcb, 0xb8, 0x2d, 0x5e, 0x91, 0xe2, 0x77, 0x4, 0x40, 0x33, 0xa6, 0xd5, 0x2e, 0x5d, 0xc8, 0xbb, 0xff, 0x8c, 0x19, 0x6a, 0xf2, 0x81, 0x14, 0x67, 0x23, 0x50, 0xc5, 0xb6, 0x4d, 0x3e, 0xab, 0xd8, 0x9c, 0xef, 0x7a, 0x9, 0x57, 0x24, 0xb1, 0xc2, 0x86, 0xf5, 0x60, 0x13, 0xe8, 0x9b, 0xe, 0x7d, 0x39, 0x4a, 0xdf, 0xac, 0x34, 0x47, 0xd2, 0xa1, 0xe5, 0x96, 0x3, 0x70, 0x8b, 0xf8, 0x6d, 0x1e, 0x5a, 0x29, 0xbc, 0xcf, 0x3f, 0x4c, 0xd9, 0xaa, 0xee, 0x9d, 0x8, 0x7b, 0x80, 0xf3, 0x66, 0x15, 0x51, 0x22, 0xb7, 0xc4, 0x5c, 0x2f, 0xba, 0xc9, 0x8d, 0xfe, 0x6b, 0x18, 0xe3, 0x90, 0x5, 0x76, 0x32, 0x41, 0xd4, 0xa7, 0xf9, 0x8a, 0x1f, 0x6c, 0x28, 0x5b, 0xce, 0xbd, 0x46, 0x35, 0xa0, 0xd3, 0x97, 0xe4, 0x71, 0x2, 0x9a, 0xe9, 0x7c, 0xf, 0x4b, 0x38, 0xad, 0xde, 0x25, 0x56, 0xc3, 0xb0, 0xf4, 0x87, 0x12, 0x61, 0xae, 0xdd, 0x48, 0x3b, 0x7f, 0xc, 0x99, 0xea, 0x11, 0x62, 0xf7, 0x84, 0xc0, 0xb3, 0x26, 0x55, 0xcd, 0xbe, 0x2b, 0x58, 0x1c, 0x6f, 0xfa, 0x89, 0x72, 0x1, 0x94, 0xe7, 0xa3, 0xd0, 0x45, 0x36, 0x68, 0x1b, 0x8e, 0xfd, 0xb9, 0xca, 0x5f, 0x2c, 0xd7, 0xa4, 0x31, 0x42, 0x6, 0x75, 0xe0, 0x93, 0xb, 0x78, 0xed, 0x9e, 0xda, 0xa9, 0x3c, 0x4f, 0xb4, 0xc7, 0x52, 0x21, 0x65, 0x16, 0x83, 0xf0},
+ {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6, 0x13, 0x67, 0xfb, 0x8f, 0xde, 0xaa, 0x36, 0x42, 0x94, 0xe0, 0x7c, 0x8, 0x59, 0x2d, 0xb1, 0xc5, 0x26, 0x52, 0xce, 0xba, 0xeb, 0x9f, 0x3, 0x77, 0xa1, 0xd5, 0x49, 0x3d, 0x6c, 0x18, 0x84, 0xf0, 0x35, 0x41, 0xdd, 0xa9, 0xf8, 0x8c, 0x10, 0x64, 0xb2, 0xc6, 0x5a, 0x2e, 0x7f, 0xb, 0x97, 0xe3, 0x4c, 0x38, 0xa4, 0xd0, 0x81, 0xf5, 0x69, 0x1d, 0xcb, 0xbf, 0x23, 0x57, 0x6, 0x72, 0xee, 0x9a, 0x5f, 0x2b, 0xb7, 0xc3, 0x92, 0xe6, 0x7a, 0xe, 0xd8, 0xac, 0x30, 0x44, 0x15, 0x61, 0xfd, 0x89, 0x6a, 0x1e, 0x82, 0xf6, 0xa7, 0xd3, 0x4f, 0x3b, 0xed, 0x99, 0x5, 0x71, 0x20, 0x54, 0xc8, 0xbc, 0x79, 0xd, 0x91, 0xe5, 0xb4, 0xc0, 0x5c, 0x28, 0xfe, 0x8a, 0x16, 0x62, 0x33, 0x47, 0xdb, 0xaf, 0x98, 0xec, 0x70, 0x4, 0x55, 0x21, 0xbd, 0xc9, 0x1f, 0x6b, 0xf7, 0x83, 0xd2, 0xa6, 0x3a, 0x4e, 0x8b, 0xff, 0x63, 0x17, 0x46, 0x32, 0xae, 0xda, 0xc, 0x78, 0xe4, 0x90, 0xc1, 0xb5, 0x29, 0x5d, 0xbe, 0xca, 0x56, 0x22, 0x73, 0x7, 0x9b, 0xef, 0x39, 0x4d, 0xd1, 0xa5, 0xf4, 0x80, 0x1c, 0x68, 0xad, 0xd9, 0x45, 0x31, 0x60, 0x14, 0x88, 0xfc, 0x2a, 0x5e, 0xc2, 0xb6, 0xe7, 0x93, 0xf, 0x7b, 0xd4, 0xa0, 0x3c, 0x48, 0x19, 0x6d, 0xf1, 0x85, 0x53, 0x27, 0xbb, 0xcf, 0x9e, 0xea, 0x76, 0x2, 0xc7, 0xb3, 0x2f, 0x5b, 0xa, 0x7e, 0xe2, 0x96, 0x40, 0x34, 0xa8, 0xdc, 0x8d, 0xf9, 0x65, 0x11, 0xf2, 0x86, 0x1a, 0x6e, 0x3f, 0x4b, 0xd7, 0xa3, 0x75, 0x1, 0x9d, 0xe9, 0xb8, 0xcc, 0x50, 0x24, 0xe1, 0x95, 0x9, 0x7d, 0x2c, 0x58, 0xc4, 0xb0, 0x66, 0x12, 0x8e, 0xfa, 0xab, 0xdf, 0x43, 0x37},
+ {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9, 0x3, 0x76, 0xe9, 0x9c, 0xca, 0xbf, 0x20, 0x55, 0x8c, 0xf9, 0x66, 0x13, 0x45, 0x30, 0xaf, 0xda, 0x6, 0x73, 0xec, 0x99, 0xcf, 0xba, 0x25, 0x50, 0x89, 0xfc, 0x63, 0x16, 0x40, 0x35, 0xaa, 0xdf, 0x5, 0x70, 0xef, 0x9a, 0xcc, 0xb9, 0x26, 0x53, 0x8a, 0xff, 0x60, 0x15, 0x43, 0x36, 0xa9, 0xdc, 0xc, 0x79, 0xe6, 0x93, 0xc5, 0xb0, 0x2f, 0x5a, 0x83, 0xf6, 0x69, 0x1c, 0x4a, 0x3f, 0xa0, 0xd5, 0xf, 0x7a, 0xe5, 0x90, 0xc6, 0xb3, 0x2c, 0x59, 0x80, 0xf5, 0x6a, 0x1f, 0x49, 0x3c, 0xa3, 0xd6, 0xa, 0x7f, 0xe0, 0x95, 0xc3, 0xb6, 0x29, 0x5c, 0x85, 0xf0, 0x6f, 0x1a, 0x4c, 0x39, 0xa6, 0xd3, 0x9, 0x7c, 0xe3, 0x96, 0xc0, 0xb5, 0x2a, 0x5f, 0x86, 0xf3, 0x6c, 0x19, 0x4f, 0x3a, 0xa5, 0xd0, 0x18, 0x6d, 0xf2, 0x87, 0xd1, 0xa4, 0x3b, 0x4e, 0x97, 0xe2, 0x7d, 0x8, 0x5e, 0x2b, 0xb4, 0xc1, 0x1b, 0x6e, 0xf1, 0x84, 0xd2, 0xa7, 0x38, 0x4d, 0x94, 0xe1, 0x7e, 0xb, 0x5d, 0x28, 0xb7, 0xc2, 0x1e, 0x6b, 0xf4, 0x81, 0xd7, 0xa2, 0x3d, 0x48, 0x91, 0xe4, 0x7b, 0xe, 0x58, 0x2d, 0xb2, 0xc7, 0x1d, 0x68, 0xf7, 0x82, 0xd4, 0xa1, 0x3e, 0x4b, 0x92, 0xe7, 0x78, 0xd, 0x5b, 0x2e, 0xb1, 0xc4, 0x14, 0x61, 0xfe, 0x8b, 0xdd, 0xa8, 0x37, 0x42, 0x9b, 0xee, 0x71, 0x4, 0x52, 0x27, 0xb8, 0xcd, 0x17, 0x62, 0xfd, 0x88, 0xde, 0xab, 0x34, 0x41, 0x98, 0xed, 0x72, 0x7, 0x51, 0x24, 0xbb, 0xce, 0x12, 0x67, 0xf8, 0x8d, 0xdb, 0xae, 0x31, 0x44, 0x9d, 0xe8, 0x77, 0x2, 0x54, 0x21, 0xbe, 0xcb, 0x11, 0x64, 0xfb, 0x8e, 0xd8, 0xad, 0x32, 0x47, 0x9e, 0xeb, 0x74, 0x1, 0x57, 0x22, 0xbd, 0xc8},
+ {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8, 0x33, 0x45, 0xdf, 0xa9, 0xf6, 0x80, 0x1a, 0x6c, 0xa4, 0xd2, 0x48, 0x3e, 0x61, 0x17, 0x8d, 0xfb, 0x66, 0x10, 0x8a, 0xfc, 0xa3, 0xd5, 0x4f, 0x39, 0xf1, 0x87, 0x1d, 0x6b, 0x34, 0x42, 0xd8, 0xae, 0x55, 0x23, 0xb9, 0xcf, 0x90, 0xe6, 0x7c, 0xa, 0xc2, 0xb4, 0x2e, 0x58, 0x7, 0x71, 0xeb, 0x9d, 0xcc, 0xba, 0x20, 0x56, 0x9, 0x7f, 0xe5, 0x93, 0x5b, 0x2d, 0xb7, 0xc1, 0x9e, 0xe8, 0x72, 0x4, 0xff, 0x89, 0x13, 0x65, 0x3a, 0x4c, 0xd6, 0xa0, 0x68, 0x1e, 0x84, 0xf2, 0xad, 0xdb, 0x41, 0x37, 0xaa, 0xdc, 0x46, 0x30, 0x6f, 0x19, 0x83, 0xf5, 0x3d, 0x4b, 0xd1, 0xa7, 0xf8, 0x8e, 0x14, 0x62, 0x99, 0xef, 0x75, 0x3, 0x5c, 0x2a, 0xb0, 0xc6, 0xe, 0x78, 0xe2, 0x94, 0xcb, 0xbd, 0x27, 0x51, 0x85, 0xf3, 0x69, 0x1f, 0x40, 0x36, 0xac, 0xda, 0x12, 0x64, 0xfe, 0x88, 0xd7, 0xa1, 0x3b, 0x4d, 0xb6, 0xc0, 0x5a, 0x2c, 0x73, 0x5, 0x9f, 0xe9, 0x21, 0x57, 0xcd, 0xbb, 0xe4, 0x92, 0x8, 0x7e, 0xe3, 0x95, 0xf, 0x79, 0x26, 0x50, 0xca, 0xbc, 0x74, 0x2, 0x98, 0xee, 0xb1, 0xc7, 0x5d, 0x2b, 0xd0, 0xa6, 0x3c, 0x4a, 0x15, 0x63, 0xf9, 0x8f, 0x47, 0x31, 0xab, 0xdd, 0x82, 0xf4, 0x6e, 0x18, 0x49, 0x3f, 0xa5, 0xd3, 0x8c, 0xfa, 0x60, 0x16, 0xde, 0xa8, 0x32, 0x44, 0x1b, 0x6d, 0xf7, 0x81, 0x7a, 0xc, 0x96, 0xe0, 0xbf, 0xc9, 0x53, 0x25, 0xed, 0x9b, 0x1, 0x77, 0x28, 0x5e, 0xc4, 0xb2, 0x2f, 0x59, 0xc3, 0xb5, 0xea, 0x9c, 0x6, 0x70, 0xb8, 0xce, 0x54, 0x22, 0x7d, 0xb, 0x91, 0xe7, 0x1c, 0x6a, 0xf0, 0x86, 0xd9, 0xaf, 0x35, 0x43, 0x8b, 0xfd, 0x67, 0x11, 0x4e, 0x38, 0xa2, 0xd4},
+ {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7, 0x23, 0x54, 0xcd, 0xba, 0xe2, 0x95, 0xc, 0x7b, 0xbc, 0xcb, 0x52, 0x25, 0x7d, 0xa, 0x93, 0xe4, 0x46, 0x31, 0xa8, 0xdf, 0x87, 0xf0, 0x69, 0x1e, 0xd9, 0xae, 0x37, 0x40, 0x18, 0x6f, 0xf6, 0x81, 0x65, 0x12, 0x8b, 0xfc, 0xa4, 0xd3, 0x4a, 0x3d, 0xfa, 0x8d, 0x14, 0x63, 0x3b, 0x4c, 0xd5, 0xa2, 0x8c, 0xfb, 0x62, 0x15, 0x4d, 0x3a, 0xa3, 0xd4, 0x13, 0x64, 0xfd, 0x8a, 0xd2, 0xa5, 0x3c, 0x4b, 0xaf, 0xd8, 0x41, 0x36, 0x6e, 0x19, 0x80, 0xf7, 0x30, 0x47, 0xde, 0xa9, 0xf1, 0x86, 0x1f, 0x68, 0xca, 0xbd, 0x24, 0x53, 0xb, 0x7c, 0xe5, 0x92, 0x55, 0x22, 0xbb, 0xcc, 0x94, 0xe3, 0x7a, 0xd, 0xe9, 0x9e, 0x7, 0x70, 0x28, 0x5f, 0xc6, 0xb1, 0x76, 0x1, 0x98, 0xef, 0xb7, 0xc0, 0x59, 0x2e, 0x5, 0x72, 0xeb, 0x9c, 0xc4, 0xb3, 0x2a, 0x5d, 0x9a, 0xed, 0x74, 0x3, 0x5b, 0x2c, 0xb5, 0xc2, 0x26, 0x51, 0xc8, 0xbf, 0xe7, 0x90, 0x9, 0x7e, 0xb9, 0xce, 0x57, 0x20, 0x78, 0xf, 0x96, 0xe1, 0x43, 0x34, 0xad, 0xda, 0x82, 0xf5, 0x6c, 0x1b, 0xdc, 0xab, 0x32, 0x45, 0x1d, 0x6a, 0xf3, 0x84, 0x60, 0x17, 0x8e, 0xf9, 0xa1, 0xd6, 0x4f, 0x38, 0xff, 0x88, 0x11, 0x66, 0x3e, 0x49, 0xd0, 0xa7, 0x89, 0xfe, 0x67, 0x10, 0x48, 0x3f, 0xa6, 0xd1, 0x16, 0x61, 0xf8, 0x8f, 0xd7, 0xa0, 0x39, 0x4e, 0xaa, 0xdd, 0x44, 0x33, 0x6b, 0x1c, 0x85, 0xf2, 0x35, 0x42, 0xdb, 0xac, 0xf4, 0x83, 0x1a, 0x6d, 0xcf, 0xb8, 0x21, 0x56, 0xe, 0x79, 0xe0, 0x97, 0x50, 0x27, 0xbe, 0xc9, 0x91, 0xe6, 0x7f, 0x8, 0xec, 0x9b, 0x2, 0x75, 0x2d, 0x5a, 0xc3, 0xb4, 0x73, 0x4, 0x9d, 0xea, 0xb2, 0xc5, 0x5c, 0x2b},
+ {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92, 0xd3, 0xab, 0x23, 0x5b, 0x2e, 0x56, 0xde, 0xa6, 0x34, 0x4c, 0xc4, 0xbc, 0xc9, 0xb1, 0x39, 0x41, 0xbb, 0xc3, 0x4b, 0x33, 0x46, 0x3e, 0xb6, 0xce, 0x5c, 0x24, 0xac, 0xd4, 0xa1, 0xd9, 0x51, 0x29, 0x68, 0x10, 0x98, 0xe0, 0x95, 0xed, 0x65, 0x1d, 0x8f, 0xf7, 0x7f, 0x7, 0x72, 0xa, 0x82, 0xfa, 0x6b, 0x13, 0x9b, 0xe3, 0x96, 0xee, 0x66, 0x1e, 0x8c, 0xf4, 0x7c, 0x4, 0x71, 0x9, 0x81, 0xf9, 0xb8, 0xc0, 0x48, 0x30, 0x45, 0x3d, 0xb5, 0xcd, 0x5f, 0x27, 0xaf, 0xd7, 0xa2, 0xda, 0x52, 0x2a, 0xd0, 0xa8, 0x20, 0x58, 0x2d, 0x55, 0xdd, 0xa5, 0x37, 0x4f, 0xc7, 0xbf, 0xca, 0xb2, 0x3a, 0x42, 0x3, 0x7b, 0xf3, 0x8b, 0xfe, 0x86, 0xe, 0x76, 0xe4, 0x9c, 0x14, 0x6c, 0x19, 0x61, 0xe9, 0x91, 0xd6, 0xae, 0x26, 0x5e, 0x2b, 0x53, 0xdb, 0xa3, 0x31, 0x49, 0xc1, 0xb9, 0xcc, 0xb4, 0x3c, 0x44, 0x5, 0x7d, 0xf5, 0x8d, 0xf8, 0x80, 0x8, 0x70, 0xe2, 0x9a, 0x12, 0x6a, 0x1f, 0x67, 0xef, 0x97, 0x6d, 0x15, 0x9d, 0xe5, 0x90, 0xe8, 0x60, 0x18, 0x8a, 0xf2, 0x7a, 0x2, 0x77, 0xf, 0x87, 0xff, 0xbe, 0xc6, 0x4e, 0x36, 0x43, 0x3b, 0xb3, 0xcb, 0x59, 0x21, 0xa9, 0xd1, 0xa4, 0xdc, 0x54, 0x2c, 0xbd, 0xc5, 0x4d, 0x35, 0x40, 0x38, 0xb0, 0xc8, 0x5a, 0x22, 0xaa, 0xd2, 0xa7, 0xdf, 0x57, 0x2f, 0x6e, 0x16, 0x9e, 0xe6, 0x93, 0xeb, 0x63, 0x1b, 0x89, 0xf1, 0x79, 0x1, 0x74, 0xc, 0x84, 0xfc, 0x6, 0x7e, 0xf6, 0x8e, 0xfb, 0x83, 0xb, 0x73, 0xe1, 0x99, 0x11, 0x69, 0x1c, 0x64, 0xec, 0x94, 0xd5, 0xad, 0x25, 0x5d, 0x28, 0x50, 0xd8, 0xa0, 0x32, 0x4a, 0xc2, 0xba, 0xcf, 0xb7, 0x3f, 0x47},
+ {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d, 0xc3, 0xba, 0x31, 0x48, 0x3a, 0x43, 0xc8, 0xb1, 0x2c, 0x55, 0xde, 0xa7, 0xd5, 0xac, 0x27, 0x5e, 0x9b, 0xe2, 0x69, 0x10, 0x62, 0x1b, 0x90, 0xe9, 0x74, 0xd, 0x86, 0xff, 0x8d, 0xf4, 0x7f, 0x6, 0x58, 0x21, 0xaa, 0xd3, 0xa1, 0xd8, 0x53, 0x2a, 0xb7, 0xce, 0x45, 0x3c, 0x4e, 0x37, 0xbc, 0xc5, 0x2b, 0x52, 0xd9, 0xa0, 0xd2, 0xab, 0x20, 0x59, 0xc4, 0xbd, 0x36, 0x4f, 0x3d, 0x44, 0xcf, 0xb6, 0xe8, 0x91, 0x1a, 0x63, 0x11, 0x68, 0xe3, 0x9a, 0x7, 0x7e, 0xf5, 0x8c, 0xfe, 0x87, 0xc, 0x75, 0xb0, 0xc9, 0x42, 0x3b, 0x49, 0x30, 0xbb, 0xc2, 0x5f, 0x26, 0xad, 0xd4, 0xa6, 0xdf, 0x54, 0x2d, 0x73, 0xa, 0x81, 0xf8, 0x8a, 0xf3, 0x78, 0x1, 0x9c, 0xe5, 0x6e, 0x17, 0x65, 0x1c, 0x97, 0xee, 0x56, 0x2f, 0xa4, 0xdd, 0xaf, 0xd6, 0x5d, 0x24, 0xb9, 0xc0, 0x4b, 0x32, 0x40, 0x39, 0xb2, 0xcb, 0x95, 0xec, 0x67, 0x1e, 0x6c, 0x15, 0x9e, 0xe7, 0x7a, 0x3, 0x88, 0xf1, 0x83, 0xfa, 0x71, 0x8, 0xcd, 0xb4, 0x3f, 0x46, 0x34, 0x4d, 0xc6, 0xbf, 0x22, 0x5b, 0xd0, 0xa9, 0xdb, 0xa2, 0x29, 0x50, 0xe, 0x77, 0xfc, 0x85, 0xf7, 0x8e, 0x5, 0x7c, 0xe1, 0x98, 0x13, 0x6a, 0x18, 0x61, 0xea, 0x93, 0x7d, 0x4, 0x8f, 0xf6, 0x84, 0xfd, 0x76, 0xf, 0x92, 0xeb, 0x60, 0x19, 0x6b, 0x12, 0x99, 0xe0, 0xbe, 0xc7, 0x4c, 0x35, 0x47, 0x3e, 0xb5, 0xcc, 0x51, 0x28, 0xa3, 0xda, 0xa8, 0xd1, 0x5a, 0x23, 0xe6, 0x9f, 0x14, 0x6d, 0x1f, 0x66, 0xed, 0x94, 0x9, 0x70, 0xfb, 0x82, 0xf0, 0x89, 0x2, 0x7b, 0x25, 0x5c, 0xd7, 0xae, 0xdc, 0xa5, 0x2e, 0x57, 0xca, 0xb3, 0x38, 0x41, 0x33, 0x4a, 0xc1, 0xb8},
+ {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c, 0xf3, 0x89, 0x7, 0x7d, 0x6, 0x7c, 0xf2, 0x88, 0x4, 0x7e, 0xf0, 0x8a, 0xf1, 0x8b, 0x5, 0x7f, 0xfb, 0x81, 0xf, 0x75, 0xe, 0x74, 0xfa, 0x80, 0xc, 0x76, 0xf8, 0x82, 0xf9, 0x83, 0xd, 0x77, 0x8, 0x72, 0xfc, 0x86, 0xfd, 0x87, 0x9, 0x73, 0xff, 0x85, 0xb, 0x71, 0xa, 0x70, 0xfe, 0x84, 0xeb, 0x91, 0x1f, 0x65, 0x1e, 0x64, 0xea, 0x90, 0x1c, 0x66, 0xe8, 0x92, 0xe9, 0x93, 0x1d, 0x67, 0x18, 0x62, 0xec, 0x96, 0xed, 0x97, 0x19, 0x63, 0xef, 0x95, 0x1b, 0x61, 0x1a, 0x60, 0xee, 0x94, 0x10, 0x6a, 0xe4, 0x9e, 0xe5, 0x9f, 0x11, 0x6b, 0xe7, 0x9d, 0x13, 0x69, 0x12, 0x68, 0xe6, 0x9c, 0xe3, 0x99, 0x17, 0x6d, 0x16, 0x6c, 0xe2, 0x98, 0x14, 0x6e, 0xe0, 0x9a, 0xe1, 0x9b, 0x15, 0x6f, 0xcb, 0xb1, 0x3f, 0x45, 0x3e, 0x44, 0xca, 0xb0, 0x3c, 0x46, 0xc8, 0xb2, 0xc9, 0xb3, 0x3d, 0x47, 0x38, 0x42, 0xcc, 0xb6, 0xcd, 0xb7, 0x39, 0x43, 0xcf, 0xb5, 0x3b, 0x41, 0x3a, 0x40, 0xce, 0xb4, 0x30, 0x4a, 0xc4, 0xbe, 0xc5, 0xbf, 0x31, 0x4b, 0xc7, 0xbd, 0x33, 0x49, 0x32, 0x48, 0xc6, 0xbc, 0xc3, 0xb9, 0x37, 0x4d, 0x36, 0x4c, 0xc2, 0xb8, 0x34, 0x4e, 0xc0, 0xba, 0xc1, 0xbb, 0x35, 0x4f, 0x20, 0x5a, 0xd4, 0xae, 0xd5, 0xaf, 0x21, 0x5b, 0xd7, 0xad, 0x23, 0x59, 0x22, 0x58, 0xd6, 0xac, 0xd3, 0xa9, 0x27, 0x5d, 0x26, 0x5c, 0xd2, 0xa8, 0x24, 0x5e, 0xd0, 0xaa, 0xd1, 0xab, 0x25, 0x5f, 0xdb, 0xa1, 0x2f, 0x55, 0x2e, 0x54, 0xda, 0xa0, 0x2c, 0x56, 0xd8, 0xa2, 0xd9, 0xa3, 0x2d, 0x57, 0x28, 0x52, 0xdc, 0xa6, 0xdd, 0xa7, 0x29, 0x53, 0xdf, 0xa5, 0x2b, 0x51, 0x2a, 0x50, 0xde, 0xa4},
+ {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83, 0xe3, 0x98, 0x15, 0x6e, 0x12, 0x69, 0xe4, 0x9f, 0x1c, 0x67, 0xea, 0x91, 0xed, 0x96, 0x1b, 0x60, 0xdb, 0xa0, 0x2d, 0x56, 0x2a, 0x51, 0xdc, 0xa7, 0x24, 0x5f, 0xd2, 0xa9, 0xd5, 0xae, 0x23, 0x58, 0x38, 0x43, 0xce, 0xb5, 0xc9, 0xb2, 0x3f, 0x44, 0xc7, 0xbc, 0x31, 0x4a, 0x36, 0x4d, 0xc0, 0xbb, 0xab, 0xd0, 0x5d, 0x26, 0x5a, 0x21, 0xac, 0xd7, 0x54, 0x2f, 0xa2, 0xd9, 0xa5, 0xde, 0x53, 0x28, 0x48, 0x33, 0xbe, 0xc5, 0xb9, 0xc2, 0x4f, 0x34, 0xb7, 0xcc, 0x41, 0x3a, 0x46, 0x3d, 0xb0, 0xcb, 0x70, 0xb, 0x86, 0xfd, 0x81, 0xfa, 0x77, 0xc, 0x8f, 0xf4, 0x79, 0x2, 0x7e, 0x5, 0x88, 0xf3, 0x93, 0xe8, 0x65, 0x1e, 0x62, 0x19, 0x94, 0xef, 0x6c, 0x17, 0x9a, 0xe1, 0x9d, 0xe6, 0x6b, 0x10, 0x4b, 0x30, 0xbd, 0xc6, 0xba, 0xc1, 0x4c, 0x37, 0xb4, 0xcf, 0x42, 0x39, 0x45, 0x3e, 0xb3, 0xc8, 0xa8, 0xd3, 0x5e, 0x25, 0x59, 0x22, 0xaf, 0xd4, 0x57, 0x2c, 0xa1, 0xda, 0xa6, 0xdd, 0x50, 0x2b, 0x90, 0xeb, 0x66, 0x1d, 0x61, 0x1a, 0x97, 0xec, 0x6f, 0x14, 0x99, 0xe2, 0x9e, 0xe5, 0x68, 0x13, 0x73, 0x8, 0x85, 0xfe, 0x82, 0xf9, 0x74, 0xf, 0x8c, 0xf7, 0x7a, 0x1, 0x7d, 0x6, 0x8b, 0xf0, 0xe0, 0x9b, 0x16, 0x6d, 0x11, 0x6a, 0xe7, 0x9c, 0x1f, 0x64, 0xe9, 0x92, 0xee, 0x95, 0x18, 0x63, 0x3, 0x78, 0xf5, 0x8e, 0xf2, 0x89, 0x4, 0x7f, 0xfc, 0x87, 0xa, 0x71, 0xd, 0x76, 0xfb, 0x80, 0x3b, 0x40, 0xcd, 0xb6, 0xca, 0xb1, 0x3c, 0x47, 0xc4, 0xbf, 0x32, 0x49, 0x35, 0x4e, 0xc3, 0xb8, 0xd8, 0xa3, 0x2e, 0x55, 0x29, 0x52, 0xdf, 0xa4, 0x27, 0x5c, 0xd1, 0xaa, 0xd6, 0xad, 0x20, 0x5b},
+ {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae, 0x93, 0xef, 0x6b, 0x17, 0x7e, 0x2, 0x86, 0xfa, 0x54, 0x28, 0xac, 0xd0, 0xb9, 0xc5, 0x41, 0x3d, 0x3b, 0x47, 0xc3, 0xbf, 0xd6, 0xaa, 0x2e, 0x52, 0xfc, 0x80, 0x4, 0x78, 0x11, 0x6d, 0xe9, 0x95, 0xa8, 0xd4, 0x50, 0x2c, 0x45, 0x39, 0xbd, 0xc1, 0x6f, 0x13, 0x97, 0xeb, 0x82, 0xfe, 0x7a, 0x6, 0x76, 0xa, 0x8e, 0xf2, 0x9b, 0xe7, 0x63, 0x1f, 0xb1, 0xcd, 0x49, 0x35, 0x5c, 0x20, 0xa4, 0xd8, 0xe5, 0x99, 0x1d, 0x61, 0x8, 0x74, 0xf0, 0x8c, 0x22, 0x5e, 0xda, 0xa6, 0xcf, 0xb3, 0x37, 0x4b, 0x4d, 0x31, 0xb5, 0xc9, 0xa0, 0xdc, 0x58, 0x24, 0x8a, 0xf6, 0x72, 0xe, 0x67, 0x1b, 0x9f, 0xe3, 0xde, 0xa2, 0x26, 0x5a, 0x33, 0x4f, 0xcb, 0xb7, 0x19, 0x65, 0xe1, 0x9d, 0xf4, 0x88, 0xc, 0x70, 0xec, 0x90, 0x14, 0x68, 0x1, 0x7d, 0xf9, 0x85, 0x2b, 0x57, 0xd3, 0xaf, 0xc6, 0xba, 0x3e, 0x42, 0x7f, 0x3, 0x87, 0xfb, 0x92, 0xee, 0x6a, 0x16, 0xb8, 0xc4, 0x40, 0x3c, 0x55, 0x29, 0xad, 0xd1, 0xd7, 0xab, 0x2f, 0x53, 0x3a, 0x46, 0xc2, 0xbe, 0x10, 0x6c, 0xe8, 0x94, 0xfd, 0x81, 0x5, 0x79, 0x44, 0x38, 0xbc, 0xc0, 0xa9, 0xd5, 0x51, 0x2d, 0x83, 0xff, 0x7b, 0x7, 0x6e, 0x12, 0x96, 0xea, 0x9a, 0xe6, 0x62, 0x1e, 0x77, 0xb, 0x8f, 0xf3, 0x5d, 0x21, 0xa5, 0xd9, 0xb0, 0xcc, 0x48, 0x34, 0x9, 0x75, 0xf1, 0x8d, 0xe4, 0x98, 0x1c, 0x60, 0xce, 0xb2, 0x36, 0x4a, 0x23, 0x5f, 0xdb, 0xa7, 0xa1, 0xdd, 0x59, 0x25, 0x4c, 0x30, 0xb4, 0xc8, 0x66, 0x1a, 0x9e, 0xe2, 0x8b, 0xf7, 0x73, 0xf, 0x32, 0x4e, 0xca, 0xb6, 0xdf, 0xa3, 0x27, 0x5b, 0xf5, 0x89, 0xd, 0x71, 0x18, 0x64, 0xe0, 0x9c},
+ {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1, 0x83, 0xfe, 0x79, 0x4, 0x6a, 0x17, 0x90, 0xed, 0x4c, 0x31, 0xb6, 0xcb, 0xa5, 0xd8, 0x5f, 0x22, 0x1b, 0x66, 0xe1, 0x9c, 0xf2, 0x8f, 0x8, 0x75, 0xd4, 0xa9, 0x2e, 0x53, 0x3d, 0x40, 0xc7, 0xba, 0x98, 0xe5, 0x62, 0x1f, 0x71, 0xc, 0x8b, 0xf6, 0x57, 0x2a, 0xad, 0xd0, 0xbe, 0xc3, 0x44, 0x39, 0x36, 0x4b, 0xcc, 0xb1, 0xdf, 0xa2, 0x25, 0x58, 0xf9, 0x84, 0x3, 0x7e, 0x10, 0x6d, 0xea, 0x97, 0xb5, 0xc8, 0x4f, 0x32, 0x5c, 0x21, 0xa6, 0xdb, 0x7a, 0x7, 0x80, 0xfd, 0x93, 0xee, 0x69, 0x14, 0x2d, 0x50, 0xd7, 0xaa, 0xc4, 0xb9, 0x3e, 0x43, 0xe2, 0x9f, 0x18, 0x65, 0xb, 0x76, 0xf1, 0x8c, 0xae, 0xd3, 0x54, 0x29, 0x47, 0x3a, 0xbd, 0xc0, 0x61, 0x1c, 0x9b, 0xe6, 0x88, 0xf5, 0x72, 0xf, 0x6c, 0x11, 0x96, 0xeb, 0x85, 0xf8, 0x7f, 0x2, 0xa3, 0xde, 0x59, 0x24, 0x4a, 0x37, 0xb0, 0xcd, 0xef, 0x92, 0x15, 0x68, 0x6, 0x7b, 0xfc, 0x81, 0x20, 0x5d, 0xda, 0xa7, 0xc9, 0xb4, 0x33, 0x4e, 0x77, 0xa, 0x8d, 0xf0, 0x9e, 0xe3, 0x64, 0x19, 0xb8, 0xc5, 0x42, 0x3f, 0x51, 0x2c, 0xab, 0xd6, 0xf4, 0x89, 0xe, 0x73, 0x1d, 0x60, 0xe7, 0x9a, 0x3b, 0x46, 0xc1, 0xbc, 0xd2, 0xaf, 0x28, 0x55, 0x5a, 0x27, 0xa0, 0xdd, 0xb3, 0xce, 0x49, 0x34, 0x95, 0xe8, 0x6f, 0x12, 0x7c, 0x1, 0x86, 0xfb, 0xd9, 0xa4, 0x23, 0x5e, 0x30, 0x4d, 0xca, 0xb7, 0x16, 0x6b, 0xec, 0x91, 0xff, 0x82, 0x5, 0x78, 0x41, 0x3c, 0xbb, 0xc6, 0xa8, 0xd5, 0x52, 0x2f, 0x8e, 0xf3, 0x74, 0x9, 0x67, 0x1a, 0x9d, 0xe0, 0xc2, 0xbf, 0x38, 0x45, 0x2b, 0x56, 0xd1, 0xac, 0xd, 0x70, 0xf7, 0x8a, 0xe4, 0x99, 0x1e, 0x63},
+ {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0, 0xb3, 0xcd, 0x4f, 0x31, 0x56, 0x28, 0xaa, 0xd4, 0x64, 0x1a, 0x98, 0xe6, 0x81, 0xff, 0x7d, 0x3, 0x7b, 0x5, 0x87, 0xf9, 0x9e, 0xe0, 0x62, 0x1c, 0xac, 0xd2, 0x50, 0x2e, 0x49, 0x37, 0xb5, 0xcb, 0xc8, 0xb6, 0x34, 0x4a, 0x2d, 0x53, 0xd1, 0xaf, 0x1f, 0x61, 0xe3, 0x9d, 0xfa, 0x84, 0x6, 0x78, 0xf6, 0x88, 0xa, 0x74, 0x13, 0x6d, 0xef, 0x91, 0x21, 0x5f, 0xdd, 0xa3, 0xc4, 0xba, 0x38, 0x46, 0x45, 0x3b, 0xb9, 0xc7, 0xa0, 0xde, 0x5c, 0x22, 0x92, 0xec, 0x6e, 0x10, 0x77, 0x9, 0x8b, 0xf5, 0x8d, 0xf3, 0x71, 0xf, 0x68, 0x16, 0x94, 0xea, 0x5a, 0x24, 0xa6, 0xd8, 0xbf, 0xc1, 0x43, 0x3d, 0x3e, 0x40, 0xc2, 0xbc, 0xdb, 0xa5, 0x27, 0x59, 0xe9, 0x97, 0x15, 0x6b, 0xc, 0x72, 0xf0, 0x8e, 0xf1, 0x8f, 0xd, 0x73, 0x14, 0x6a, 0xe8, 0x96, 0x26, 0x58, 0xda, 0xa4, 0xc3, 0xbd, 0x3f, 0x41, 0x42, 0x3c, 0xbe, 0xc0, 0xa7, 0xd9, 0x5b, 0x25, 0x95, 0xeb, 0x69, 0x17, 0x70, 0xe, 0x8c, 0xf2, 0x8a, 0xf4, 0x76, 0x8, 0x6f, 0x11, 0x93, 0xed, 0x5d, 0x23, 0xa1, 0xdf, 0xb8, 0xc6, 0x44, 0x3a, 0x39, 0x47, 0xc5, 0xbb, 0xdc, 0xa2, 0x20, 0x5e, 0xee, 0x90, 0x12, 0x6c, 0xb, 0x75, 0xf7, 0x89, 0x7, 0x79, 0xfb, 0x85, 0xe2, 0x9c, 0x1e, 0x60, 0xd0, 0xae, 0x2c, 0x52, 0x35, 0x4b, 0xc9, 0xb7, 0xb4, 0xca, 0x48, 0x36, 0x51, 0x2f, 0xad, 0xd3, 0x63, 0x1d, 0x9f, 0xe1, 0x86, 0xf8, 0x7a, 0x4, 0x7c, 0x2, 0x80, 0xfe, 0x99, 0xe7, 0x65, 0x1b, 0xab, 0xd5, 0x57, 0x29, 0x4e, 0x30, 0xb2, 0xcc, 0xcf, 0xb1, 0x33, 0x4d, 0x2a, 0x54, 0xd6, 0xa8, 0x18, 0x66, 0xe4, 0x9a, 0xfd, 0x83, 0x1, 0x7f},
+ {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf, 0xa3, 0xdc, 0x5d, 0x22, 0x42, 0x3d, 0xbc, 0xc3, 0x7c, 0x3, 0x82, 0xfd, 0x9d, 0xe2, 0x63, 0x1c, 0x5b, 0x24, 0xa5, 0xda, 0xba, 0xc5, 0x44, 0x3b, 0x84, 0xfb, 0x7a, 0x5, 0x65, 0x1a, 0x9b, 0xe4, 0xf8, 0x87, 0x6, 0x79, 0x19, 0x66, 0xe7, 0x98, 0x27, 0x58, 0xd9, 0xa6, 0xc6, 0xb9, 0x38, 0x47, 0xb6, 0xc9, 0x48, 0x37, 0x57, 0x28, 0xa9, 0xd6, 0x69, 0x16, 0x97, 0xe8, 0x88, 0xf7, 0x76, 0x9, 0x15, 0x6a, 0xeb, 0x94, 0xf4, 0x8b, 0xa, 0x75, 0xca, 0xb5, 0x34, 0x4b, 0x2b, 0x54, 0xd5, 0xaa, 0xed, 0x92, 0x13, 0x6c, 0xc, 0x73, 0xf2, 0x8d, 0x32, 0x4d, 0xcc, 0xb3, 0xd3, 0xac, 0x2d, 0x52, 0x4e, 0x31, 0xb0, 0xcf, 0xaf, 0xd0, 0x51, 0x2e, 0x91, 0xee, 0x6f, 0x10, 0x70, 0xf, 0x8e, 0xf1, 0x71, 0xe, 0x8f, 0xf0, 0x90, 0xef, 0x6e, 0x11, 0xae, 0xd1, 0x50, 0x2f, 0x4f, 0x30, 0xb1, 0xce, 0xd2, 0xad, 0x2c, 0x53, 0x33, 0x4c, 0xcd, 0xb2, 0xd, 0x72, 0xf3, 0x8c, 0xec, 0x93, 0x12, 0x6d, 0x2a, 0x55, 0xd4, 0xab, 0xcb, 0xb4, 0x35, 0x4a, 0xf5, 0x8a, 0xb, 0x74, 0x14, 0x6b, 0xea, 0x95, 0x89, 0xf6, 0x77, 0x8, 0x68, 0x17, 0x96, 0xe9, 0x56, 0x29, 0xa8, 0xd7, 0xb7, 0xc8, 0x49, 0x36, 0xc7, 0xb8, 0x39, 0x46, 0x26, 0x59, 0xd8, 0xa7, 0x18, 0x67, 0xe6, 0x99, 0xf9, 0x86, 0x7, 0x78, 0x64, 0x1b, 0x9a, 0xe5, 0x85, 0xfa, 0x7b, 0x4, 0xbb, 0xc4, 0x45, 0x3a, 0x5a, 0x25, 0xa4, 0xdb, 0x9c, 0xe3, 0x62, 0x1d, 0x7d, 0x2, 0x83, 0xfc, 0x43, 0x3c, 0xbd, 0xc2, 0xa2, 0xdd, 0x5c, 0x23, 0x3f, 0x40, 0xc1, 0xbe, 0xde, 0xa1, 0x20, 0x5f, 0xe0, 0x9f, 0x1e, 0x61, 0x1, 0x7e, 0xff, 0x80},
+ {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3, 0xe8, 0x68, 0xf5, 0x75, 0xd2, 0x52, 0xcf, 0x4f, 0x9c, 0x1c, 0x81, 0x1, 0xa6, 0x26, 0xbb, 0x3b, 0xcd, 0x4d, 0xd0, 0x50, 0xf7, 0x77, 0xea, 0x6a, 0xb9, 0x39, 0xa4, 0x24, 0x83, 0x3, 0x9e, 0x1e, 0x25, 0xa5, 0x38, 0xb8, 0x1f, 0x9f, 0x2, 0x82, 0x51, 0xd1, 0x4c, 0xcc, 0x6b, 0xeb, 0x76, 0xf6, 0x87, 0x7, 0x9a, 0x1a, 0xbd, 0x3d, 0xa0, 0x20, 0xf3, 0x73, 0xee, 0x6e, 0xc9, 0x49, 0xd4, 0x54, 0x6f, 0xef, 0x72, 0xf2, 0x55, 0xd5, 0x48, 0xc8, 0x1b, 0x9b, 0x6, 0x86, 0x21, 0xa1, 0x3c, 0xbc, 0x4a, 0xca, 0x57, 0xd7, 0x70, 0xf0, 0x6d, 0xed, 0x3e, 0xbe, 0x23, 0xa3, 0x4, 0x84, 0x19, 0x99, 0xa2, 0x22, 0xbf, 0x3f, 0x98, 0x18, 0x85, 0x5, 0xd6, 0x56, 0xcb, 0x4b, 0xec, 0x6c, 0xf1, 0x71, 0x13, 0x93, 0xe, 0x8e, 0x29, 0xa9, 0x34, 0xb4, 0x67, 0xe7, 0x7a, 0xfa, 0x5d, 0xdd, 0x40, 0xc0, 0xfb, 0x7b, 0xe6, 0x66, 0xc1, 0x41, 0xdc, 0x5c, 0x8f, 0xf, 0x92, 0x12, 0xb5, 0x35, 0xa8, 0x28, 0xde, 0x5e, 0xc3, 0x43, 0xe4, 0x64, 0xf9, 0x79, 0xaa, 0x2a, 0xb7, 0x37, 0x90, 0x10, 0x8d, 0xd, 0x36, 0xb6, 0x2b, 0xab, 0xc, 0x8c, 0x11, 0x91, 0x42, 0xc2, 0x5f, 0xdf, 0x78, 0xf8, 0x65, 0xe5, 0x94, 0x14, 0x89, 0x9, 0xae, 0x2e, 0xb3, 0x33, 0xe0, 0x60, 0xfd, 0x7d, 0xda, 0x5a, 0xc7, 0x47, 0x7c, 0xfc, 0x61, 0xe1, 0x46, 0xc6, 0x5b, 0xdb, 0x8, 0x88, 0x15, 0x95, 0x32, 0xb2, 0x2f, 0xaf, 0x59, 0xd9, 0x44, 0xc4, 0x63, 0xe3, 0x7e, 0xfe, 0x2d, 0xad, 0x30, 0xb0, 0x17, 0x97, 0xa, 0x8a, 0xb1, 0x31, 0xac, 0x2c, 0x8b, 0xb, 0x96, 0x16, 0xc5, 0x45, 0xd8, 0x58, 0xff, 0x7f, 0xe2, 0x62},
+ {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc, 0xf8, 0x79, 0xe7, 0x66, 0xc6, 0x47, 0xd9, 0x58, 0x84, 0x5, 0x9b, 0x1a, 0xba, 0x3b, 0xa5, 0x24, 0xed, 0x6c, 0xf2, 0x73, 0xd3, 0x52, 0xcc, 0x4d, 0x91, 0x10, 0x8e, 0xf, 0xaf, 0x2e, 0xb0, 0x31, 0x15, 0x94, 0xa, 0x8b, 0x2b, 0xaa, 0x34, 0xb5, 0x69, 0xe8, 0x76, 0xf7, 0x57, 0xd6, 0x48, 0xc9, 0xc7, 0x46, 0xd8, 0x59, 0xf9, 0x78, 0xe6, 0x67, 0xbb, 0x3a, 0xa4, 0x25, 0x85, 0x4, 0x9a, 0x1b, 0x3f, 0xbe, 0x20, 0xa1, 0x1, 0x80, 0x1e, 0x9f, 0x43, 0xc2, 0x5c, 0xdd, 0x7d, 0xfc, 0x62, 0xe3, 0x2a, 0xab, 0x35, 0xb4, 0x14, 0x95, 0xb, 0x8a, 0x56, 0xd7, 0x49, 0xc8, 0x68, 0xe9, 0x77, 0xf6, 0xd2, 0x53, 0xcd, 0x4c, 0xec, 0x6d, 0xf3, 0x72, 0xae, 0x2f, 0xb1, 0x30, 0x90, 0x11, 0x8f, 0xe, 0x93, 0x12, 0x8c, 0xd, 0xad, 0x2c, 0xb2, 0x33, 0xef, 0x6e, 0xf0, 0x71, 0xd1, 0x50, 0xce, 0x4f, 0x6b, 0xea, 0x74, 0xf5, 0x55, 0xd4, 0x4a, 0xcb, 0x17, 0x96, 0x8, 0x89, 0x29, 0xa8, 0x36, 0xb7, 0x7e, 0xff, 0x61, 0xe0, 0x40, 0xc1, 0x5f, 0xde, 0x2, 0x83, 0x1d, 0x9c, 0x3c, 0xbd, 0x23, 0xa2, 0x86, 0x7, 0x99, 0x18, 0xb8, 0x39, 0xa7, 0x26, 0xfa, 0x7b, 0xe5, 0x64, 0xc4, 0x45, 0xdb, 0x5a, 0x54, 0xd5, 0x4b, 0xca, 0x6a, 0xeb, 0x75, 0xf4, 0x28, 0xa9, 0x37, 0xb6, 0x16, 0x97, 0x9, 0x88, 0xac, 0x2d, 0xb3, 0x32, 0x92, 0x13, 0x8d, 0xc, 0xd0, 0x51, 0xcf, 0x4e, 0xee, 0x6f, 0xf1, 0x70, 0xb9, 0x38, 0xa6, 0x27, 0x87, 0x6, 0x98, 0x19, 0xc5, 0x44, 0xda, 0x5b, 0xfb, 0x7a, 0xe4, 0x65, 0x41, 0xc0, 0x5e, 0xdf, 0x7f, 0xfe, 0x60, 0xe1, 0x3d, 0xbc, 0x22, 0xa3, 0x3, 0x82, 0x1c, 0x9d},
+ {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd, 0xc8, 0x4a, 0xd1, 0x53, 0xfa, 0x78, 0xe3, 0x61, 0xac, 0x2e, 0xb5, 0x37, 0x9e, 0x1c, 0x87, 0x5, 0x8d, 0xf, 0x94, 0x16, 0xbf, 0x3d, 0xa6, 0x24, 0xe9, 0x6b, 0xf0, 0x72, 0xdb, 0x59, 0xc2, 0x40, 0x45, 0xc7, 0x5c, 0xde, 0x77, 0xf5, 0x6e, 0xec, 0x21, 0xa3, 0x38, 0xba, 0x13, 0x91, 0xa, 0x88, 0x7, 0x85, 0x1e, 0x9c, 0x35, 0xb7, 0x2c, 0xae, 0x63, 0xe1, 0x7a, 0xf8, 0x51, 0xd3, 0x48, 0xca, 0xcf, 0x4d, 0xd6, 0x54, 0xfd, 0x7f, 0xe4, 0x66, 0xab, 0x29, 0xb2, 0x30, 0x99, 0x1b, 0x80, 0x2, 0x8a, 0x8, 0x93, 0x11, 0xb8, 0x3a, 0xa1, 0x23, 0xee, 0x6c, 0xf7, 0x75, 0xdc, 0x5e, 0xc5, 0x47, 0x42, 0xc0, 0x5b, 0xd9, 0x70, 0xf2, 0x69, 0xeb, 0x26, 0xa4, 0x3f, 0xbd, 0x14, 0x96, 0xd, 0x8f, 0xe, 0x8c, 0x17, 0x95, 0x3c, 0xbe, 0x25, 0xa7, 0x6a, 0xe8, 0x73, 0xf1, 0x58, 0xda, 0x41, 0xc3, 0xc6, 0x44, 0xdf, 0x5d, 0xf4, 0x76, 0xed, 0x6f, 0xa2, 0x20, 0xbb, 0x39, 0x90, 0x12, 0x89, 0xb, 0x83, 0x1, 0x9a, 0x18, 0xb1, 0x33, 0xa8, 0x2a, 0xe7, 0x65, 0xfe, 0x7c, 0xd5, 0x57, 0xcc, 0x4e, 0x4b, 0xc9, 0x52, 0xd0, 0x79, 0xfb, 0x60, 0xe2, 0x2f, 0xad, 0x36, 0xb4, 0x1d, 0x9f, 0x4, 0x86, 0x9, 0x8b, 0x10, 0x92, 0x3b, 0xb9, 0x22, 0xa0, 0x6d, 0xef, 0x74, 0xf6, 0x5f, 0xdd, 0x46, 0xc4, 0xc1, 0x43, 0xd8, 0x5a, 0xf3, 0x71, 0xea, 0x68, 0xa5, 0x27, 0xbc, 0x3e, 0x97, 0x15, 0x8e, 0xc, 0x84, 0x6, 0x9d, 0x1f, 0xb6, 0x34, 0xaf, 0x2d, 0xe0, 0x62, 0xf9, 0x7b, 0xd2, 0x50, 0xcb, 0x49, 0x4c, 0xce, 0x55, 0xd7, 0x7e, 0xfc, 0x67, 0xe5, 0x28, 0xaa, 0x31, 0xb3, 0x1a, 0x98, 0x3, 0x81},
+ {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2, 0xd8, 0x5b, 0xc3, 0x40, 0xee, 0x6d, 0xf5, 0x76, 0xb4, 0x37, 0xaf, 0x2c, 0x82, 0x1, 0x99, 0x1a, 0xad, 0x2e, 0xb6, 0x35, 0x9b, 0x18, 0x80, 0x3, 0xc1, 0x42, 0xda, 0x59, 0xf7, 0x74, 0xec, 0x6f, 0x75, 0xf6, 0x6e, 0xed, 0x43, 0xc0, 0x58, 0xdb, 0x19, 0x9a, 0x2, 0x81, 0x2f, 0xac, 0x34, 0xb7, 0x47, 0xc4, 0x5c, 0xdf, 0x71, 0xf2, 0x6a, 0xe9, 0x2b, 0xa8, 0x30, 0xb3, 0x1d, 0x9e, 0x6, 0x85, 0x9f, 0x1c, 0x84, 0x7, 0xa9, 0x2a, 0xb2, 0x31, 0xf3, 0x70, 0xe8, 0x6b, 0xc5, 0x46, 0xde, 0x5d, 0xea, 0x69, 0xf1, 0x72, 0xdc, 0x5f, 0xc7, 0x44, 0x86, 0x5, 0x9d, 0x1e, 0xb0, 0x33, 0xab, 0x28, 0x32, 0xb1, 0x29, 0xaa, 0x4, 0x87, 0x1f, 0x9c, 0x5e, 0xdd, 0x45, 0xc6, 0x68, 0xeb, 0x73, 0xf0, 0x8e, 0xd, 0x95, 0x16, 0xb8, 0x3b, 0xa3, 0x20, 0xe2, 0x61, 0xf9, 0x7a, 0xd4, 0x57, 0xcf, 0x4c, 0x56, 0xd5, 0x4d, 0xce, 0x60, 0xe3, 0x7b, 0xf8, 0x3a, 0xb9, 0x21, 0xa2, 0xc, 0x8f, 0x17, 0x94, 0x23, 0xa0, 0x38, 0xbb, 0x15, 0x96, 0xe, 0x8d, 0x4f, 0xcc, 0x54, 0xd7, 0x79, 0xfa, 0x62, 0xe1, 0xfb, 0x78, 0xe0, 0x63, 0xcd, 0x4e, 0xd6, 0x55, 0x97, 0x14, 0x8c, 0xf, 0xa1, 0x22, 0xba, 0x39, 0xc9, 0x4a, 0xd2, 0x51, 0xff, 0x7c, 0xe4, 0x67, 0xa5, 0x26, 0xbe, 0x3d, 0x93, 0x10, 0x88, 0xb, 0x11, 0x92, 0xa, 0x89, 0x27, 0xa4, 0x3c, 0xbf, 0x7d, 0xfe, 0x66, 0xe5, 0x4b, 0xc8, 0x50, 0xd3, 0x64, 0xe7, 0x7f, 0xfc, 0x52, 0xd1, 0x49, 0xca, 0x8, 0x8b, 0x13, 0x90, 0x3e, 0xbd, 0x25, 0xa6, 0xbc, 0x3f, 0xa7, 0x24, 0x8a, 0x9, 0x91, 0x12, 0xd0, 0x53, 0xcb, 0x48, 0xe6, 0x65, 0xfd, 0x7e},
+ {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef, 0xa8, 0x2c, 0xbd, 0x39, 0x82, 0x6, 0x97, 0x13, 0xfc, 0x78, 0xe9, 0x6d, 0xd6, 0x52, 0xc3, 0x47, 0x4d, 0xc9, 0x58, 0xdc, 0x67, 0xe3, 0x72, 0xf6, 0x19, 0x9d, 0xc, 0x88, 0x33, 0xb7, 0x26, 0xa2, 0xe5, 0x61, 0xf0, 0x74, 0xcf, 0x4b, 0xda, 0x5e, 0xb1, 0x35, 0xa4, 0x20, 0x9b, 0x1f, 0x8e, 0xa, 0x9a, 0x1e, 0x8f, 0xb, 0xb0, 0x34, 0xa5, 0x21, 0xce, 0x4a, 0xdb, 0x5f, 0xe4, 0x60, 0xf1, 0x75, 0x32, 0xb6, 0x27, 0xa3, 0x18, 0x9c, 0xd, 0x89, 0x66, 0xe2, 0x73, 0xf7, 0x4c, 0xc8, 0x59, 0xdd, 0xd7, 0x53, 0xc2, 0x46, 0xfd, 0x79, 0xe8, 0x6c, 0x83, 0x7, 0x96, 0x12, 0xa9, 0x2d, 0xbc, 0x38, 0x7f, 0xfb, 0x6a, 0xee, 0x55, 0xd1, 0x40, 0xc4, 0x2b, 0xaf, 0x3e, 0xba, 0x1, 0x85, 0x14, 0x90, 0x29, 0xad, 0x3c, 0xb8, 0x3, 0x87, 0x16, 0x92, 0x7d, 0xf9, 0x68, 0xec, 0x57, 0xd3, 0x42, 0xc6, 0x81, 0x5, 0x94, 0x10, 0xab, 0x2f, 0xbe, 0x3a, 0xd5, 0x51, 0xc0, 0x44, 0xff, 0x7b, 0xea, 0x6e, 0x64, 0xe0, 0x71, 0xf5, 0x4e, 0xca, 0x5b, 0xdf, 0x30, 0xb4, 0x25, 0xa1, 0x1a, 0x9e, 0xf, 0x8b, 0xcc, 0x48, 0xd9, 0x5d, 0xe6, 0x62, 0xf3, 0x77, 0x98, 0x1c, 0x8d, 0x9, 0xb2, 0x36, 0xa7, 0x23, 0xb3, 0x37, 0xa6, 0x22, 0x99, 0x1d, 0x8c, 0x8, 0xe7, 0x63, 0xf2, 0x76, 0xcd, 0x49, 0xd8, 0x5c, 0x1b, 0x9f, 0xe, 0x8a, 0x31, 0xb5, 0x24, 0xa0, 0x4f, 0xcb, 0x5a, 0xde, 0x65, 0xe1, 0x70, 0xf4, 0xfe, 0x7a, 0xeb, 0x6f, 0xd4, 0x50, 0xc1, 0x45, 0xaa, 0x2e, 0xbf, 0x3b, 0x80, 0x4, 0x95, 0x11, 0x56, 0xd2, 0x43, 0xc7, 0x7c, 0xf8, 0x69, 0xed, 0x2, 0x86, 0x17, 0x93, 0x28, 0xac, 0x3d, 0xb9},
+ {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0, 0xb8, 0x3d, 0xaf, 0x2a, 0x96, 0x13, 0x81, 0x4, 0xe4, 0x61, 0xf3, 0x76, 0xca, 0x4f, 0xdd, 0x58, 0x6d, 0xe8, 0x7a, 0xff, 0x43, 0xc6, 0x54, 0xd1, 0x31, 0xb4, 0x26, 0xa3, 0x1f, 0x9a, 0x8, 0x8d, 0xd5, 0x50, 0xc2, 0x47, 0xfb, 0x7e, 0xec, 0x69, 0x89, 0xc, 0x9e, 0x1b, 0xa7, 0x22, 0xb0, 0x35, 0xda, 0x5f, 0xcd, 0x48, 0xf4, 0x71, 0xe3, 0x66, 0x86, 0x3, 0x91, 0x14, 0xa8, 0x2d, 0xbf, 0x3a, 0x62, 0xe7, 0x75, 0xf0, 0x4c, 0xc9, 0x5b, 0xde, 0x3e, 0xbb, 0x29, 0xac, 0x10, 0x95, 0x7, 0x82, 0xb7, 0x32, 0xa0, 0x25, 0x99, 0x1c, 0x8e, 0xb, 0xeb, 0x6e, 0xfc, 0x79, 0xc5, 0x40, 0xd2, 0x57, 0xf, 0x8a, 0x18, 0x9d, 0x21, 0xa4, 0x36, 0xb3, 0x53, 0xd6, 0x44, 0xc1, 0x7d, 0xf8, 0x6a, 0xef, 0xa9, 0x2c, 0xbe, 0x3b, 0x87, 0x2, 0x90, 0x15, 0xf5, 0x70, 0xe2, 0x67, 0xdb, 0x5e, 0xcc, 0x49, 0x11, 0x94, 0x6, 0x83, 0x3f, 0xba, 0x28, 0xad, 0x4d, 0xc8, 0x5a, 0xdf, 0x63, 0xe6, 0x74, 0xf1, 0xc4, 0x41, 0xd3, 0x56, 0xea, 0x6f, 0xfd, 0x78, 0x98, 0x1d, 0x8f, 0xa, 0xb6, 0x33, 0xa1, 0x24, 0x7c, 0xf9, 0x6b, 0xee, 0x52, 0xd7, 0x45, 0xc0, 0x20, 0xa5, 0x37, 0xb2, 0xe, 0x8b, 0x19, 0x9c, 0x73, 0xf6, 0x64, 0xe1, 0x5d, 0xd8, 0x4a, 0xcf, 0x2f, 0xaa, 0x38, 0xbd, 0x1, 0x84, 0x16, 0x93, 0xcb, 0x4e, 0xdc, 0x59, 0xe5, 0x60, 0xf2, 0x77, 0x97, 0x12, 0x80, 0x5, 0xb9, 0x3c, 0xae, 0x2b, 0x1e, 0x9b, 0x9, 0x8c, 0x30, 0xb5, 0x27, 0xa2, 0x42, 0xc7, 0x55, 0xd0, 0x6c, 0xe9, 0x7b, 0xfe, 0xa6, 0x23, 0xb1, 0x34, 0x88, 0xd, 0x9f, 0x1a, 0xfa, 0x7f, 0xed, 0x68, 0xd4, 0x51, 0xc3, 0x46},
+ {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1, 0x88, 0xe, 0x99, 0x1f, 0xaa, 0x2c, 0xbb, 0x3d, 0xcc, 0x4a, 0xdd, 0x5b, 0xee, 0x68, 0xff, 0x79, 0xd, 0x8b, 0x1c, 0x9a, 0x2f, 0xa9, 0x3e, 0xb8, 0x49, 0xcf, 0x58, 0xde, 0x6b, 0xed, 0x7a, 0xfc, 0x85, 0x3, 0x94, 0x12, 0xa7, 0x21, 0xb6, 0x30, 0xc1, 0x47, 0xd0, 0x56, 0xe3, 0x65, 0xf2, 0x74, 0x1a, 0x9c, 0xb, 0x8d, 0x38, 0xbe, 0x29, 0xaf, 0x5e, 0xd8, 0x4f, 0xc9, 0x7c, 0xfa, 0x6d, 0xeb, 0x92, 0x14, 0x83, 0x5, 0xb0, 0x36, 0xa1, 0x27, 0xd6, 0x50, 0xc7, 0x41, 0xf4, 0x72, 0xe5, 0x63, 0x17, 0x91, 0x6, 0x80, 0x35, 0xb3, 0x24, 0xa2, 0x53, 0xd5, 0x42, 0xc4, 0x71, 0xf7, 0x60, 0xe6, 0x9f, 0x19, 0x8e, 0x8, 0xbd, 0x3b, 0xac, 0x2a, 0xdb, 0x5d, 0xca, 0x4c, 0xf9, 0x7f, 0xe8, 0x6e, 0x34, 0xb2, 0x25, 0xa3, 0x16, 0x90, 0x7, 0x81, 0x70, 0xf6, 0x61, 0xe7, 0x52, 0xd4, 0x43, 0xc5, 0xbc, 0x3a, 0xad, 0x2b, 0x9e, 0x18, 0x8f, 0x9, 0xf8, 0x7e, 0xe9, 0x6f, 0xda, 0x5c, 0xcb, 0x4d, 0x39, 0xbf, 0x28, 0xae, 0x1b, 0x9d, 0xa, 0x8c, 0x7d, 0xfb, 0x6c, 0xea, 0x5f, 0xd9, 0x4e, 0xc8, 0xb1, 0x37, 0xa0, 0x26, 0x93, 0x15, 0x82, 0x4, 0xf5, 0x73, 0xe4, 0x62, 0xd7, 0x51, 0xc6, 0x40, 0x2e, 0xa8, 0x3f, 0xb9, 0xc, 0x8a, 0x1d, 0x9b, 0x6a, 0xec, 0x7b, 0xfd, 0x48, 0xce, 0x59, 0xdf, 0xa6, 0x20, 0xb7, 0x31, 0x84, 0x2, 0x95, 0x13, 0xe2, 0x64, 0xf3, 0x75, 0xc0, 0x46, 0xd1, 0x57, 0x23, 0xa5, 0x32, 0xb4, 0x1, 0x87, 0x10, 0x96, 0x67, 0xe1, 0x76, 0xf0, 0x45, 0xc3, 0x54, 0xd2, 0xab, 0x2d, 0xba, 0x3c, 0x89, 0xf, 0x98, 0x1e, 0xef, 0x69, 0xfe, 0x78, 0xcd, 0x4b, 0xdc, 0x5a},
+ {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe, 0x98, 0x1f, 0x8b, 0xc, 0xbe, 0x39, 0xad, 0x2a, 0xd4, 0x53, 0xc7, 0x40, 0xf2, 0x75, 0xe1, 0x66, 0x2d, 0xaa, 0x3e, 0xb9, 0xb, 0x8c, 0x18, 0x9f, 0x61, 0xe6, 0x72, 0xf5, 0x47, 0xc0, 0x54, 0xd3, 0xb5, 0x32, 0xa6, 0x21, 0x93, 0x14, 0x80, 0x7, 0xf9, 0x7e, 0xea, 0x6d, 0xdf, 0x58, 0xcc, 0x4b, 0x5a, 0xdd, 0x49, 0xce, 0x7c, 0xfb, 0x6f, 0xe8, 0x16, 0x91, 0x5, 0x82, 0x30, 0xb7, 0x23, 0xa4, 0xc2, 0x45, 0xd1, 0x56, 0xe4, 0x63, 0xf7, 0x70, 0x8e, 0x9, 0x9d, 0x1a, 0xa8, 0x2f, 0xbb, 0x3c, 0x77, 0xf0, 0x64, 0xe3, 0x51, 0xd6, 0x42, 0xc5, 0x3b, 0xbc, 0x28, 0xaf, 0x1d, 0x9a, 0xe, 0x89, 0xef, 0x68, 0xfc, 0x7b, 0xc9, 0x4e, 0xda, 0x5d, 0xa3, 0x24, 0xb0, 0x37, 0x85, 0x2, 0x96, 0x11, 0xb4, 0x33, 0xa7, 0x20, 0x92, 0x15, 0x81, 0x6, 0xf8, 0x7f, 0xeb, 0x6c, 0xde, 0x59, 0xcd, 0x4a, 0x2c, 0xab, 0x3f, 0xb8, 0xa, 0x8d, 0x19, 0x9e, 0x60, 0xe7, 0x73, 0xf4, 0x46, 0xc1, 0x55, 0xd2, 0x99, 0x1e, 0x8a, 0xd, 0xbf, 0x38, 0xac, 0x2b, 0xd5, 0x52, 0xc6, 0x41, 0xf3, 0x74, 0xe0, 0x67, 0x1, 0x86, 0x12, 0x95, 0x27, 0xa0, 0x34, 0xb3, 0x4d, 0xca, 0x5e, 0xd9, 0x6b, 0xec, 0x78, 0xff, 0xee, 0x69, 0xfd, 0x7a, 0xc8, 0x4f, 0xdb, 0x5c, 0xa2, 0x25, 0xb1, 0x36, 0x84, 0x3, 0x97, 0x10, 0x76, 0xf1, 0x65, 0xe2, 0x50, 0xd7, 0x43, 0xc4, 0x3a, 0xbd, 0x29, 0xae, 0x1c, 0x9b, 0xf, 0x88, 0xc3, 0x44, 0xd0, 0x57, 0xe5, 0x62, 0xf6, 0x71, 0x8f, 0x8, 0x9c, 0x1b, 0xa9, 0x2e, 0xba, 0x3d, 0x5b, 0xdc, 0x48, 0xcf, 0x7d, 0xfa, 0x6e, 0xe9, 0x17, 0x90, 0x4, 0x83, 0x31, 0xb6, 0x22, 0xa5},
+ {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab, 0x68, 0xe0, 0x65, 0xed, 0x72, 0xfa, 0x7f, 0xf7, 0x5c, 0xd4, 0x51, 0xd9, 0x46, 0xce, 0x4b, 0xc3, 0xd0, 0x58, 0xdd, 0x55, 0xca, 0x42, 0xc7, 0x4f, 0xe4, 0x6c, 0xe9, 0x61, 0xfe, 0x76, 0xf3, 0x7b, 0xb8, 0x30, 0xb5, 0x3d, 0xa2, 0x2a, 0xaf, 0x27, 0x8c, 0x4, 0x81, 0x9, 0x96, 0x1e, 0x9b, 0x13, 0xbd, 0x35, 0xb0, 0x38, 0xa7, 0x2f, 0xaa, 0x22, 0x89, 0x1, 0x84, 0xc, 0x93, 0x1b, 0x9e, 0x16, 0xd5, 0x5d, 0xd8, 0x50, 0xcf, 0x47, 0xc2, 0x4a, 0xe1, 0x69, 0xec, 0x64, 0xfb, 0x73, 0xf6, 0x7e, 0x6d, 0xe5, 0x60, 0xe8, 0x77, 0xff, 0x7a, 0xf2, 0x59, 0xd1, 0x54, 0xdc, 0x43, 0xcb, 0x4e, 0xc6, 0x5, 0x8d, 0x8, 0x80, 0x1f, 0x97, 0x12, 0x9a, 0x31, 0xb9, 0x3c, 0xb4, 0x2b, 0xa3, 0x26, 0xae, 0x67, 0xef, 0x6a, 0xe2, 0x7d, 0xf5, 0x70, 0xf8, 0x53, 0xdb, 0x5e, 0xd6, 0x49, 0xc1, 0x44, 0xcc, 0xf, 0x87, 0x2, 0x8a, 0x15, 0x9d, 0x18, 0x90, 0x3b, 0xb3, 0x36, 0xbe, 0x21, 0xa9, 0x2c, 0xa4, 0xb7, 0x3f, 0xba, 0x32, 0xad, 0x25, 0xa0, 0x28, 0x83, 0xb, 0x8e, 0x6, 0x99, 0x11, 0x94, 0x1c, 0xdf, 0x57, 0xd2, 0x5a, 0xc5, 0x4d, 0xc8, 0x40, 0xeb, 0x63, 0xe6, 0x6e, 0xf1, 0x79, 0xfc, 0x74, 0xda, 0x52, 0xd7, 0x5f, 0xc0, 0x48, 0xcd, 0x45, 0xee, 0x66, 0xe3, 0x6b, 0xf4, 0x7c, 0xf9, 0x71, 0xb2, 0x3a, 0xbf, 0x37, 0xa8, 0x20, 0xa5, 0x2d, 0x86, 0xe, 0x8b, 0x3, 0x9c, 0x14, 0x91, 0x19, 0xa, 0x82, 0x7, 0x8f, 0x10, 0x98, 0x1d, 0x95, 0x3e, 0xb6, 0x33, 0xbb, 0x24, 0xac, 0x29, 0xa1, 0x62, 0xea, 0x6f, 0xe7, 0x78, 0xf0, 0x75, 0xfd, 0x56, 0xde, 0x5b, 0xd3, 0x4c, 0xc4, 0x41, 0xc9},
+ {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4, 0x78, 0xf1, 0x77, 0xfe, 0x66, 0xef, 0x69, 0xe0, 0x44, 0xcd, 0x4b, 0xc2, 0x5a, 0xd3, 0x55, 0xdc, 0xf0, 0x79, 0xff, 0x76, 0xee, 0x67, 0xe1, 0x68, 0xcc, 0x45, 0xc3, 0x4a, 0xd2, 0x5b, 0xdd, 0x54, 0x88, 0x1, 0x87, 0xe, 0x96, 0x1f, 0x99, 0x10, 0xb4, 0x3d, 0xbb, 0x32, 0xaa, 0x23, 0xa5, 0x2c, 0xfd, 0x74, 0xf2, 0x7b, 0xe3, 0x6a, 0xec, 0x65, 0xc1, 0x48, 0xce, 0x47, 0xdf, 0x56, 0xd0, 0x59, 0x85, 0xc, 0x8a, 0x3, 0x9b, 0x12, 0x94, 0x1d, 0xb9, 0x30, 0xb6, 0x3f, 0xa7, 0x2e, 0xa8, 0x21, 0xd, 0x84, 0x2, 0x8b, 0x13, 0x9a, 0x1c, 0x95, 0x31, 0xb8, 0x3e, 0xb7, 0x2f, 0xa6, 0x20, 0xa9, 0x75, 0xfc, 0x7a, 0xf3, 0x6b, 0xe2, 0x64, 0xed, 0x49, 0xc0, 0x46, 0xcf, 0x57, 0xde, 0x58, 0xd1, 0xe7, 0x6e, 0xe8, 0x61, 0xf9, 0x70, 0xf6, 0x7f, 0xdb, 0x52, 0xd4, 0x5d, 0xc5, 0x4c, 0xca, 0x43, 0x9f, 0x16, 0x90, 0x19, 0x81, 0x8, 0x8e, 0x7, 0xa3, 0x2a, 0xac, 0x25, 0xbd, 0x34, 0xb2, 0x3b, 0x17, 0x9e, 0x18, 0x91, 0x9, 0x80, 0x6, 0x8f, 0x2b, 0xa2, 0x24, 0xad, 0x35, 0xbc, 0x3a, 0xb3, 0x6f, 0xe6, 0x60, 0xe9, 0x71, 0xf8, 0x7e, 0xf7, 0x53, 0xda, 0x5c, 0xd5, 0x4d, 0xc4, 0x42, 0xcb, 0x1a, 0x93, 0x15, 0x9c, 0x4, 0x8d, 0xb, 0x82, 0x26, 0xaf, 0x29, 0xa0, 0x38, 0xb1, 0x37, 0xbe, 0x62, 0xeb, 0x6d, 0xe4, 0x7c, 0xf5, 0x73, 0xfa, 0x5e, 0xd7, 0x51, 0xd8, 0x40, 0xc9, 0x4f, 0xc6, 0xea, 0x63, 0xe5, 0x6c, 0xf4, 0x7d, 0xfb, 0x72, 0xd6, 0x5f, 0xd9, 0x50, 0xc8, 0x41, 0xc7, 0x4e, 0x92, 0x1b, 0x9d, 0x14, 0x8c, 0x5, 0x83, 0xa, 0xae, 0x27, 0xa1, 0x28, 0xb0, 0x39, 0xbf, 0x36},
+ {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5, 0x48, 0xc2, 0x41, 0xcb, 0x5a, 0xd0, 0x53, 0xd9, 0x6c, 0xe6, 0x65, 0xef, 0x7e, 0xf4, 0x77, 0xfd, 0x90, 0x1a, 0x99, 0x13, 0x82, 0x8, 0x8b, 0x1, 0xb4, 0x3e, 0xbd, 0x37, 0xa6, 0x2c, 0xaf, 0x25, 0xd8, 0x52, 0xd1, 0x5b, 0xca, 0x40, 0xc3, 0x49, 0xfc, 0x76, 0xf5, 0x7f, 0xee, 0x64, 0xe7, 0x6d, 0x3d, 0xb7, 0x34, 0xbe, 0x2f, 0xa5, 0x26, 0xac, 0x19, 0x93, 0x10, 0x9a, 0xb, 0x81, 0x2, 0x88, 0x75, 0xff, 0x7c, 0xf6, 0x67, 0xed, 0x6e, 0xe4, 0x51, 0xdb, 0x58, 0xd2, 0x43, 0xc9, 0x4a, 0xc0, 0xad, 0x27, 0xa4, 0x2e, 0xbf, 0x35, 0xb6, 0x3c, 0x89, 0x3, 0x80, 0xa, 0x9b, 0x11, 0x92, 0x18, 0xe5, 0x6f, 0xec, 0x66, 0xf7, 0x7d, 0xfe, 0x74, 0xc1, 0x4b, 0xc8, 0x42, 0xd3, 0x59, 0xda, 0x50, 0x7a, 0xf0, 0x73, 0xf9, 0x68, 0xe2, 0x61, 0xeb, 0x5e, 0xd4, 0x57, 0xdd, 0x4c, 0xc6, 0x45, 0xcf, 0x32, 0xb8, 0x3b, 0xb1, 0x20, 0xaa, 0x29, 0xa3, 0x16, 0x9c, 0x1f, 0x95, 0x4, 0x8e, 0xd, 0x87, 0xea, 0x60, 0xe3, 0x69, 0xf8, 0x72, 0xf1, 0x7b, 0xce, 0x44, 0xc7, 0x4d, 0xdc, 0x56, 0xd5, 0x5f, 0xa2, 0x28, 0xab, 0x21, 0xb0, 0x3a, 0xb9, 0x33, 0x86, 0xc, 0x8f, 0x5, 0x94, 0x1e, 0x9d, 0x17, 0x47, 0xcd, 0x4e, 0xc4, 0x55, 0xdf, 0x5c, 0xd6, 0x63, 0xe9, 0x6a, 0xe0, 0x71, 0xfb, 0x78, 0xf2, 0xf, 0x85, 0x6, 0x8c, 0x1d, 0x97, 0x14, 0x9e, 0x2b, 0xa1, 0x22, 0xa8, 0x39, 0xb3, 0x30, 0xba, 0xd7, 0x5d, 0xde, 0x54, 0xc5, 0x4f, 0xcc, 0x46, 0xf3, 0x79, 0xfa, 0x70, 0xe1, 0x6b, 0xe8, 0x62, 0x9f, 0x15, 0x96, 0x1c, 0x8d, 0x7, 0x84, 0xe, 0xbb, 0x31, 0xb2, 0x38, 0xa9, 0x23, 0xa0, 0x2a},
+ {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba, 0x58, 0xd3, 0x53, 0xd8, 0x4e, 0xc5, 0x45, 0xce, 0x74, 0xff, 0x7f, 0xf4, 0x62, 0xe9, 0x69, 0xe2, 0xb0, 0x3b, 0xbb, 0x30, 0xa6, 0x2d, 0xad, 0x26, 0x9c, 0x17, 0x97, 0x1c, 0x8a, 0x1, 0x81, 0xa, 0xe8, 0x63, 0xe3, 0x68, 0xfe, 0x75, 0xf5, 0x7e, 0xc4, 0x4f, 0xcf, 0x44, 0xd2, 0x59, 0xd9, 0x52, 0x7d, 0xf6, 0x76, 0xfd, 0x6b, 0xe0, 0x60, 0xeb, 0x51, 0xda, 0x5a, 0xd1, 0x47, 0xcc, 0x4c, 0xc7, 0x25, 0xae, 0x2e, 0xa5, 0x33, 0xb8, 0x38, 0xb3, 0x9, 0x82, 0x2, 0x89, 0x1f, 0x94, 0x14, 0x9f, 0xcd, 0x46, 0xc6, 0x4d, 0xdb, 0x50, 0xd0, 0x5b, 0xe1, 0x6a, 0xea, 0x61, 0xf7, 0x7c, 0xfc, 0x77, 0x95, 0x1e, 0x9e, 0x15, 0x83, 0x8, 0x88, 0x3, 0xb9, 0x32, 0xb2, 0x39, 0xaf, 0x24, 0xa4, 0x2f, 0xfa, 0x71, 0xf1, 0x7a, 0xec, 0x67, 0xe7, 0x6c, 0xd6, 0x5d, 0xdd, 0x56, 0xc0, 0x4b, 0xcb, 0x40, 0xa2, 0x29, 0xa9, 0x22, 0xb4, 0x3f, 0xbf, 0x34, 0x8e, 0x5, 0x85, 0xe, 0x98, 0x13, 0x93, 0x18, 0x4a, 0xc1, 0x41, 0xca, 0x5c, 0xd7, 0x57, 0xdc, 0x66, 0xed, 0x6d, 0xe6, 0x70, 0xfb, 0x7b, 0xf0, 0x12, 0x99, 0x19, 0x92, 0x4, 0x8f, 0xf, 0x84, 0x3e, 0xb5, 0x35, 0xbe, 0x28, 0xa3, 0x23, 0xa8, 0x87, 0xc, 0x8c, 0x7, 0x91, 0x1a, 0x9a, 0x11, 0xab, 0x20, 0xa0, 0x2b, 0xbd, 0x36, 0xb6, 0x3d, 0xdf, 0x54, 0xd4, 0x5f, 0xc9, 0x42, 0xc2, 0x49, 0xf3, 0x78, 0xf8, 0x73, 0xe5, 0x6e, 0xee, 0x65, 0x37, 0xbc, 0x3c, 0xb7, 0x21, 0xaa, 0x2a, 0xa1, 0x1b, 0x90, 0x10, 0x9b, 0xd, 0x86, 0x6, 0x8d, 0x6f, 0xe4, 0x64, 0xef, 0x79, 0xf2, 0x72, 0xf9, 0x43, 0xc8, 0x48, 0xc3, 0x55, 0xde, 0x5e, 0xd5},
+ {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97, 0x28, 0xa4, 0x2d, 0xa1, 0x22, 0xae, 0x27, 0xab, 0x3c, 0xb0, 0x39, 0xb5, 0x36, 0xba, 0x33, 0xbf, 0x50, 0xdc, 0x55, 0xd9, 0x5a, 0xd6, 0x5f, 0xd3, 0x44, 0xc8, 0x41, 0xcd, 0x4e, 0xc2, 0x4b, 0xc7, 0x78, 0xf4, 0x7d, 0xf1, 0x72, 0xfe, 0x77, 0xfb, 0x6c, 0xe0, 0x69, 0xe5, 0x66, 0xea, 0x63, 0xef, 0xa0, 0x2c, 0xa5, 0x29, 0xaa, 0x26, 0xaf, 0x23, 0xb4, 0x38, 0xb1, 0x3d, 0xbe, 0x32, 0xbb, 0x37, 0x88, 0x4, 0x8d, 0x1, 0x82, 0xe, 0x87, 0xb, 0x9c, 0x10, 0x99, 0x15, 0x96, 0x1a, 0x93, 0x1f, 0xf0, 0x7c, 0xf5, 0x79, 0xfa, 0x76, 0xff, 0x73, 0xe4, 0x68, 0xe1, 0x6d, 0xee, 0x62, 0xeb, 0x67, 0xd8, 0x54, 0xdd, 0x51, 0xd2, 0x5e, 0xd7, 0x5b, 0xcc, 0x40, 0xc9, 0x45, 0xc6, 0x4a, 0xc3, 0x4f, 0x5d, 0xd1, 0x58, 0xd4, 0x57, 0xdb, 0x52, 0xde, 0x49, 0xc5, 0x4c, 0xc0, 0x43, 0xcf, 0x46, 0xca, 0x75, 0xf9, 0x70, 0xfc, 0x7f, 0xf3, 0x7a, 0xf6, 0x61, 0xed, 0x64, 0xe8, 0x6b, 0xe7, 0x6e, 0xe2, 0xd, 0x81, 0x8, 0x84, 0x7, 0x8b, 0x2, 0x8e, 0x19, 0x95, 0x1c, 0x90, 0x13, 0x9f, 0x16, 0x9a, 0x25, 0xa9, 0x20, 0xac, 0x2f, 0xa3, 0x2a, 0xa6, 0x31, 0xbd, 0x34, 0xb8, 0x3b, 0xb7, 0x3e, 0xb2, 0xfd, 0x71, 0xf8, 0x74, 0xf7, 0x7b, 0xf2, 0x7e, 0xe9, 0x65, 0xec, 0x60, 0xe3, 0x6f, 0xe6, 0x6a, 0xd5, 0x59, 0xd0, 0x5c, 0xdf, 0x53, 0xda, 0x56, 0xc1, 0x4d, 0xc4, 0x48, 0xcb, 0x47, 0xce, 0x42, 0xad, 0x21, 0xa8, 0x24, 0xa7, 0x2b, 0xa2, 0x2e, 0xb9, 0x35, 0xbc, 0x30, 0xb3, 0x3f, 0xb6, 0x3a, 0x85, 0x9, 0x80, 0xc, 0x8f, 0x3, 0x8a, 0x6, 0x91, 0x1d, 0x94, 0x18, 0x9b, 0x17, 0x9e, 0x12},
+ {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98, 0x38, 0xb5, 0x3f, 0xb2, 0x36, 0xbb, 0x31, 0xbc, 0x24, 0xa9, 0x23, 0xae, 0x2a, 0xa7, 0x2d, 0xa0, 0x70, 0xfd, 0x77, 0xfa, 0x7e, 0xf3, 0x79, 0xf4, 0x6c, 0xe1, 0x6b, 0xe6, 0x62, 0xef, 0x65, 0xe8, 0x48, 0xc5, 0x4f, 0xc2, 0x46, 0xcb, 0x41, 0xcc, 0x54, 0xd9, 0x53, 0xde, 0x5a, 0xd7, 0x5d, 0xd0, 0xe0, 0x6d, 0xe7, 0x6a, 0xee, 0x63, 0xe9, 0x64, 0xfc, 0x71, 0xfb, 0x76, 0xf2, 0x7f, 0xf5, 0x78, 0xd8, 0x55, 0xdf, 0x52, 0xd6, 0x5b, 0xd1, 0x5c, 0xc4, 0x49, 0xc3, 0x4e, 0xca, 0x47, 0xcd, 0x40, 0x90, 0x1d, 0x97, 0x1a, 0x9e, 0x13, 0x99, 0x14, 0x8c, 0x1, 0x8b, 0x6, 0x82, 0xf, 0x85, 0x8, 0xa8, 0x25, 0xaf, 0x22, 0xa6, 0x2b, 0xa1, 0x2c, 0xb4, 0x39, 0xb3, 0x3e, 0xba, 0x37, 0xbd, 0x30, 0xdd, 0x50, 0xda, 0x57, 0xd3, 0x5e, 0xd4, 0x59, 0xc1, 0x4c, 0xc6, 0x4b, 0xcf, 0x42, 0xc8, 0x45, 0xe5, 0x68, 0xe2, 0x6f, 0xeb, 0x66, 0xec, 0x61, 0xf9, 0x74, 0xfe, 0x73, 0xf7, 0x7a, 0xf0, 0x7d, 0xad, 0x20, 0xaa, 0x27, 0xa3, 0x2e, 0xa4, 0x29, 0xb1, 0x3c, 0xb6, 0x3b, 0xbf, 0x32, 0xb8, 0x35, 0x95, 0x18, 0x92, 0x1f, 0x9b, 0x16, 0x9c, 0x11, 0x89, 0x4, 0x8e, 0x3, 0x87, 0xa, 0x80, 0xd, 0x3d, 0xb0, 0x3a, 0xb7, 0x33, 0xbe, 0x34, 0xb9, 0x21, 0xac, 0x26, 0xab, 0x2f, 0xa2, 0x28, 0xa5, 0x5, 0x88, 0x2, 0x8f, 0xb, 0x86, 0xc, 0x81, 0x19, 0x94, 0x1e, 0x93, 0x17, 0x9a, 0x10, 0x9d, 0x4d, 0xc0, 0x4a, 0xc7, 0x43, 0xce, 0x44, 0xc9, 0x51, 0xdc, 0x56, 0xdb, 0x5f, 0xd2, 0x58, 0xd5, 0x75, 0xf8, 0x72, 0xff, 0x7b, 0xf6, 0x7c, 0xf1, 0x69, 0xe4, 0x6e, 0xe3, 0x67, 0xea, 0x60, 0xed},
+ {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89, 0x8, 0x86, 0x9, 0x87, 0xa, 0x84, 0xb, 0x85, 0xc, 0x82, 0xd, 0x83, 0xe, 0x80, 0xf, 0x81, 0x10, 0x9e, 0x11, 0x9f, 0x12, 0x9c, 0x13, 0x9d, 0x14, 0x9a, 0x15, 0x9b, 0x16, 0x98, 0x17, 0x99, 0x18, 0x96, 0x19, 0x97, 0x1a, 0x94, 0x1b, 0x95, 0x1c, 0x92, 0x1d, 0x93, 0x1e, 0x90, 0x1f, 0x91, 0x20, 0xae, 0x21, 0xaf, 0x22, 0xac, 0x23, 0xad, 0x24, 0xaa, 0x25, 0xab, 0x26, 0xa8, 0x27, 0xa9, 0x28, 0xa6, 0x29, 0xa7, 0x2a, 0xa4, 0x2b, 0xa5, 0x2c, 0xa2, 0x2d, 0xa3, 0x2e, 0xa0, 0x2f, 0xa1, 0x30, 0xbe, 0x31, 0xbf, 0x32, 0xbc, 0x33, 0xbd, 0x34, 0xba, 0x35, 0xbb, 0x36, 0xb8, 0x37, 0xb9, 0x38, 0xb6, 0x39, 0xb7, 0x3a, 0xb4, 0x3b, 0xb5, 0x3c, 0xb2, 0x3d, 0xb3, 0x3e, 0xb0, 0x3f, 0xb1, 0x40, 0xce, 0x41, 0xcf, 0x42, 0xcc, 0x43, 0xcd, 0x44, 0xca, 0x45, 0xcb, 0x46, 0xc8, 0x47, 0xc9, 0x48, 0xc6, 0x49, 0xc7, 0x4a, 0xc4, 0x4b, 0xc5, 0x4c, 0xc2, 0x4d, 0xc3, 0x4e, 0xc0, 0x4f, 0xc1, 0x50, 0xde, 0x51, 0xdf, 0x52, 0xdc, 0x53, 0xdd, 0x54, 0xda, 0x55, 0xdb, 0x56, 0xd8, 0x57, 0xd9, 0x58, 0xd6, 0x59, 0xd7, 0x5a, 0xd4, 0x5b, 0xd5, 0x5c, 0xd2, 0x5d, 0xd3, 0x5e, 0xd0, 0x5f, 0xd1, 0x60, 0xee, 0x61, 0xef, 0x62, 0xec, 0x63, 0xed, 0x64, 0xea, 0x65, 0xeb, 0x66, 0xe8, 0x67, 0xe9, 0x68, 0xe6, 0x69, 0xe7, 0x6a, 0xe4, 0x6b, 0xe5, 0x6c, 0xe2, 0x6d, 0xe3, 0x6e, 0xe0, 0x6f, 0xe1, 0x70, 0xfe, 0x71, 0xff, 0x72, 0xfc, 0x73, 0xfd, 0x74, 0xfa, 0x75, 0xfb, 0x76, 0xf8, 0x77, 0xf9, 0x78, 0xf6, 0x79, 0xf7, 0x7a, 0xf4, 0x7b, 0xf5, 0x7c, 0xf2, 0x7d, 0xf3, 0x7e, 0xf0, 0x7f, 0xf1},
+ {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86, 0x18, 0x97, 0x1b, 0x94, 0x1e, 0x91, 0x1d, 0x92, 0x14, 0x9b, 0x17, 0x98, 0x12, 0x9d, 0x11, 0x9e, 0x30, 0xbf, 0x33, 0xbc, 0x36, 0xb9, 0x35, 0xba, 0x3c, 0xb3, 0x3f, 0xb0, 0x3a, 0xb5, 0x39, 0xb6, 0x28, 0xa7, 0x2b, 0xa4, 0x2e, 0xa1, 0x2d, 0xa2, 0x24, 0xab, 0x27, 0xa8, 0x22, 0xad, 0x21, 0xae, 0x60, 0xef, 0x63, 0xec, 0x66, 0xe9, 0x65, 0xea, 0x6c, 0xe3, 0x6f, 0xe0, 0x6a, 0xe5, 0x69, 0xe6, 0x78, 0xf7, 0x7b, 0xf4, 0x7e, 0xf1, 0x7d, 0xf2, 0x74, 0xfb, 0x77, 0xf8, 0x72, 0xfd, 0x71, 0xfe, 0x50, 0xdf, 0x53, 0xdc, 0x56, 0xd9, 0x55, 0xda, 0x5c, 0xd3, 0x5f, 0xd0, 0x5a, 0xd5, 0x59, 0xd6, 0x48, 0xc7, 0x4b, 0xc4, 0x4e, 0xc1, 0x4d, 0xc2, 0x44, 0xcb, 0x47, 0xc8, 0x42, 0xcd, 0x41, 0xce, 0xc0, 0x4f, 0xc3, 0x4c, 0xc6, 0x49, 0xc5, 0x4a, 0xcc, 0x43, 0xcf, 0x40, 0xca, 0x45, 0xc9, 0x46, 0xd8, 0x57, 0xdb, 0x54, 0xde, 0x51, 0xdd, 0x52, 0xd4, 0x5b, 0xd7, 0x58, 0xd2, 0x5d, 0xd1, 0x5e, 0xf0, 0x7f, 0xf3, 0x7c, 0xf6, 0x79, 0xf5, 0x7a, 0xfc, 0x73, 0xff, 0x70, 0xfa, 0x75, 0xf9, 0x76, 0xe8, 0x67, 0xeb, 0x64, 0xee, 0x61, 0xed, 0x62, 0xe4, 0x6b, 0xe7, 0x68, 0xe2, 0x6d, 0xe1, 0x6e, 0xa0, 0x2f, 0xa3, 0x2c, 0xa6, 0x29, 0xa5, 0x2a, 0xac, 0x23, 0xaf, 0x20, 0xaa, 0x25, 0xa9, 0x26, 0xb8, 0x37, 0xbb, 0x34, 0xbe, 0x31, 0xbd, 0x32, 0xb4, 0x3b, 0xb7, 0x38, 0xb2, 0x3d, 0xb1, 0x3e, 0x90, 0x1f, 0x93, 0x1c, 0x96, 0x19, 0x95, 0x1a, 0x9c, 0x13, 0x9f, 0x10, 0x9a, 0x15, 0x99, 0x16, 0x88, 0x7, 0x8b, 0x4, 0x8e, 0x1, 0x8d, 0x2, 0x84, 0xb, 0x87, 0x8, 0x82, 0xd, 0x81, 0xe},
+ {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23, 0xf5, 0x65, 0xc8, 0x58, 0x8f, 0x1f, 0xb2, 0x22, 0x1, 0x91, 0x3c, 0xac, 0x7b, 0xeb, 0x46, 0xd6, 0xf7, 0x67, 0xca, 0x5a, 0x8d, 0x1d, 0xb0, 0x20, 0x3, 0x93, 0x3e, 0xae, 0x79, 0xe9, 0x44, 0xd4, 0x2, 0x92, 0x3f, 0xaf, 0x78, 0xe8, 0x45, 0xd5, 0xf6, 0x66, 0xcb, 0x5b, 0x8c, 0x1c, 0xb1, 0x21, 0xf3, 0x63, 0xce, 0x5e, 0x89, 0x19, 0xb4, 0x24, 0x7, 0x97, 0x3a, 0xaa, 0x7d, 0xed, 0x40, 0xd0, 0x6, 0x96, 0x3b, 0xab, 0x7c, 0xec, 0x41, 0xd1, 0xf2, 0x62, 0xcf, 0x5f, 0x88, 0x18, 0xb5, 0x25, 0x4, 0x94, 0x39, 0xa9, 0x7e, 0xee, 0x43, 0xd3, 0xf0, 0x60, 0xcd, 0x5d, 0x8a, 0x1a, 0xb7, 0x27, 0xf1, 0x61, 0xcc, 0x5c, 0x8b, 0x1b, 0xb6, 0x26, 0x5, 0x95, 0x38, 0xa8, 0x7f, 0xef, 0x42, 0xd2, 0xfb, 0x6b, 0xc6, 0x56, 0x81, 0x11, 0xbc, 0x2c, 0xf, 0x9f, 0x32, 0xa2, 0x75, 0xe5, 0x48, 0xd8, 0xe, 0x9e, 0x33, 0xa3, 0x74, 0xe4, 0x49, 0xd9, 0xfa, 0x6a, 0xc7, 0x57, 0x80, 0x10, 0xbd, 0x2d, 0xc, 0x9c, 0x31, 0xa1, 0x76, 0xe6, 0x4b, 0xdb, 0xf8, 0x68, 0xc5, 0x55, 0x82, 0x12, 0xbf, 0x2f, 0xf9, 0x69, 0xc4, 0x54, 0x83, 0x13, 0xbe, 0x2e, 0xd, 0x9d, 0x30, 0xa0, 0x77, 0xe7, 0x4a, 0xda, 0x8, 0x98, 0x35, 0xa5, 0x72, 0xe2, 0x4f, 0xdf, 0xfc, 0x6c, 0xc1, 0x51, 0x86, 0x16, 0xbb, 0x2b, 0xfd, 0x6d, 0xc0, 0x50, 0x87, 0x17, 0xba, 0x2a, 0x9, 0x99, 0x34, 0xa4, 0x73, 0xe3, 0x4e, 0xde, 0xff, 0x6f, 0xc2, 0x52, 0x85, 0x15, 0xb8, 0x28, 0xb, 0x9b, 0x36, 0xa6, 0x71, 0xe1, 0x4c, 0xdc, 0xa, 0x9a, 0x37, 0xa7, 0x70, 0xe0, 0x4d, 0xdd, 0xfe, 0x6e, 0xc3, 0x53, 0x84, 0x14, 0xb9, 0x29},
+ {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c, 0xe5, 0x74, 0xda, 0x4b, 0x9b, 0xa, 0xa4, 0x35, 0x19, 0x88, 0x26, 0xb7, 0x67, 0xf6, 0x58, 0xc9, 0xd7, 0x46, 0xe8, 0x79, 0xa9, 0x38, 0x96, 0x7, 0x2b, 0xba, 0x14, 0x85, 0x55, 0xc4, 0x6a, 0xfb, 0x32, 0xa3, 0xd, 0x9c, 0x4c, 0xdd, 0x73, 0xe2, 0xce, 0x5f, 0xf1, 0x60, 0xb0, 0x21, 0x8f, 0x1e, 0xb3, 0x22, 0x8c, 0x1d, 0xcd, 0x5c, 0xf2, 0x63, 0x4f, 0xde, 0x70, 0xe1, 0x31, 0xa0, 0xe, 0x9f, 0x56, 0xc7, 0x69, 0xf8, 0x28, 0xb9, 0x17, 0x86, 0xaa, 0x3b, 0x95, 0x4, 0xd4, 0x45, 0xeb, 0x7a, 0x64, 0xf5, 0x5b, 0xca, 0x1a, 0x8b, 0x25, 0xb4, 0x98, 0x9, 0xa7, 0x36, 0xe6, 0x77, 0xd9, 0x48, 0x81, 0x10, 0xbe, 0x2f, 0xff, 0x6e, 0xc0, 0x51, 0x7d, 0xec, 0x42, 0xd3, 0x3, 0x92, 0x3c, 0xad, 0x7b, 0xea, 0x44, 0xd5, 0x5, 0x94, 0x3a, 0xab, 0x87, 0x16, 0xb8, 0x29, 0xf9, 0x68, 0xc6, 0x57, 0x9e, 0xf, 0xa1, 0x30, 0xe0, 0x71, 0xdf, 0x4e, 0x62, 0xf3, 0x5d, 0xcc, 0x1c, 0x8d, 0x23, 0xb2, 0xac, 0x3d, 0x93, 0x2, 0xd2, 0x43, 0xed, 0x7c, 0x50, 0xc1, 0x6f, 0xfe, 0x2e, 0xbf, 0x11, 0x80, 0x49, 0xd8, 0x76, 0xe7, 0x37, 0xa6, 0x8, 0x99, 0xb5, 0x24, 0x8a, 0x1b, 0xcb, 0x5a, 0xf4, 0x65, 0xc8, 0x59, 0xf7, 0x66, 0xb6, 0x27, 0x89, 0x18, 0x34, 0xa5, 0xb, 0x9a, 0x4a, 0xdb, 0x75, 0xe4, 0x2d, 0xbc, 0x12, 0x83, 0x53, 0xc2, 0x6c, 0xfd, 0xd1, 0x40, 0xee, 0x7f, 0xaf, 0x3e, 0x90, 0x1, 0x1f, 0x8e, 0x20, 0xb1, 0x61, 0xf0, 0x5e, 0xcf, 0xe3, 0x72, 0xdc, 0x4d, 0x9d, 0xc, 0xa2, 0x33, 0xfa, 0x6b, 0xc5, 0x54, 0x84, 0x15, 0xbb, 0x2a, 0x6, 0x97, 0x39, 0xa8, 0x78, 0xe9, 0x47, 0xd6},
+ {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d, 0xd5, 0x47, 0xec, 0x7e, 0xa7, 0x35, 0x9e, 0xc, 0x31, 0xa3, 0x8, 0x9a, 0x43, 0xd1, 0x7a, 0xe8, 0xb7, 0x25, 0x8e, 0x1c, 0xc5, 0x57, 0xfc, 0x6e, 0x53, 0xc1, 0x6a, 0xf8, 0x21, 0xb3, 0x18, 0x8a, 0x62, 0xf0, 0x5b, 0xc9, 0x10, 0x82, 0x29, 0xbb, 0x86, 0x14, 0xbf, 0x2d, 0xf4, 0x66, 0xcd, 0x5f, 0x73, 0xe1, 0x4a, 0xd8, 0x1, 0x93, 0x38, 0xaa, 0x97, 0x5, 0xae, 0x3c, 0xe5, 0x77, 0xdc, 0x4e, 0xa6, 0x34, 0x9f, 0xd, 0xd4, 0x46, 0xed, 0x7f, 0x42, 0xd0, 0x7b, 0xe9, 0x30, 0xa2, 0x9, 0x9b, 0xc4, 0x56, 0xfd, 0x6f, 0xb6, 0x24, 0x8f, 0x1d, 0x20, 0xb2, 0x19, 0x8b, 0x52, 0xc0, 0x6b, 0xf9, 0x11, 0x83, 0x28, 0xba, 0x63, 0xf1, 0x5a, 0xc8, 0xf5, 0x67, 0xcc, 0x5e, 0x87, 0x15, 0xbe, 0x2c, 0xe6, 0x74, 0xdf, 0x4d, 0x94, 0x6, 0xad, 0x3f, 0x2, 0x90, 0x3b, 0xa9, 0x70, 0xe2, 0x49, 0xdb, 0x33, 0xa1, 0xa, 0x98, 0x41, 0xd3, 0x78, 0xea, 0xd7, 0x45, 0xee, 0x7c, 0xa5, 0x37, 0x9c, 0xe, 0x51, 0xc3, 0x68, 0xfa, 0x23, 0xb1, 0x1a, 0x88, 0xb5, 0x27, 0x8c, 0x1e, 0xc7, 0x55, 0xfe, 0x6c, 0x84, 0x16, 0xbd, 0x2f, 0xf6, 0x64, 0xcf, 0x5d, 0x60, 0xf2, 0x59, 0xcb, 0x12, 0x80, 0x2b, 0xb9, 0x95, 0x7, 0xac, 0x3e, 0xe7, 0x75, 0xde, 0x4c, 0x71, 0xe3, 0x48, 0xda, 0x3, 0x91, 0x3a, 0xa8, 0x40, 0xd2, 0x79, 0xeb, 0x32, 0xa0, 0xb, 0x99, 0xa4, 0x36, 0x9d, 0xf, 0xd6, 0x44, 0xef, 0x7d, 0x22, 0xb0, 0x1b, 0x89, 0x50, 0xc2, 0x69, 0xfb, 0xc6, 0x54, 0xff, 0x6d, 0xb4, 0x26, 0x8d, 0x1f, 0xf7, 0x65, 0xce, 0x5c, 0x85, 0x17, 0xbc, 0x2e, 0x13, 0x81, 0x2a, 0xb8, 0x61, 0xf3, 0x58, 0xca},
+ {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32, 0xc5, 0x56, 0xfe, 0x6d, 0xb3, 0x20, 0x88, 0x1b, 0x29, 0xba, 0x12, 0x81, 0x5f, 0xcc, 0x64, 0xf7, 0x97, 0x4, 0xac, 0x3f, 0xe1, 0x72, 0xda, 0x49, 0x7b, 0xe8, 0x40, 0xd3, 0xd, 0x9e, 0x36, 0xa5, 0x52, 0xc1, 0x69, 0xfa, 0x24, 0xb7, 0x1f, 0x8c, 0xbe, 0x2d, 0x85, 0x16, 0xc8, 0x5b, 0xf3, 0x60, 0x33, 0xa0, 0x8, 0x9b, 0x45, 0xd6, 0x7e, 0xed, 0xdf, 0x4c, 0xe4, 0x77, 0xa9, 0x3a, 0x92, 0x1, 0xf6, 0x65, 0xcd, 0x5e, 0x80, 0x13, 0xbb, 0x28, 0x1a, 0x89, 0x21, 0xb2, 0x6c, 0xff, 0x57, 0xc4, 0xa4, 0x37, 0x9f, 0xc, 0xd2, 0x41, 0xe9, 0x7a, 0x48, 0xdb, 0x73, 0xe0, 0x3e, 0xad, 0x5, 0x96, 0x61, 0xf2, 0x5a, 0xc9, 0x17, 0x84, 0x2c, 0xbf, 0x8d, 0x1e, 0xb6, 0x25, 0xfb, 0x68, 0xc0, 0x53, 0x66, 0xf5, 0x5d, 0xce, 0x10, 0x83, 0x2b, 0xb8, 0x8a, 0x19, 0xb1, 0x22, 0xfc, 0x6f, 0xc7, 0x54, 0xa3, 0x30, 0x98, 0xb, 0xd5, 0x46, 0xee, 0x7d, 0x4f, 0xdc, 0x74, 0xe7, 0x39, 0xaa, 0x2, 0x91, 0xf1, 0x62, 0xca, 0x59, 0x87, 0x14, 0xbc, 0x2f, 0x1d, 0x8e, 0x26, 0xb5, 0x6b, 0xf8, 0x50, 0xc3, 0x34, 0xa7, 0xf, 0x9c, 0x42, 0xd1, 0x79, 0xea, 0xd8, 0x4b, 0xe3, 0x70, 0xae, 0x3d, 0x95, 0x6, 0x55, 0xc6, 0x6e, 0xfd, 0x23, 0xb0, 0x18, 0x8b, 0xb9, 0x2a, 0x82, 0x11, 0xcf, 0x5c, 0xf4, 0x67, 0x90, 0x3, 0xab, 0x38, 0xe6, 0x75, 0xdd, 0x4e, 0x7c, 0xef, 0x47, 0xd4, 0xa, 0x99, 0x31, 0xa2, 0xc2, 0x51, 0xf9, 0x6a, 0xb4, 0x27, 0x8f, 0x1c, 0x2e, 0xbd, 0x15, 0x86, 0x58, 0xcb, 0x63, 0xf0, 0x7, 0x94, 0x3c, 0xaf, 0x71, 0xe2, 0x4a, 0xd9, 0xeb, 0x78, 0xd0, 0x43, 0x9d, 0xe, 0xa6, 0x35},
+ {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f, 0xb5, 0x21, 0x80, 0x14, 0xdf, 0x4b, 0xea, 0x7e, 0x61, 0xf5, 0x54, 0xc0, 0xb, 0x9f, 0x3e, 0xaa, 0x77, 0xe3, 0x42, 0xd6, 0x1d, 0x89, 0x28, 0xbc, 0xa3, 0x37, 0x96, 0x2, 0xc9, 0x5d, 0xfc, 0x68, 0xc2, 0x56, 0xf7, 0x63, 0xa8, 0x3c, 0x9d, 0x9, 0x16, 0x82, 0x23, 0xb7, 0x7c, 0xe8, 0x49, 0xdd, 0xee, 0x7a, 0xdb, 0x4f, 0x84, 0x10, 0xb1, 0x25, 0x3a, 0xae, 0xf, 0x9b, 0x50, 0xc4, 0x65, 0xf1, 0x5b, 0xcf, 0x6e, 0xfa, 0x31, 0xa5, 0x4, 0x90, 0x8f, 0x1b, 0xba, 0x2e, 0xe5, 0x71, 0xd0, 0x44, 0x99, 0xd, 0xac, 0x38, 0xf3, 0x67, 0xc6, 0x52, 0x4d, 0xd9, 0x78, 0xec, 0x27, 0xb3, 0x12, 0x86, 0x2c, 0xb8, 0x19, 0x8d, 0x46, 0xd2, 0x73, 0xe7, 0xf8, 0x6c, 0xcd, 0x59, 0x92, 0x6, 0xa7, 0x33, 0xc1, 0x55, 0xf4, 0x60, 0xab, 0x3f, 0x9e, 0xa, 0x15, 0x81, 0x20, 0xb4, 0x7f, 0xeb, 0x4a, 0xde, 0x74, 0xe0, 0x41, 0xd5, 0x1e, 0x8a, 0x2b, 0xbf, 0xa0, 0x34, 0x95, 0x1, 0xca, 0x5e, 0xff, 0x6b, 0xb6, 0x22, 0x83, 0x17, 0xdc, 0x48, 0xe9, 0x7d, 0x62, 0xf6, 0x57, 0xc3, 0x8, 0x9c, 0x3d, 0xa9, 0x3, 0x97, 0x36, 0xa2, 0x69, 0xfd, 0x5c, 0xc8, 0xd7, 0x43, 0xe2, 0x76, 0xbd, 0x29, 0x88, 0x1c, 0x2f, 0xbb, 0x1a, 0x8e, 0x45, 0xd1, 0x70, 0xe4, 0xfb, 0x6f, 0xce, 0x5a, 0x91, 0x5, 0xa4, 0x30, 0x9a, 0xe, 0xaf, 0x3b, 0xf0, 0x64, 0xc5, 0x51, 0x4e, 0xda, 0x7b, 0xef, 0x24, 0xb0, 0x11, 0x85, 0x58, 0xcc, 0x6d, 0xf9, 0x32, 0xa6, 0x7, 0x93, 0x8c, 0x18, 0xb9, 0x2d, 0xe6, 0x72, 0xd3, 0x47, 0xed, 0x79, 0xd8, 0x4c, 0x87, 0x13, 0xb2, 0x26, 0x39, 0xad, 0xc, 0x98, 0x53, 0xc7, 0x66, 0xf2},
+ {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10, 0xa5, 0x30, 0x92, 0x7, 0xcb, 0x5e, 0xfc, 0x69, 0x79, 0xec, 0x4e, 0xdb, 0x17, 0x82, 0x20, 0xb5, 0x57, 0xc2, 0x60, 0xf5, 0x39, 0xac, 0xe, 0x9b, 0x8b, 0x1e, 0xbc, 0x29, 0xe5, 0x70, 0xd2, 0x47, 0xf2, 0x67, 0xc5, 0x50, 0x9c, 0x9, 0xab, 0x3e, 0x2e, 0xbb, 0x19, 0x8c, 0x40, 0xd5, 0x77, 0xe2, 0xae, 0x3b, 0x99, 0xc, 0xc0, 0x55, 0xf7, 0x62, 0x72, 0xe7, 0x45, 0xd0, 0x1c, 0x89, 0x2b, 0xbe, 0xb, 0x9e, 0x3c, 0xa9, 0x65, 0xf0, 0x52, 0xc7, 0xd7, 0x42, 0xe0, 0x75, 0xb9, 0x2c, 0x8e, 0x1b, 0xf9, 0x6c, 0xce, 0x5b, 0x97, 0x2, 0xa0, 0x35, 0x25, 0xb0, 0x12, 0x87, 0x4b, 0xde, 0x7c, 0xe9, 0x5c, 0xc9, 0x6b, 0xfe, 0x32, 0xa7, 0x5, 0x90, 0x80, 0x15, 0xb7, 0x22, 0xee, 0x7b, 0xd9, 0x4c, 0x41, 0xd4, 0x76, 0xe3, 0x2f, 0xba, 0x18, 0x8d, 0x9d, 0x8, 0xaa, 0x3f, 0xf3, 0x66, 0xc4, 0x51, 0xe4, 0x71, 0xd3, 0x46, 0x8a, 0x1f, 0xbd, 0x28, 0x38, 0xad, 0xf, 0x9a, 0x56, 0xc3, 0x61, 0xf4, 0x16, 0x83, 0x21, 0xb4, 0x78, 0xed, 0x4f, 0xda, 0xca, 0x5f, 0xfd, 0x68, 0xa4, 0x31, 0x93, 0x6, 0xb3, 0x26, 0x84, 0x11, 0xdd, 0x48, 0xea, 0x7f, 0x6f, 0xfa, 0x58, 0xcd, 0x1, 0x94, 0x36, 0xa3, 0xef, 0x7a, 0xd8, 0x4d, 0x81, 0x14, 0xb6, 0x23, 0x33, 0xa6, 0x4, 0x91, 0x5d, 0xc8, 0x6a, 0xff, 0x4a, 0xdf, 0x7d, 0xe8, 0x24, 0xb1, 0x13, 0x86, 0x96, 0x3, 0xa1, 0x34, 0xf8, 0x6d, 0xcf, 0x5a, 0xb8, 0x2d, 0x8f, 0x1a, 0xd6, 0x43, 0xe1, 0x74, 0x64, 0xf1, 0x53, 0xc6, 0xa, 0x9f, 0x3d, 0xa8, 0x1d, 0x88, 0x2a, 0xbf, 0x73, 0xe6, 0x44, 0xd1, 0xc1, 0x54, 0xf6, 0x63, 0xaf, 0x3a, 0x98, 0xd},
+ {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1, 0x95, 0x3, 0xa4, 0x32, 0xf7, 0x61, 0xc6, 0x50, 0x51, 0xc7, 0x60, 0xf6, 0x33, 0xa5, 0x2, 0x94, 0x37, 0xa1, 0x6, 0x90, 0x55, 0xc3, 0x64, 0xf2, 0xf3, 0x65, 0xc2, 0x54, 0x91, 0x7, 0xa0, 0x36, 0xa2, 0x34, 0x93, 0x5, 0xc0, 0x56, 0xf1, 0x67, 0x66, 0xf0, 0x57, 0xc1, 0x4, 0x92, 0x35, 0xa3, 0x6e, 0xf8, 0x5f, 0xc9, 0xc, 0x9a, 0x3d, 0xab, 0xaa, 0x3c, 0x9b, 0xd, 0xc8, 0x5e, 0xf9, 0x6f, 0xfb, 0x6d, 0xca, 0x5c, 0x99, 0xf, 0xa8, 0x3e, 0x3f, 0xa9, 0xe, 0x98, 0x5d, 0xcb, 0x6c, 0xfa, 0x59, 0xcf, 0x68, 0xfe, 0x3b, 0xad, 0xa, 0x9c, 0x9d, 0xb, 0xac, 0x3a, 0xff, 0x69, 0xce, 0x58, 0xcc, 0x5a, 0xfd, 0x6b, 0xae, 0x38, 0x9f, 0x9, 0x8, 0x9e, 0x39, 0xaf, 0x6a, 0xfc, 0x5b, 0xcd, 0xdc, 0x4a, 0xed, 0x7b, 0xbe, 0x28, 0x8f, 0x19, 0x18, 0x8e, 0x29, 0xbf, 0x7a, 0xec, 0x4b, 0xdd, 0x49, 0xdf, 0x78, 0xee, 0x2b, 0xbd, 0x1a, 0x8c, 0x8d, 0x1b, 0xbc, 0x2a, 0xef, 0x79, 0xde, 0x48, 0xeb, 0x7d, 0xda, 0x4c, 0x89, 0x1f, 0xb8, 0x2e, 0x2f, 0xb9, 0x1e, 0x88, 0x4d, 0xdb, 0x7c, 0xea, 0x7e, 0xe8, 0x4f, 0xd9, 0x1c, 0x8a, 0x2d, 0xbb, 0xba, 0x2c, 0x8b, 0x1d, 0xd8, 0x4e, 0xe9, 0x7f, 0xb2, 0x24, 0x83, 0x15, 0xd0, 0x46, 0xe1, 0x77, 0x76, 0xe0, 0x47, 0xd1, 0x14, 0x82, 0x25, 0xb3, 0x27, 0xb1, 0x16, 0x80, 0x45, 0xd3, 0x74, 0xe2, 0xe3, 0x75, 0xd2, 0x44, 0x81, 0x17, 0xb0, 0x26, 0x85, 0x13, 0xb4, 0x22, 0xe7, 0x71, 0xd6, 0x40, 0x41, 0xd7, 0x70, 0xe6, 0x23, 0xb5, 0x12, 0x84, 0x10, 0x86, 0x21, 0xb7, 0x72, 0xe4, 0x43, 0xd5, 0xd4, 0x42, 0xe5, 0x73, 0xb6, 0x20, 0x87, 0x11},
+ {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe, 0x85, 0x12, 0xb6, 0x21, 0xe3, 0x74, 0xd0, 0x47, 0x49, 0xde, 0x7a, 0xed, 0x2f, 0xb8, 0x1c, 0x8b, 0x17, 0x80, 0x24, 0xb3, 0x71, 0xe6, 0x42, 0xd5, 0xdb, 0x4c, 0xe8, 0x7f, 0xbd, 0x2a, 0x8e, 0x19, 0x92, 0x5, 0xa1, 0x36, 0xf4, 0x63, 0xc7, 0x50, 0x5e, 0xc9, 0x6d, 0xfa, 0x38, 0xaf, 0xb, 0x9c, 0x2e, 0xb9, 0x1d, 0x8a, 0x48, 0xdf, 0x7b, 0xec, 0xe2, 0x75, 0xd1, 0x46, 0x84, 0x13, 0xb7, 0x20, 0xab, 0x3c, 0x98, 0xf, 0xcd, 0x5a, 0xfe, 0x69, 0x67, 0xf0, 0x54, 0xc3, 0x1, 0x96, 0x32, 0xa5, 0x39, 0xae, 0xa, 0x9d, 0x5f, 0xc8, 0x6c, 0xfb, 0xf5, 0x62, 0xc6, 0x51, 0x93, 0x4, 0xa0, 0x37, 0xbc, 0x2b, 0x8f, 0x18, 0xda, 0x4d, 0xe9, 0x7e, 0x70, 0xe7, 0x43, 0xd4, 0x16, 0x81, 0x25, 0xb2, 0x5c, 0xcb, 0x6f, 0xf8, 0x3a, 0xad, 0x9, 0x9e, 0x90, 0x7, 0xa3, 0x34, 0xf6, 0x61, 0xc5, 0x52, 0xd9, 0x4e, 0xea, 0x7d, 0xbf, 0x28, 0x8c, 0x1b, 0x15, 0x82, 0x26, 0xb1, 0x73, 0xe4, 0x40, 0xd7, 0x4b, 0xdc, 0x78, 0xef, 0x2d, 0xba, 0x1e, 0x89, 0x87, 0x10, 0xb4, 0x23, 0xe1, 0x76, 0xd2, 0x45, 0xce, 0x59, 0xfd, 0x6a, 0xa8, 0x3f, 0x9b, 0xc, 0x2, 0x95, 0x31, 0xa6, 0x64, 0xf3, 0x57, 0xc0, 0x72, 0xe5, 0x41, 0xd6, 0x14, 0x83, 0x27, 0xb0, 0xbe, 0x29, 0x8d, 0x1a, 0xd8, 0x4f, 0xeb, 0x7c, 0xf7, 0x60, 0xc4, 0x53, 0x91, 0x6, 0xa2, 0x35, 0x3b, 0xac, 0x8, 0x9f, 0x5d, 0xca, 0x6e, 0xf9, 0x65, 0xf2, 0x56, 0xc1, 0x3, 0x94, 0x30, 0xa7, 0xa9, 0x3e, 0x9a, 0xd, 0xcf, 0x58, 0xfc, 0x6b, 0xe0, 0x77, 0xd3, 0x44, 0x86, 0x11, 0xb5, 0x22, 0x2c, 0xbb, 0x1f, 0x88, 0x4a, 0xdd, 0x79, 0xee},
+ {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b, 0x75, 0xed, 0x58, 0xc0, 0x2f, 0xb7, 0x2, 0x9a, 0xc1, 0x59, 0xec, 0x74, 0x9b, 0x3, 0xb6, 0x2e, 0xea, 0x72, 0xc7, 0x5f, 0xb0, 0x28, 0x9d, 0x5, 0x5e, 0xc6, 0x73, 0xeb, 0x4, 0x9c, 0x29, 0xb1, 0x9f, 0x7, 0xb2, 0x2a, 0xc5, 0x5d, 0xe8, 0x70, 0x2b, 0xb3, 0x6, 0x9e, 0x71, 0xe9, 0x5c, 0xc4, 0xc9, 0x51, 0xe4, 0x7c, 0x93, 0xb, 0xbe, 0x26, 0x7d, 0xe5, 0x50, 0xc8, 0x27, 0xbf, 0xa, 0x92, 0xbc, 0x24, 0x91, 0x9, 0xe6, 0x7e, 0xcb, 0x53, 0x8, 0x90, 0x25, 0xbd, 0x52, 0xca, 0x7f, 0xe7, 0x23, 0xbb, 0xe, 0x96, 0x79, 0xe1, 0x54, 0xcc, 0x97, 0xf, 0xba, 0x22, 0xcd, 0x55, 0xe0, 0x78, 0x56, 0xce, 0x7b, 0xe3, 0xc, 0x94, 0x21, 0xb9, 0xe2, 0x7a, 0xcf, 0x57, 0xb8, 0x20, 0x95, 0xd, 0x8f, 0x17, 0xa2, 0x3a, 0xd5, 0x4d, 0xf8, 0x60, 0x3b, 0xa3, 0x16, 0x8e, 0x61, 0xf9, 0x4c, 0xd4, 0xfa, 0x62, 0xd7, 0x4f, 0xa0, 0x38, 0x8d, 0x15, 0x4e, 0xd6, 0x63, 0xfb, 0x14, 0x8c, 0x39, 0xa1, 0x65, 0xfd, 0x48, 0xd0, 0x3f, 0xa7, 0x12, 0x8a, 0xd1, 0x49, 0xfc, 0x64, 0x8b, 0x13, 0xa6, 0x3e, 0x10, 0x88, 0x3d, 0xa5, 0x4a, 0xd2, 0x67, 0xff, 0xa4, 0x3c, 0x89, 0x11, 0xfe, 0x66, 0xd3, 0x4b, 0x46, 0xde, 0x6b, 0xf3, 0x1c, 0x84, 0x31, 0xa9, 0xf2, 0x6a, 0xdf, 0x47, 0xa8, 0x30, 0x85, 0x1d, 0x33, 0xab, 0x1e, 0x86, 0x69, 0xf1, 0x44, 0xdc, 0x87, 0x1f, 0xaa, 0x32, 0xdd, 0x45, 0xf0, 0x68, 0xac, 0x34, 0x81, 0x19, 0xf6, 0x6e, 0xdb, 0x43, 0x18, 0x80, 0x35, 0xad, 0x42, 0xda, 0x6f, 0xf7, 0xd9, 0x41, 0xf4, 0x6c, 0x83, 0x1b, 0xae, 0x36, 0x6d, 0xf5, 0x40, 0xd8, 0x37, 0xaf, 0x1a, 0x82},
+ {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54, 0x65, 0xfc, 0x4a, 0xd3, 0x3b, 0xa2, 0x14, 0x8d, 0xd9, 0x40, 0xf6, 0x6f, 0x87, 0x1e, 0xa8, 0x31, 0xca, 0x53, 0xe5, 0x7c, 0x94, 0xd, 0xbb, 0x22, 0x76, 0xef, 0x59, 0xc0, 0x28, 0xb1, 0x7, 0x9e, 0xaf, 0x36, 0x80, 0x19, 0xf1, 0x68, 0xde, 0x47, 0x13, 0x8a, 0x3c, 0xa5, 0x4d, 0xd4, 0x62, 0xfb, 0x89, 0x10, 0xa6, 0x3f, 0xd7, 0x4e, 0xf8, 0x61, 0x35, 0xac, 0x1a, 0x83, 0x6b, 0xf2, 0x44, 0xdd, 0xec, 0x75, 0xc3, 0x5a, 0xb2, 0x2b, 0x9d, 0x4, 0x50, 0xc9, 0x7f, 0xe6, 0xe, 0x97, 0x21, 0xb8, 0x43, 0xda, 0x6c, 0xf5, 0x1d, 0x84, 0x32, 0xab, 0xff, 0x66, 0xd0, 0x49, 0xa1, 0x38, 0x8e, 0x17, 0x26, 0xbf, 0x9, 0x90, 0x78, 0xe1, 0x57, 0xce, 0x9a, 0x3, 0xb5, 0x2c, 0xc4, 0x5d, 0xeb, 0x72, 0xf, 0x96, 0x20, 0xb9, 0x51, 0xc8, 0x7e, 0xe7, 0xb3, 0x2a, 0x9c, 0x5, 0xed, 0x74, 0xc2, 0x5b, 0x6a, 0xf3, 0x45, 0xdc, 0x34, 0xad, 0x1b, 0x82, 0xd6, 0x4f, 0xf9, 0x60, 0x88, 0x11, 0xa7, 0x3e, 0xc5, 0x5c, 0xea, 0x73, 0x9b, 0x2, 0xb4, 0x2d, 0x79, 0xe0, 0x56, 0xcf, 0x27, 0xbe, 0x8, 0x91, 0xa0, 0x39, 0x8f, 0x16, 0xfe, 0x67, 0xd1, 0x48, 0x1c, 0x85, 0x33, 0xaa, 0x42, 0xdb, 0x6d, 0xf4, 0x86, 0x1f, 0xa9, 0x30, 0xd8, 0x41, 0xf7, 0x6e, 0x3a, 0xa3, 0x15, 0x8c, 0x64, 0xfd, 0x4b, 0xd2, 0xe3, 0x7a, 0xcc, 0x55, 0xbd, 0x24, 0x92, 0xb, 0x5f, 0xc6, 0x70, 0xe9, 0x1, 0x98, 0x2e, 0xb7, 0x4c, 0xd5, 0x63, 0xfa, 0x12, 0x8b, 0x3d, 0xa4, 0xf0, 0x69, 0xdf, 0x46, 0xae, 0x37, 0x81, 0x18, 0x29, 0xb0, 0x6, 0x9f, 0x77, 0xee, 0x58, 0xc1, 0x95, 0xc, 0xba, 0x23, 0xcb, 0x52, 0xe4, 0x7d},
+ {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45, 0x55, 0xcf, 0x7c, 0xe6, 0x7, 0x9d, 0x2e, 0xb4, 0xf1, 0x6b, 0xd8, 0x42, 0xa3, 0x39, 0x8a, 0x10, 0xaa, 0x30, 0x83, 0x19, 0xf8, 0x62, 0xd1, 0x4b, 0xe, 0x94, 0x27, 0xbd, 0x5c, 0xc6, 0x75, 0xef, 0xff, 0x65, 0xd6, 0x4c, 0xad, 0x37, 0x84, 0x1e, 0x5b, 0xc1, 0x72, 0xe8, 0x9, 0x93, 0x20, 0xba, 0x49, 0xd3, 0x60, 0xfa, 0x1b, 0x81, 0x32, 0xa8, 0xed, 0x77, 0xc4, 0x5e, 0xbf, 0x25, 0x96, 0xc, 0x1c, 0x86, 0x35, 0xaf, 0x4e, 0xd4, 0x67, 0xfd, 0xb8, 0x22, 0x91, 0xb, 0xea, 0x70, 0xc3, 0x59, 0xe3, 0x79, 0xca, 0x50, 0xb1, 0x2b, 0x98, 0x2, 0x47, 0xdd, 0x6e, 0xf4, 0x15, 0x8f, 0x3c, 0xa6, 0xb6, 0x2c, 0x9f, 0x5, 0xe4, 0x7e, 0xcd, 0x57, 0x12, 0x88, 0x3b, 0xa1, 0x40, 0xda, 0x69, 0xf3, 0x92, 0x8, 0xbb, 0x21, 0xc0, 0x5a, 0xe9, 0x73, 0x36, 0xac, 0x1f, 0x85, 0x64, 0xfe, 0x4d, 0xd7, 0xc7, 0x5d, 0xee, 0x74, 0x95, 0xf, 0xbc, 0x26, 0x63, 0xf9, 0x4a, 0xd0, 0x31, 0xab, 0x18, 0x82, 0x38, 0xa2, 0x11, 0x8b, 0x6a, 0xf0, 0x43, 0xd9, 0x9c, 0x6, 0xb5, 0x2f, 0xce, 0x54, 0xe7, 0x7d, 0x6d, 0xf7, 0x44, 0xde, 0x3f, 0xa5, 0x16, 0x8c, 0xc9, 0x53, 0xe0, 0x7a, 0x9b, 0x1, 0xb2, 0x28, 0xdb, 0x41, 0xf2, 0x68, 0x89, 0x13, 0xa0, 0x3a, 0x7f, 0xe5, 0x56, 0xcc, 0x2d, 0xb7, 0x4, 0x9e, 0x8e, 0x14, 0xa7, 0x3d, 0xdc, 0x46, 0xf5, 0x6f, 0x2a, 0xb0, 0x3, 0x99, 0x78, 0xe2, 0x51, 0xcb, 0x71, 0xeb, 0x58, 0xc2, 0x23, 0xb9, 0xa, 0x90, 0xd5, 0x4f, 0xfc, 0x66, 0x87, 0x1d, 0xae, 0x34, 0x24, 0xbe, 0xd, 0x97, 0x76, 0xec, 0x5f, 0xc5, 0x80, 0x1a, 0xa9, 0x33, 0xd2, 0x48, 0xfb, 0x61},
+ {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a, 0x45, 0xde, 0x6e, 0xf5, 0x13, 0x88, 0x38, 0xa3, 0xe9, 0x72, 0xc2, 0x59, 0xbf, 0x24, 0x94, 0xf, 0x8a, 0x11, 0xa1, 0x3a, 0xdc, 0x47, 0xf7, 0x6c, 0x26, 0xbd, 0xd, 0x96, 0x70, 0xeb, 0x5b, 0xc0, 0xcf, 0x54, 0xe4, 0x7f, 0x99, 0x2, 0xb2, 0x29, 0x63, 0xf8, 0x48, 0xd3, 0x35, 0xae, 0x1e, 0x85, 0x9, 0x92, 0x22, 0xb9, 0x5f, 0xc4, 0x74, 0xef, 0xa5, 0x3e, 0x8e, 0x15, 0xf3, 0x68, 0xd8, 0x43, 0x4c, 0xd7, 0x67, 0xfc, 0x1a, 0x81, 0x31, 0xaa, 0xe0, 0x7b, 0xcb, 0x50, 0xb6, 0x2d, 0x9d, 0x6, 0x83, 0x18, 0xa8, 0x33, 0xd5, 0x4e, 0xfe, 0x65, 0x2f, 0xb4, 0x4, 0x9f, 0x79, 0xe2, 0x52, 0xc9, 0xc6, 0x5d, 0xed, 0x76, 0x90, 0xb, 0xbb, 0x20, 0x6a, 0xf1, 0x41, 0xda, 0x3c, 0xa7, 0x17, 0x8c, 0x12, 0x89, 0x39, 0xa2, 0x44, 0xdf, 0x6f, 0xf4, 0xbe, 0x25, 0x95, 0xe, 0xe8, 0x73, 0xc3, 0x58, 0x57, 0xcc, 0x7c, 0xe7, 0x1, 0x9a, 0x2a, 0xb1, 0xfb, 0x60, 0xd0, 0x4b, 0xad, 0x36, 0x86, 0x1d, 0x98, 0x3, 0xb3, 0x28, 0xce, 0x55, 0xe5, 0x7e, 0x34, 0xaf, 0x1f, 0x84, 0x62, 0xf9, 0x49, 0xd2, 0xdd, 0x46, 0xf6, 0x6d, 0x8b, 0x10, 0xa0, 0x3b, 0x71, 0xea, 0x5a, 0xc1, 0x27, 0xbc, 0xc, 0x97, 0x1b, 0x80, 0x30, 0xab, 0x4d, 0xd6, 0x66, 0xfd, 0xb7, 0x2c, 0x9c, 0x7, 0xe1, 0x7a, 0xca, 0x51, 0x5e, 0xc5, 0x75, 0xee, 0x8, 0x93, 0x23, 0xb8, 0xf2, 0x69, 0xd9, 0x42, 0xa4, 0x3f, 0x8f, 0x14, 0x91, 0xa, 0xba, 0x21, 0xc7, 0x5c, 0xec, 0x77, 0x3d, 0xa6, 0x16, 0x8d, 0x6b, 0xf0, 0x40, 0xdb, 0xd4, 0x4f, 0xff, 0x64, 0x82, 0x19, 0xa9, 0x32, 0x78, 0xe3, 0x53, 0xc8, 0x2e, 0xb5, 0x5, 0x9e},
+ {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67, 0x35, 0xa9, 0x10, 0x8c, 0x7f, 0xe3, 0x5a, 0xc6, 0xa1, 0x3d, 0x84, 0x18, 0xeb, 0x77, 0xce, 0x52, 0x6a, 0xf6, 0x4f, 0xd3, 0x20, 0xbc, 0x5, 0x99, 0xfe, 0x62, 0xdb, 0x47, 0xb4, 0x28, 0x91, 0xd, 0x5f, 0xc3, 0x7a, 0xe6, 0x15, 0x89, 0x30, 0xac, 0xcb, 0x57, 0xee, 0x72, 0x81, 0x1d, 0xa4, 0x38, 0xd4, 0x48, 0xf1, 0x6d, 0x9e, 0x2, 0xbb, 0x27, 0x40, 0xdc, 0x65, 0xf9, 0xa, 0x96, 0x2f, 0xb3, 0xe1, 0x7d, 0xc4, 0x58, 0xab, 0x37, 0x8e, 0x12, 0x75, 0xe9, 0x50, 0xcc, 0x3f, 0xa3, 0x1a, 0x86, 0xbe, 0x22, 0x9b, 0x7, 0xf4, 0x68, 0xd1, 0x4d, 0x2a, 0xb6, 0xf, 0x93, 0x60, 0xfc, 0x45, 0xd9, 0x8b, 0x17, 0xae, 0x32, 0xc1, 0x5d, 0xe4, 0x78, 0x1f, 0x83, 0x3a, 0xa6, 0x55, 0xc9, 0x70, 0xec, 0xb5, 0x29, 0x90, 0xc, 0xff, 0x63, 0xda, 0x46, 0x21, 0xbd, 0x4, 0x98, 0x6b, 0xf7, 0x4e, 0xd2, 0x80, 0x1c, 0xa5, 0x39, 0xca, 0x56, 0xef, 0x73, 0x14, 0x88, 0x31, 0xad, 0x5e, 0xc2, 0x7b, 0xe7, 0xdf, 0x43, 0xfa, 0x66, 0x95, 0x9, 0xb0, 0x2c, 0x4b, 0xd7, 0x6e, 0xf2, 0x1, 0x9d, 0x24, 0xb8, 0xea, 0x76, 0xcf, 0x53, 0xa0, 0x3c, 0x85, 0x19, 0x7e, 0xe2, 0x5b, 0xc7, 0x34, 0xa8, 0x11, 0x8d, 0x61, 0xfd, 0x44, 0xd8, 0x2b, 0xb7, 0xe, 0x92, 0xf5, 0x69, 0xd0, 0x4c, 0xbf, 0x23, 0x9a, 0x6, 0x54, 0xc8, 0x71, 0xed, 0x1e, 0x82, 0x3b, 0xa7, 0xc0, 0x5c, 0xe5, 0x79, 0x8a, 0x16, 0xaf, 0x33, 0xb, 0x97, 0x2e, 0xb2, 0x41, 0xdd, 0x64, 0xf8, 0x9f, 0x3, 0xba, 0x26, 0xd5, 0x49, 0xf0, 0x6c, 0x3e, 0xa2, 0x1b, 0x87, 0x74, 0xe8, 0x51, 0xcd, 0xaa, 0x36, 0x8f, 0x13, 0xe0, 0x7c, 0xc5, 0x59},
+ {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68, 0x25, 0xb8, 0x2, 0x9f, 0x6b, 0xf6, 0x4c, 0xd1, 0xb9, 0x24, 0x9e, 0x3, 0xf7, 0x6a, 0xd0, 0x4d, 0x4a, 0xd7, 0x6d, 0xf0, 0x4, 0x99, 0x23, 0xbe, 0xd6, 0x4b, 0xf1, 0x6c, 0x98, 0x5, 0xbf, 0x22, 0x6f, 0xf2, 0x48, 0xd5, 0x21, 0xbc, 0x6, 0x9b, 0xf3, 0x6e, 0xd4, 0x49, 0xbd, 0x20, 0x9a, 0x7, 0x94, 0x9, 0xb3, 0x2e, 0xda, 0x47, 0xfd, 0x60, 0x8, 0x95, 0x2f, 0xb2, 0x46, 0xdb, 0x61, 0xfc, 0xb1, 0x2c, 0x96, 0xb, 0xff, 0x62, 0xd8, 0x45, 0x2d, 0xb0, 0xa, 0x97, 0x63, 0xfe, 0x44, 0xd9, 0xde, 0x43, 0xf9, 0x64, 0x90, 0xd, 0xb7, 0x2a, 0x42, 0xdf, 0x65, 0xf8, 0xc, 0x91, 0x2b, 0xb6, 0xfb, 0x66, 0xdc, 0x41, 0xb5, 0x28, 0x92, 0xf, 0x67, 0xfa, 0x40, 0xdd, 0x29, 0xb4, 0xe, 0x93, 0x35, 0xa8, 0x12, 0x8f, 0x7b, 0xe6, 0x5c, 0xc1, 0xa9, 0x34, 0x8e, 0x13, 0xe7, 0x7a, 0xc0, 0x5d, 0x10, 0x8d, 0x37, 0xaa, 0x5e, 0xc3, 0x79, 0xe4, 0x8c, 0x11, 0xab, 0x36, 0xc2, 0x5f, 0xe5, 0x78, 0x7f, 0xe2, 0x58, 0xc5, 0x31, 0xac, 0x16, 0x8b, 0xe3, 0x7e, 0xc4, 0x59, 0xad, 0x30, 0x8a, 0x17, 0x5a, 0xc7, 0x7d, 0xe0, 0x14, 0x89, 0x33, 0xae, 0xc6, 0x5b, 0xe1, 0x7c, 0x88, 0x15, 0xaf, 0x32, 0xa1, 0x3c, 0x86, 0x1b, 0xef, 0x72, 0xc8, 0x55, 0x3d, 0xa0, 0x1a, 0x87, 0x73, 0xee, 0x54, 0xc9, 0x84, 0x19, 0xa3, 0x3e, 0xca, 0x57, 0xed, 0x70, 0x18, 0x85, 0x3f, 0xa2, 0x56, 0xcb, 0x71, 0xec, 0xeb, 0x76, 0xcc, 0x51, 0xa5, 0x38, 0x82, 0x1f, 0x77, 0xea, 0x50, 0xcd, 0x39, 0xa4, 0x1e, 0x83, 0xce, 0x53, 0xe9, 0x74, 0x80, 0x1d, 0xa7, 0x3a, 0x52, 0xcf, 0x75, 0xe8, 0x1c, 0x81, 0x3b, 0xa6},
+ {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79, 0x15, 0x8b, 0x34, 0xaa, 0x57, 0xc9, 0x76, 0xe8, 0x91, 0xf, 0xb0, 0x2e, 0xd3, 0x4d, 0xf2, 0x6c, 0x2a, 0xb4, 0xb, 0x95, 0x68, 0xf6, 0x49, 0xd7, 0xae, 0x30, 0x8f, 0x11, 0xec, 0x72, 0xcd, 0x53, 0x3f, 0xa1, 0x1e, 0x80, 0x7d, 0xe3, 0x5c, 0xc2, 0xbb, 0x25, 0x9a, 0x4, 0xf9, 0x67, 0xd8, 0x46, 0x54, 0xca, 0x75, 0xeb, 0x16, 0x88, 0x37, 0xa9, 0xd0, 0x4e, 0xf1, 0x6f, 0x92, 0xc, 0xb3, 0x2d, 0x41, 0xdf, 0x60, 0xfe, 0x3, 0x9d, 0x22, 0xbc, 0xc5, 0x5b, 0xe4, 0x7a, 0x87, 0x19, 0xa6, 0x38, 0x7e, 0xe0, 0x5f, 0xc1, 0x3c, 0xa2, 0x1d, 0x83, 0xfa, 0x64, 0xdb, 0x45, 0xb8, 0x26, 0x99, 0x7, 0x6b, 0xf5, 0x4a, 0xd4, 0x29, 0xb7, 0x8, 0x96, 0xef, 0x71, 0xce, 0x50, 0xad, 0x33, 0x8c, 0x12, 0xa8, 0x36, 0x89, 0x17, 0xea, 0x74, 0xcb, 0x55, 0x2c, 0xb2, 0xd, 0x93, 0x6e, 0xf0, 0x4f, 0xd1, 0xbd, 0x23, 0x9c, 0x2, 0xff, 0x61, 0xde, 0x40, 0x39, 0xa7, 0x18, 0x86, 0x7b, 0xe5, 0x5a, 0xc4, 0x82, 0x1c, 0xa3, 0x3d, 0xc0, 0x5e, 0xe1, 0x7f, 0x6, 0x98, 0x27, 0xb9, 0x44, 0xda, 0x65, 0xfb, 0x97, 0x9, 0xb6, 0x28, 0xd5, 0x4b, 0xf4, 0x6a, 0x13, 0x8d, 0x32, 0xac, 0x51, 0xcf, 0x70, 0xee, 0xfc, 0x62, 0xdd, 0x43, 0xbe, 0x20, 0x9f, 0x1, 0x78, 0xe6, 0x59, 0xc7, 0x3a, 0xa4, 0x1b, 0x85, 0xe9, 0x77, 0xc8, 0x56, 0xab, 0x35, 0x8a, 0x14, 0x6d, 0xf3, 0x4c, 0xd2, 0x2f, 0xb1, 0xe, 0x90, 0xd6, 0x48, 0xf7, 0x69, 0x94, 0xa, 0xb5, 0x2b, 0x52, 0xcc, 0x73, 0xed, 0x10, 0x8e, 0x31, 0xaf, 0xc3, 0x5d, 0xe2, 0x7c, 0x81, 0x1f, 0xa0, 0x3e, 0x47, 0xd9, 0x66, 0xf8, 0x5, 0x9b, 0x24, 0xba},
+ {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76, 0x5, 0x9a, 0x26, 0xb9, 0x43, 0xdc, 0x60, 0xff, 0x89, 0x16, 0xaa, 0x35, 0xcf, 0x50, 0xec, 0x73, 0xa, 0x95, 0x29, 0xb6, 0x4c, 0xd3, 0x6f, 0xf0, 0x86, 0x19, 0xa5, 0x3a, 0xc0, 0x5f, 0xe3, 0x7c, 0xf, 0x90, 0x2c, 0xb3, 0x49, 0xd6, 0x6a, 0xf5, 0x83, 0x1c, 0xa0, 0x3f, 0xc5, 0x5a, 0xe6, 0x79, 0x14, 0x8b, 0x37, 0xa8, 0x52, 0xcd, 0x71, 0xee, 0x98, 0x7, 0xbb, 0x24, 0xde, 0x41, 0xfd, 0x62, 0x11, 0x8e, 0x32, 0xad, 0x57, 0xc8, 0x74, 0xeb, 0x9d, 0x2, 0xbe, 0x21, 0xdb, 0x44, 0xf8, 0x67, 0x1e, 0x81, 0x3d, 0xa2, 0x58, 0xc7, 0x7b, 0xe4, 0x92, 0xd, 0xb1, 0x2e, 0xd4, 0x4b, 0xf7, 0x68, 0x1b, 0x84, 0x38, 0xa7, 0x5d, 0xc2, 0x7e, 0xe1, 0x97, 0x8, 0xb4, 0x2b, 0xd1, 0x4e, 0xf2, 0x6d, 0x28, 0xb7, 0xb, 0x94, 0x6e, 0xf1, 0x4d, 0xd2, 0xa4, 0x3b, 0x87, 0x18, 0xe2, 0x7d, 0xc1, 0x5e, 0x2d, 0xb2, 0xe, 0x91, 0x6b, 0xf4, 0x48, 0xd7, 0xa1, 0x3e, 0x82, 0x1d, 0xe7, 0x78, 0xc4, 0x5b, 0x22, 0xbd, 0x1, 0x9e, 0x64, 0xfb, 0x47, 0xd8, 0xae, 0x31, 0x8d, 0x12, 0xe8, 0x77, 0xcb, 0x54, 0x27, 0xb8, 0x4, 0x9b, 0x61, 0xfe, 0x42, 0xdd, 0xab, 0x34, 0x88, 0x17, 0xed, 0x72, 0xce, 0x51, 0x3c, 0xa3, 0x1f, 0x80, 0x7a, 0xe5, 0x59, 0xc6, 0xb0, 0x2f, 0x93, 0xc, 0xf6, 0x69, 0xd5, 0x4a, 0x39, 0xa6, 0x1a, 0x85, 0x7f, 0xe0, 0x5c, 0xc3, 0xb5, 0x2a, 0x96, 0x9, 0xf3, 0x6c, 0xd0, 0x4f, 0x36, 0xa9, 0x15, 0x8a, 0x70, 0xef, 0x53, 0xcc, 0xba, 0x25, 0x99, 0x6, 0xfc, 0x63, 0xdf, 0x40, 0x33, 0xac, 0x10, 0x8f, 0x75, 0xea, 0x56, 0xc9, 0xbf, 0x20, 0x9c, 0x3, 0xf9, 0x66, 0xda, 0x45},
+ {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e, 0xd2, 0x72, 0x8f, 0x2f, 0x68, 0xc8, 0x35, 0x95, 0xbb, 0x1b, 0xe6, 0x46, 0x1, 0xa1, 0x5c, 0xfc, 0xb9, 0x19, 0xe4, 0x44, 0x3, 0xa3, 0x5e, 0xfe, 0xd0, 0x70, 0x8d, 0x2d, 0x6a, 0xca, 0x37, 0x97, 0x6b, 0xcb, 0x36, 0x96, 0xd1, 0x71, 0x8c, 0x2c, 0x2, 0xa2, 0x5f, 0xff, 0xb8, 0x18, 0xe5, 0x45, 0x6f, 0xcf, 0x32, 0x92, 0xd5, 0x75, 0x88, 0x28, 0x6, 0xa6, 0x5b, 0xfb, 0xbc, 0x1c, 0xe1, 0x41, 0xbd, 0x1d, 0xe0, 0x40, 0x7, 0xa7, 0x5a, 0xfa, 0xd4, 0x74, 0x89, 0x29, 0x6e, 0xce, 0x33, 0x93, 0xd6, 0x76, 0x8b, 0x2b, 0x6c, 0xcc, 0x31, 0x91, 0xbf, 0x1f, 0xe2, 0x42, 0x5, 0xa5, 0x58, 0xf8, 0x4, 0xa4, 0x59, 0xf9, 0xbe, 0x1e, 0xe3, 0x43, 0x6d, 0xcd, 0x30, 0x90, 0xd7, 0x77, 0x8a, 0x2a, 0xde, 0x7e, 0x83, 0x23, 0x64, 0xc4, 0x39, 0x99, 0xb7, 0x17, 0xea, 0x4a, 0xd, 0xad, 0x50, 0xf0, 0xc, 0xac, 0x51, 0xf1, 0xb6, 0x16, 0xeb, 0x4b, 0x65, 0xc5, 0x38, 0x98, 0xdf, 0x7f, 0x82, 0x22, 0x67, 0xc7, 0x3a, 0x9a, 0xdd, 0x7d, 0x80, 0x20, 0xe, 0xae, 0x53, 0xf3, 0xb4, 0x14, 0xe9, 0x49, 0xb5, 0x15, 0xe8, 0x48, 0xf, 0xaf, 0x52, 0xf2, 0xdc, 0x7c, 0x81, 0x21, 0x66, 0xc6, 0x3b, 0x9b, 0xb1, 0x11, 0xec, 0x4c, 0xb, 0xab, 0x56, 0xf6, 0xd8, 0x78, 0x85, 0x25, 0x62, 0xc2, 0x3f, 0x9f, 0x63, 0xc3, 0x3e, 0x9e, 0xd9, 0x79, 0x84, 0x24, 0xa, 0xaa, 0x57, 0xf7, 0xb0, 0x10, 0xed, 0x4d, 0x8, 0xa8, 0x55, 0xf5, 0xb2, 0x12, 0xef, 0x4f, 0x61, 0xc1, 0x3c, 0x9c, 0xdb, 0x7b, 0x86, 0x26, 0xda, 0x7a, 0x87, 0x27, 0x60, 0xc0, 0x3d, 0x9d, 0xb3, 0x13, 0xee, 0x4e, 0x9, 0xa9, 0x54, 0xf4},
+ {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21, 0xc2, 0x63, 0x9d, 0x3c, 0x7c, 0xdd, 0x23, 0x82, 0xa3, 0x2, 0xfc, 0x5d, 0x1d, 0xbc, 0x42, 0xe3, 0x99, 0x38, 0xc6, 0x67, 0x27, 0x86, 0x78, 0xd9, 0xf8, 0x59, 0xa7, 0x6, 0x46, 0xe7, 0x19, 0xb8, 0x5b, 0xfa, 0x4, 0xa5, 0xe5, 0x44, 0xba, 0x1b, 0x3a, 0x9b, 0x65, 0xc4, 0x84, 0x25, 0xdb, 0x7a, 0x2f, 0x8e, 0x70, 0xd1, 0x91, 0x30, 0xce, 0x6f, 0x4e, 0xef, 0x11, 0xb0, 0xf0, 0x51, 0xaf, 0xe, 0xed, 0x4c, 0xb2, 0x13, 0x53, 0xf2, 0xc, 0xad, 0x8c, 0x2d, 0xd3, 0x72, 0x32, 0x93, 0x6d, 0xcc, 0xb6, 0x17, 0xe9, 0x48, 0x8, 0xa9, 0x57, 0xf6, 0xd7, 0x76, 0x88, 0x29, 0x69, 0xc8, 0x36, 0x97, 0x74, 0xd5, 0x2b, 0x8a, 0xca, 0x6b, 0x95, 0x34, 0x15, 0xb4, 0x4a, 0xeb, 0xab, 0xa, 0xf4, 0x55, 0x5e, 0xff, 0x1, 0xa0, 0xe0, 0x41, 0xbf, 0x1e, 0x3f, 0x9e, 0x60, 0xc1, 0x81, 0x20, 0xde, 0x7f, 0x9c, 0x3d, 0xc3, 0x62, 0x22, 0x83, 0x7d, 0xdc, 0xfd, 0x5c, 0xa2, 0x3, 0x43, 0xe2, 0x1c, 0xbd, 0xc7, 0x66, 0x98, 0x39, 0x79, 0xd8, 0x26, 0x87, 0xa6, 0x7, 0xf9, 0x58, 0x18, 0xb9, 0x47, 0xe6, 0x5, 0xa4, 0x5a, 0xfb, 0xbb, 0x1a, 0xe4, 0x45, 0x64, 0xc5, 0x3b, 0x9a, 0xda, 0x7b, 0x85, 0x24, 0x71, 0xd0, 0x2e, 0x8f, 0xcf, 0x6e, 0x90, 0x31, 0x10, 0xb1, 0x4f, 0xee, 0xae, 0xf, 0xf1, 0x50, 0xb3, 0x12, 0xec, 0x4d, 0xd, 0xac, 0x52, 0xf3, 0xd2, 0x73, 0x8d, 0x2c, 0x6c, 0xcd, 0x33, 0x92, 0xe8, 0x49, 0xb7, 0x16, 0x56, 0xf7, 0x9, 0xa8, 0x89, 0x28, 0xd6, 0x77, 0x37, 0x96, 0x68, 0xc9, 0x2a, 0x8b, 0x75, 0xd4, 0x94, 0x35, 0xcb, 0x6a, 0x4b, 0xea, 0x14, 0xb5, 0xf5, 0x54, 0xaa, 0xb},
+ {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30, 0xf2, 0x50, 0xab, 0x9, 0x40, 0xe2, 0x19, 0xbb, 0x8b, 0x29, 0xd2, 0x70, 0x39, 0x9b, 0x60, 0xc2, 0xf9, 0x5b, 0xa0, 0x2, 0x4b, 0xe9, 0x12, 0xb0, 0x80, 0x22, 0xd9, 0x7b, 0x32, 0x90, 0x6b, 0xc9, 0xb, 0xa9, 0x52, 0xf0, 0xb9, 0x1b, 0xe0, 0x42, 0x72, 0xd0, 0x2b, 0x89, 0xc0, 0x62, 0x99, 0x3b, 0xef, 0x4d, 0xb6, 0x14, 0x5d, 0xff, 0x4, 0xa6, 0x96, 0x34, 0xcf, 0x6d, 0x24, 0x86, 0x7d, 0xdf, 0x1d, 0xbf, 0x44, 0xe6, 0xaf, 0xd, 0xf6, 0x54, 0x64, 0xc6, 0x3d, 0x9f, 0xd6, 0x74, 0x8f, 0x2d, 0x16, 0xb4, 0x4f, 0xed, 0xa4, 0x6, 0xfd, 0x5f, 0x6f, 0xcd, 0x36, 0x94, 0xdd, 0x7f, 0x84, 0x26, 0xe4, 0x46, 0xbd, 0x1f, 0x56, 0xf4, 0xf, 0xad, 0x9d, 0x3f, 0xc4, 0x66, 0x2f, 0x8d, 0x76, 0xd4, 0xc3, 0x61, 0x9a, 0x38, 0x71, 0xd3, 0x28, 0x8a, 0xba, 0x18, 0xe3, 0x41, 0x8, 0xaa, 0x51, 0xf3, 0x31, 0x93, 0x68, 0xca, 0x83, 0x21, 0xda, 0x78, 0x48, 0xea, 0x11, 0xb3, 0xfa, 0x58, 0xa3, 0x1, 0x3a, 0x98, 0x63, 0xc1, 0x88, 0x2a, 0xd1, 0x73, 0x43, 0xe1, 0x1a, 0xb8, 0xf1, 0x53, 0xa8, 0xa, 0xc8, 0x6a, 0x91, 0x33, 0x7a, 0xd8, 0x23, 0x81, 0xb1, 0x13, 0xe8, 0x4a, 0x3, 0xa1, 0x5a, 0xf8, 0x2c, 0x8e, 0x75, 0xd7, 0x9e, 0x3c, 0xc7, 0x65, 0x55, 0xf7, 0xc, 0xae, 0xe7, 0x45, 0xbe, 0x1c, 0xde, 0x7c, 0x87, 0x25, 0x6c, 0xce, 0x35, 0x97, 0xa7, 0x5, 0xfe, 0x5c, 0x15, 0xb7, 0x4c, 0xee, 0xd5, 0x77, 0x8c, 0x2e, 0x67, 0xc5, 0x3e, 0x9c, 0xac, 0xe, 0xf5, 0x57, 0x1e, 0xbc, 0x47, 0xe5, 0x27, 0x85, 0x7e, 0xdc, 0x95, 0x37, 0xcc, 0x6e, 0x5e, 0xfc, 0x7, 0xa5, 0xec, 0x4e, 0xb5, 0x17},
+ {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f, 0xe2, 0x41, 0xb9, 0x1a, 0x54, 0xf7, 0xf, 0xac, 0x93, 0x30, 0xc8, 0x6b, 0x25, 0x86, 0x7e, 0xdd, 0xd9, 0x7a, 0x82, 0x21, 0x6f, 0xcc, 0x34, 0x97, 0xa8, 0xb, 0xf3, 0x50, 0x1e, 0xbd, 0x45, 0xe6, 0x3b, 0x98, 0x60, 0xc3, 0x8d, 0x2e, 0xd6, 0x75, 0x4a, 0xe9, 0x11, 0xb2, 0xfc, 0x5f, 0xa7, 0x4, 0xaf, 0xc, 0xf4, 0x57, 0x19, 0xba, 0x42, 0xe1, 0xde, 0x7d, 0x85, 0x26, 0x68, 0xcb, 0x33, 0x90, 0x4d, 0xee, 0x16, 0xb5, 0xfb, 0x58, 0xa0, 0x3, 0x3c, 0x9f, 0x67, 0xc4, 0x8a, 0x29, 0xd1, 0x72, 0x76, 0xd5, 0x2d, 0x8e, 0xc0, 0x63, 0x9b, 0x38, 0x7, 0xa4, 0x5c, 0xff, 0xb1, 0x12, 0xea, 0x49, 0x94, 0x37, 0xcf, 0x6c, 0x22, 0x81, 0x79, 0xda, 0xe5, 0x46, 0xbe, 0x1d, 0x53, 0xf0, 0x8, 0xab, 0x43, 0xe0, 0x18, 0xbb, 0xf5, 0x56, 0xae, 0xd, 0x32, 0x91, 0x69, 0xca, 0x84, 0x27, 0xdf, 0x7c, 0xa1, 0x2, 0xfa, 0x59, 0x17, 0xb4, 0x4c, 0xef, 0xd0, 0x73, 0x8b, 0x28, 0x66, 0xc5, 0x3d, 0x9e, 0x9a, 0x39, 0xc1, 0x62, 0x2c, 0x8f, 0x77, 0xd4, 0xeb, 0x48, 0xb0, 0x13, 0x5d, 0xfe, 0x6, 0xa5, 0x78, 0xdb, 0x23, 0x80, 0xce, 0x6d, 0x95, 0x36, 0x9, 0xaa, 0x52, 0xf1, 0xbf, 0x1c, 0xe4, 0x47, 0xec, 0x4f, 0xb7, 0x14, 0x5a, 0xf9, 0x1, 0xa2, 0x9d, 0x3e, 0xc6, 0x65, 0x2b, 0x88, 0x70, 0xd3, 0xe, 0xad, 0x55, 0xf6, 0xb8, 0x1b, 0xe3, 0x40, 0x7f, 0xdc, 0x24, 0x87, 0xc9, 0x6a, 0x92, 0x31, 0x35, 0x96, 0x6e, 0xcd, 0x83, 0x20, 0xd8, 0x7b, 0x44, 0xe7, 0x1f, 0xbc, 0xf2, 0x51, 0xa9, 0xa, 0xd7, 0x74, 0x8c, 0x2f, 0x61, 0xc2, 0x3a, 0x99, 0xa6, 0x5, 0xfd, 0x5e, 0x10, 0xb3, 0x4b, 0xe8},
+ {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12, 0x92, 0x36, 0xc7, 0x63, 0x38, 0x9c, 0x6d, 0xc9, 0xdb, 0x7f, 0x8e, 0x2a, 0x71, 0xd5, 0x24, 0x80, 0x39, 0x9d, 0x6c, 0xc8, 0x93, 0x37, 0xc6, 0x62, 0x70, 0xd4, 0x25, 0x81, 0xda, 0x7e, 0x8f, 0x2b, 0xab, 0xf, 0xfe, 0x5a, 0x1, 0xa5, 0x54, 0xf0, 0xe2, 0x46, 0xb7, 0x13, 0x48, 0xec, 0x1d, 0xb9, 0x72, 0xd6, 0x27, 0x83, 0xd8, 0x7c, 0x8d, 0x29, 0x3b, 0x9f, 0x6e, 0xca, 0x91, 0x35, 0xc4, 0x60, 0xe0, 0x44, 0xb5, 0x11, 0x4a, 0xee, 0x1f, 0xbb, 0xa9, 0xd, 0xfc, 0x58, 0x3, 0xa7, 0x56, 0xf2, 0x4b, 0xef, 0x1e, 0xba, 0xe1, 0x45, 0xb4, 0x10, 0x2, 0xa6, 0x57, 0xf3, 0xa8, 0xc, 0xfd, 0x59, 0xd9, 0x7d, 0x8c, 0x28, 0x73, 0xd7, 0x26, 0x82, 0x90, 0x34, 0xc5, 0x61, 0x3a, 0x9e, 0x6f, 0xcb, 0xe4, 0x40, 0xb1, 0x15, 0x4e, 0xea, 0x1b, 0xbf, 0xad, 0x9, 0xf8, 0x5c, 0x7, 0xa3, 0x52, 0xf6, 0x76, 0xd2, 0x23, 0x87, 0xdc, 0x78, 0x89, 0x2d, 0x3f, 0x9b, 0x6a, 0xce, 0x95, 0x31, 0xc0, 0x64, 0xdd, 0x79, 0x88, 0x2c, 0x77, 0xd3, 0x22, 0x86, 0x94, 0x30, 0xc1, 0x65, 0x3e, 0x9a, 0x6b, 0xcf, 0x4f, 0xeb, 0x1a, 0xbe, 0xe5, 0x41, 0xb0, 0x14, 0x6, 0xa2, 0x53, 0xf7, 0xac, 0x8, 0xf9, 0x5d, 0x96, 0x32, 0xc3, 0x67, 0x3c, 0x98, 0x69, 0xcd, 0xdf, 0x7b, 0x8a, 0x2e, 0x75, 0xd1, 0x20, 0x84, 0x4, 0xa0, 0x51, 0xf5, 0xae, 0xa, 0xfb, 0x5f, 0x4d, 0xe9, 0x18, 0xbc, 0xe7, 0x43, 0xb2, 0x16, 0xaf, 0xb, 0xfa, 0x5e, 0x5, 0xa1, 0x50, 0xf4, 0xe6, 0x42, 0xb3, 0x17, 0x4c, 0xe8, 0x19, 0xbd, 0x3d, 0x99, 0x68, 0xcc, 0x97, 0x33, 0xc2, 0x66, 0x74, 0xd0, 0x21, 0x85, 0xde, 0x7a, 0x8b, 0x2f},
+ {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d, 0x82, 0x27, 0xd5, 0x70, 0x2c, 0x89, 0x7b, 0xde, 0xc3, 0x66, 0x94, 0x31, 0x6d, 0xc8, 0x3a, 0x9f, 0x19, 0xbc, 0x4e, 0xeb, 0xb7, 0x12, 0xe0, 0x45, 0x58, 0xfd, 0xf, 0xaa, 0xf6, 0x53, 0xa1, 0x4, 0x9b, 0x3e, 0xcc, 0x69, 0x35, 0x90, 0x62, 0xc7, 0xda, 0x7f, 0x8d, 0x28, 0x74, 0xd1, 0x23, 0x86, 0x32, 0x97, 0x65, 0xc0, 0x9c, 0x39, 0xcb, 0x6e, 0x73, 0xd6, 0x24, 0x81, 0xdd, 0x78, 0x8a, 0x2f, 0xb0, 0x15, 0xe7, 0x42, 0x1e, 0xbb, 0x49, 0xec, 0xf1, 0x54, 0xa6, 0x3, 0x5f, 0xfa, 0x8, 0xad, 0x2b, 0x8e, 0x7c, 0xd9, 0x85, 0x20, 0xd2, 0x77, 0x6a, 0xcf, 0x3d, 0x98, 0xc4, 0x61, 0x93, 0x36, 0xa9, 0xc, 0xfe, 0x5b, 0x7, 0xa2, 0x50, 0xf5, 0xe8, 0x4d, 0xbf, 0x1a, 0x46, 0xe3, 0x11, 0xb4, 0x64, 0xc1, 0x33, 0x96, 0xca, 0x6f, 0x9d, 0x38, 0x25, 0x80, 0x72, 0xd7, 0x8b, 0x2e, 0xdc, 0x79, 0xe6, 0x43, 0xb1, 0x14, 0x48, 0xed, 0x1f, 0xba, 0xa7, 0x2, 0xf0, 0x55, 0x9, 0xac, 0x5e, 0xfb, 0x7d, 0xd8, 0x2a, 0x8f, 0xd3, 0x76, 0x84, 0x21, 0x3c, 0x99, 0x6b, 0xce, 0x92, 0x37, 0xc5, 0x60, 0xff, 0x5a, 0xa8, 0xd, 0x51, 0xf4, 0x6, 0xa3, 0xbe, 0x1b, 0xe9, 0x4c, 0x10, 0xb5, 0x47, 0xe2, 0x56, 0xf3, 0x1, 0xa4, 0xf8, 0x5d, 0xaf, 0xa, 0x17, 0xb2, 0x40, 0xe5, 0xb9, 0x1c, 0xee, 0x4b, 0xd4, 0x71, 0x83, 0x26, 0x7a, 0xdf, 0x2d, 0x88, 0x95, 0x30, 0xc2, 0x67, 0x3b, 0x9e, 0x6c, 0xc9, 0x4f, 0xea, 0x18, 0xbd, 0xe1, 0x44, 0xb6, 0x13, 0xe, 0xab, 0x59, 0xfc, 0xa0, 0x5, 0xf7, 0x52, 0xcd, 0x68, 0x9a, 0x3f, 0x63, 0xc6, 0x34, 0x91, 0x8c, 0x29, 0xdb, 0x7e, 0x22, 0x87, 0x75, 0xd0},
+ {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc, 0xb2, 0x14, 0xe3, 0x45, 0x10, 0xb6, 0x41, 0xe7, 0xeb, 0x4d, 0xba, 0x1c, 0x49, 0xef, 0x18, 0xbe, 0x79, 0xdf, 0x28, 0x8e, 0xdb, 0x7d, 0x8a, 0x2c, 0x20, 0x86, 0x71, 0xd7, 0x82, 0x24, 0xd3, 0x75, 0xcb, 0x6d, 0x9a, 0x3c, 0x69, 0xcf, 0x38, 0x9e, 0x92, 0x34, 0xc3, 0x65, 0x30, 0x96, 0x61, 0xc7, 0xf2, 0x54, 0xa3, 0x5, 0x50, 0xf6, 0x1, 0xa7, 0xab, 0xd, 0xfa, 0x5c, 0x9, 0xaf, 0x58, 0xfe, 0x40, 0xe6, 0x11, 0xb7, 0xe2, 0x44, 0xb3, 0x15, 0x19, 0xbf, 0x48, 0xee, 0xbb, 0x1d, 0xea, 0x4c, 0x8b, 0x2d, 0xda, 0x7c, 0x29, 0x8f, 0x78, 0xde, 0xd2, 0x74, 0x83, 0x25, 0x70, 0xd6, 0x21, 0x87, 0x39, 0x9f, 0x68, 0xce, 0x9b, 0x3d, 0xca, 0x6c, 0x60, 0xc6, 0x31, 0x97, 0xc2, 0x64, 0x93, 0x35, 0xf9, 0x5f, 0xa8, 0xe, 0x5b, 0xfd, 0xa, 0xac, 0xa0, 0x6, 0xf1, 0x57, 0x2, 0xa4, 0x53, 0xf5, 0x4b, 0xed, 0x1a, 0xbc, 0xe9, 0x4f, 0xb8, 0x1e, 0x12, 0xb4, 0x43, 0xe5, 0xb0, 0x16, 0xe1, 0x47, 0x80, 0x26, 0xd1, 0x77, 0x22, 0x84, 0x73, 0xd5, 0xd9, 0x7f, 0x88, 0x2e, 0x7b, 0xdd, 0x2a, 0x8c, 0x32, 0x94, 0x63, 0xc5, 0x90, 0x36, 0xc1, 0x67, 0x6b, 0xcd, 0x3a, 0x9c, 0xc9, 0x6f, 0x98, 0x3e, 0xb, 0xad, 0x5a, 0xfc, 0xa9, 0xf, 0xf8, 0x5e, 0x52, 0xf4, 0x3, 0xa5, 0xf0, 0x56, 0xa1, 0x7, 0xb9, 0x1f, 0xe8, 0x4e, 0x1b, 0xbd, 0x4a, 0xec, 0xe0, 0x46, 0xb1, 0x17, 0x42, 0xe4, 0x13, 0xb5, 0x72, 0xd4, 0x23, 0x85, 0xd0, 0x76, 0x81, 0x27, 0x2b, 0x8d, 0x7a, 0xdc, 0x89, 0x2f, 0xd8, 0x7e, 0xc0, 0x66, 0x91, 0x37, 0x62, 0xc4, 0x33, 0x95, 0x99, 0x3f, 0xc8, 0x6e, 0x3b, 0x9d, 0x6a, 0xcc},
+ {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3, 0xa2, 0x5, 0xf1, 0x56, 0x4, 0xa3, 0x57, 0xf0, 0xf3, 0x54, 0xa0, 0x7, 0x55, 0xf2, 0x6, 0xa1, 0x59, 0xfe, 0xa, 0xad, 0xff, 0x58, 0xac, 0xb, 0x8, 0xaf, 0x5b, 0xfc, 0xae, 0x9, 0xfd, 0x5a, 0xfb, 0x5c, 0xa8, 0xf, 0x5d, 0xfa, 0xe, 0xa9, 0xaa, 0xd, 0xf9, 0x5e, 0xc, 0xab, 0x5f, 0xf8, 0xb2, 0x15, 0xe1, 0x46, 0x14, 0xb3, 0x47, 0xe0, 0xe3, 0x44, 0xb0, 0x17, 0x45, 0xe2, 0x16, 0xb1, 0x10, 0xb7, 0x43, 0xe4, 0xb6, 0x11, 0xe5, 0x42, 0x41, 0xe6, 0x12, 0xb5, 0xe7, 0x40, 0xb4, 0x13, 0xeb, 0x4c, 0xb8, 0x1f, 0x4d, 0xea, 0x1e, 0xb9, 0xba, 0x1d, 0xe9, 0x4e, 0x1c, 0xbb, 0x4f, 0xe8, 0x49, 0xee, 0x1a, 0xbd, 0xef, 0x48, 0xbc, 0x1b, 0x18, 0xbf, 0x4b, 0xec, 0xbe, 0x19, 0xed, 0x4a, 0x79, 0xde, 0x2a, 0x8d, 0xdf, 0x78, 0x8c, 0x2b, 0x28, 0x8f, 0x7b, 0xdc, 0x8e, 0x29, 0xdd, 0x7a, 0xdb, 0x7c, 0x88, 0x2f, 0x7d, 0xda, 0x2e, 0x89, 0x8a, 0x2d, 0xd9, 0x7e, 0x2c, 0x8b, 0x7f, 0xd8, 0x20, 0x87, 0x73, 0xd4, 0x86, 0x21, 0xd5, 0x72, 0x71, 0xd6, 0x22, 0x85, 0xd7, 0x70, 0x84, 0x23, 0x82, 0x25, 0xd1, 0x76, 0x24, 0x83, 0x77, 0xd0, 0xd3, 0x74, 0x80, 0x27, 0x75, 0xd2, 0x26, 0x81, 0xcb, 0x6c, 0x98, 0x3f, 0x6d, 0xca, 0x3e, 0x99, 0x9a, 0x3d, 0xc9, 0x6e, 0x3c, 0x9b, 0x6f, 0xc8, 0x69, 0xce, 0x3a, 0x9d, 0xcf, 0x68, 0x9c, 0x3b, 0x38, 0x9f, 0x6b, 0xcc, 0x9e, 0x39, 0xcd, 0x6a, 0x92, 0x35, 0xc1, 0x66, 0x34, 0x93, 0x67, 0xc0, 0xc3, 0x64, 0x90, 0x37, 0x65, 0xc2, 0x36, 0x91, 0x30, 0x97, 0x63, 0xc4, 0x96, 0x31, 0xc5, 0x62, 0x61, 0xc6, 0x32, 0x95, 0xc7, 0x60, 0x94, 0x33},
+ {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56, 0x52, 0xfa, 0x1f, 0xb7, 0xc8, 0x60, 0x85, 0x2d, 0x7b, 0xd3, 0x36, 0x9e, 0xe1, 0x49, 0xac, 0x4, 0xa4, 0xc, 0xe9, 0x41, 0x3e, 0x96, 0x73, 0xdb, 0x8d, 0x25, 0xc0, 0x68, 0x17, 0xbf, 0x5a, 0xf2, 0xf6, 0x5e, 0xbb, 0x13, 0x6c, 0xc4, 0x21, 0x89, 0xdf, 0x77, 0x92, 0x3a, 0x45, 0xed, 0x8, 0xa0, 0x55, 0xfd, 0x18, 0xb0, 0xcf, 0x67, 0x82, 0x2a, 0x7c, 0xd4, 0x31, 0x99, 0xe6, 0x4e, 0xab, 0x3, 0x7, 0xaf, 0x4a, 0xe2, 0x9d, 0x35, 0xd0, 0x78, 0x2e, 0x86, 0x63, 0xcb, 0xb4, 0x1c, 0xf9, 0x51, 0xf1, 0x59, 0xbc, 0x14, 0x6b, 0xc3, 0x26, 0x8e, 0xd8, 0x70, 0x95, 0x3d, 0x42, 0xea, 0xf, 0xa7, 0xa3, 0xb, 0xee, 0x46, 0x39, 0x91, 0x74, 0xdc, 0x8a, 0x22, 0xc7, 0x6f, 0x10, 0xb8, 0x5d, 0xf5, 0xaa, 0x2, 0xe7, 0x4f, 0x30, 0x98, 0x7d, 0xd5, 0x83, 0x2b, 0xce, 0x66, 0x19, 0xb1, 0x54, 0xfc, 0xf8, 0x50, 0xb5, 0x1d, 0x62, 0xca, 0x2f, 0x87, 0xd1, 0x79, 0x9c, 0x34, 0x4b, 0xe3, 0x6, 0xae, 0xe, 0xa6, 0x43, 0xeb, 0x94, 0x3c, 0xd9, 0x71, 0x27, 0x8f, 0x6a, 0xc2, 0xbd, 0x15, 0xf0, 0x58, 0x5c, 0xf4, 0x11, 0xb9, 0xc6, 0x6e, 0x8b, 0x23, 0x75, 0xdd, 0x38, 0x90, 0xef, 0x47, 0xa2, 0xa, 0xff, 0x57, 0xb2, 0x1a, 0x65, 0xcd, 0x28, 0x80, 0xd6, 0x7e, 0x9b, 0x33, 0x4c, 0xe4, 0x1, 0xa9, 0xad, 0x5, 0xe0, 0x48, 0x37, 0x9f, 0x7a, 0xd2, 0x84, 0x2c, 0xc9, 0x61, 0x1e, 0xb6, 0x53, 0xfb, 0x5b, 0xf3, 0x16, 0xbe, 0xc1, 0x69, 0x8c, 0x24, 0x72, 0xda, 0x3f, 0x97, 0xe8, 0x40, 0xa5, 0xd, 0x9, 0xa1, 0x44, 0xec, 0x93, 0x3b, 0xde, 0x76, 0x20, 0x88, 0x6d, 0xc5, 0xba, 0x12, 0xf7, 0x5f},
+ {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59, 0x42, 0xeb, 0xd, 0xa4, 0xdc, 0x75, 0x93, 0x3a, 0x63, 0xca, 0x2c, 0x85, 0xfd, 0x54, 0xb2, 0x1b, 0x84, 0x2d, 0xcb, 0x62, 0x1a, 0xb3, 0x55, 0xfc, 0xa5, 0xc, 0xea, 0x43, 0x3b, 0x92, 0x74, 0xdd, 0xc6, 0x6f, 0x89, 0x20, 0x58, 0xf1, 0x17, 0xbe, 0xe7, 0x4e, 0xa8, 0x1, 0x79, 0xd0, 0x36, 0x9f, 0x15, 0xbc, 0x5a, 0xf3, 0x8b, 0x22, 0xc4, 0x6d, 0x34, 0x9d, 0x7b, 0xd2, 0xaa, 0x3, 0xe5, 0x4c, 0x57, 0xfe, 0x18, 0xb1, 0xc9, 0x60, 0x86, 0x2f, 0x76, 0xdf, 0x39, 0x90, 0xe8, 0x41, 0xa7, 0xe, 0x91, 0x38, 0xde, 0x77, 0xf, 0xa6, 0x40, 0xe9, 0xb0, 0x19, 0xff, 0x56, 0x2e, 0x87, 0x61, 0xc8, 0xd3, 0x7a, 0x9c, 0x35, 0x4d, 0xe4, 0x2, 0xab, 0xf2, 0x5b, 0xbd, 0x14, 0x6c, 0xc5, 0x23, 0x8a, 0x2a, 0x83, 0x65, 0xcc, 0xb4, 0x1d, 0xfb, 0x52, 0xb, 0xa2, 0x44, 0xed, 0x95, 0x3c, 0xda, 0x73, 0x68, 0xc1, 0x27, 0x8e, 0xf6, 0x5f, 0xb9, 0x10, 0x49, 0xe0, 0x6, 0xaf, 0xd7, 0x7e, 0x98, 0x31, 0xae, 0x7, 0xe1, 0x48, 0x30, 0x99, 0x7f, 0xd6, 0x8f, 0x26, 0xc0, 0x69, 0x11, 0xb8, 0x5e, 0xf7, 0xec, 0x45, 0xa3, 0xa, 0x72, 0xdb, 0x3d, 0x94, 0xcd, 0x64, 0x82, 0x2b, 0x53, 0xfa, 0x1c, 0xb5, 0x3f, 0x96, 0x70, 0xd9, 0xa1, 0x8, 0xee, 0x47, 0x1e, 0xb7, 0x51, 0xf8, 0x80, 0x29, 0xcf, 0x66, 0x7d, 0xd4, 0x32, 0x9b, 0xe3, 0x4a, 0xac, 0x5, 0x5c, 0xf5, 0x13, 0xba, 0xc2, 0x6b, 0x8d, 0x24, 0xbb, 0x12, 0xf4, 0x5d, 0x25, 0x8c, 0x6a, 0xc3, 0x9a, 0x33, 0xd5, 0x7c, 0x4, 0xad, 0x4b, 0xe2, 0xf9, 0x50, 0xb6, 0x1f, 0x67, 0xce, 0x28, 0x81, 0xd8, 0x71, 0x97, 0x3e, 0x46, 0xef, 0x9, 0xa0},
+ {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48, 0x72, 0xd8, 0x3b, 0x91, 0xe0, 0x4a, 0xa9, 0x3, 0x4b, 0xe1, 0x2, 0xa8, 0xd9, 0x73, 0x90, 0x3a, 0xe4, 0x4e, 0xad, 0x7, 0x76, 0xdc, 0x3f, 0x95, 0xdd, 0x77, 0x94, 0x3e, 0x4f, 0xe5, 0x6, 0xac, 0x96, 0x3c, 0xdf, 0x75, 0x4, 0xae, 0x4d, 0xe7, 0xaf, 0x5, 0xe6, 0x4c, 0x3d, 0x97, 0x74, 0xde, 0xd5, 0x7f, 0x9c, 0x36, 0x47, 0xed, 0xe, 0xa4, 0xec, 0x46, 0xa5, 0xf, 0x7e, 0xd4, 0x37, 0x9d, 0xa7, 0xd, 0xee, 0x44, 0x35, 0x9f, 0x7c, 0xd6, 0x9e, 0x34, 0xd7, 0x7d, 0xc, 0xa6, 0x45, 0xef, 0x31, 0x9b, 0x78, 0xd2, 0xa3, 0x9, 0xea, 0x40, 0x8, 0xa2, 0x41, 0xeb, 0x9a, 0x30, 0xd3, 0x79, 0x43, 0xe9, 0xa, 0xa0, 0xd1, 0x7b, 0x98, 0x32, 0x7a, 0xd0, 0x33, 0x99, 0xe8, 0x42, 0xa1, 0xb, 0xb7, 0x1d, 0xfe, 0x54, 0x25, 0x8f, 0x6c, 0xc6, 0x8e, 0x24, 0xc7, 0x6d, 0x1c, 0xb6, 0x55, 0xff, 0xc5, 0x6f, 0x8c, 0x26, 0x57, 0xfd, 0x1e, 0xb4, 0xfc, 0x56, 0xb5, 0x1f, 0x6e, 0xc4, 0x27, 0x8d, 0x53, 0xf9, 0x1a, 0xb0, 0xc1, 0x6b, 0x88, 0x22, 0x6a, 0xc0, 0x23, 0x89, 0xf8, 0x52, 0xb1, 0x1b, 0x21, 0x8b, 0x68, 0xc2, 0xb3, 0x19, 0xfa, 0x50, 0x18, 0xb2, 0x51, 0xfb, 0x8a, 0x20, 0xc3, 0x69, 0x62, 0xc8, 0x2b, 0x81, 0xf0, 0x5a, 0xb9, 0x13, 0x5b, 0xf1, 0x12, 0xb8, 0xc9, 0x63, 0x80, 0x2a, 0x10, 0xba, 0x59, 0xf3, 0x82, 0x28, 0xcb, 0x61, 0x29, 0x83, 0x60, 0xca, 0xbb, 0x11, 0xf2, 0x58, 0x86, 0x2c, 0xcf, 0x65, 0x14, 0xbe, 0x5d, 0xf7, 0xbf, 0x15, 0xf6, 0x5c, 0x2d, 0x87, 0x64, 0xce, 0xf4, 0x5e, 0xbd, 0x17, 0x66, 0xcc, 0x2f, 0x85, 0xcd, 0x67, 0x84, 0x2e, 0x5f, 0xf5, 0x16, 0xbc},
+ {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47, 0x62, 0xc9, 0x29, 0x82, 0xf4, 0x5f, 0xbf, 0x14, 0x53, 0xf8, 0x18, 0xb3, 0xc5, 0x6e, 0x8e, 0x25, 0xc4, 0x6f, 0x8f, 0x24, 0x52, 0xf9, 0x19, 0xb2, 0xf5, 0x5e, 0xbe, 0x15, 0x63, 0xc8, 0x28, 0x83, 0xa6, 0xd, 0xed, 0x46, 0x30, 0x9b, 0x7b, 0xd0, 0x97, 0x3c, 0xdc, 0x77, 0x1, 0xaa, 0x4a, 0xe1, 0x95, 0x3e, 0xde, 0x75, 0x3, 0xa8, 0x48, 0xe3, 0xa4, 0xf, 0xef, 0x44, 0x32, 0x99, 0x79, 0xd2, 0xf7, 0x5c, 0xbc, 0x17, 0x61, 0xca, 0x2a, 0x81, 0xc6, 0x6d, 0x8d, 0x26, 0x50, 0xfb, 0x1b, 0xb0, 0x51, 0xfa, 0x1a, 0xb1, 0xc7, 0x6c, 0x8c, 0x27, 0x60, 0xcb, 0x2b, 0x80, 0xf6, 0x5d, 0xbd, 0x16, 0x33, 0x98, 0x78, 0xd3, 0xa5, 0xe, 0xee, 0x45, 0x2, 0xa9, 0x49, 0xe2, 0x94, 0x3f, 0xdf, 0x74, 0x37, 0x9c, 0x7c, 0xd7, 0xa1, 0xa, 0xea, 0x41, 0x6, 0xad, 0x4d, 0xe6, 0x90, 0x3b, 0xdb, 0x70, 0x55, 0xfe, 0x1e, 0xb5, 0xc3, 0x68, 0x88, 0x23, 0x64, 0xcf, 0x2f, 0x84, 0xf2, 0x59, 0xb9, 0x12, 0xf3, 0x58, 0xb8, 0x13, 0x65, 0xce, 0x2e, 0x85, 0xc2, 0x69, 0x89, 0x22, 0x54, 0xff, 0x1f, 0xb4, 0x91, 0x3a, 0xda, 0x71, 0x7, 0xac, 0x4c, 0xe7, 0xa0, 0xb, 0xeb, 0x40, 0x36, 0x9d, 0x7d, 0xd6, 0xa2, 0x9, 0xe9, 0x42, 0x34, 0x9f, 0x7f, 0xd4, 0x93, 0x38, 0xd8, 0x73, 0x5, 0xae, 0x4e, 0xe5, 0xc0, 0x6b, 0x8b, 0x20, 0x56, 0xfd, 0x1d, 0xb6, 0xf1, 0x5a, 0xba, 0x11, 0x67, 0xcc, 0x2c, 0x87, 0x66, 0xcd, 0x2d, 0x86, 0xf0, 0x5b, 0xbb, 0x10, 0x57, 0xfc, 0x1c, 0xb7, 0xc1, 0x6a, 0x8a, 0x21, 0x4, 0xaf, 0x4f, 0xe4, 0x92, 0x39, 0xd9, 0x72, 0x35, 0x9e, 0x7e, 0xd5, 0xa3, 0x8, 0xe8, 0x43},
+ {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a, 0x12, 0xbe, 0x57, 0xfb, 0x98, 0x34, 0xdd, 0x71, 0x1b, 0xb7, 0x5e, 0xf2, 0x91, 0x3d, 0xd4, 0x78, 0x24, 0x88, 0x61, 0xcd, 0xae, 0x2, 0xeb, 0x47, 0x2d, 0x81, 0x68, 0xc4, 0xa7, 0xb, 0xe2, 0x4e, 0x36, 0x9a, 0x73, 0xdf, 0xbc, 0x10, 0xf9, 0x55, 0x3f, 0x93, 0x7a, 0xd6, 0xb5, 0x19, 0xf0, 0x5c, 0x48, 0xe4, 0xd, 0xa1, 0xc2, 0x6e, 0x87, 0x2b, 0x41, 0xed, 0x4, 0xa8, 0xcb, 0x67, 0x8e, 0x22, 0x5a, 0xf6, 0x1f, 0xb3, 0xd0, 0x7c, 0x95, 0x39, 0x53, 0xff, 0x16, 0xba, 0xd9, 0x75, 0x9c, 0x30, 0x6c, 0xc0, 0x29, 0x85, 0xe6, 0x4a, 0xa3, 0xf, 0x65, 0xc9, 0x20, 0x8c, 0xef, 0x43, 0xaa, 0x6, 0x7e, 0xd2, 0x3b, 0x97, 0xf4, 0x58, 0xb1, 0x1d, 0x77, 0xdb, 0x32, 0x9e, 0xfd, 0x51, 0xb8, 0x14, 0x90, 0x3c, 0xd5, 0x79, 0x1a, 0xb6, 0x5f, 0xf3, 0x99, 0x35, 0xdc, 0x70, 0x13, 0xbf, 0x56, 0xfa, 0x82, 0x2e, 0xc7, 0x6b, 0x8, 0xa4, 0x4d, 0xe1, 0x8b, 0x27, 0xce, 0x62, 0x1, 0xad, 0x44, 0xe8, 0xb4, 0x18, 0xf1, 0x5d, 0x3e, 0x92, 0x7b, 0xd7, 0xbd, 0x11, 0xf8, 0x54, 0x37, 0x9b, 0x72, 0xde, 0xa6, 0xa, 0xe3, 0x4f, 0x2c, 0x80, 0x69, 0xc5, 0xaf, 0x3, 0xea, 0x46, 0x25, 0x89, 0x60, 0xcc, 0xd8, 0x74, 0x9d, 0x31, 0x52, 0xfe, 0x17, 0xbb, 0xd1, 0x7d, 0x94, 0x38, 0x5b, 0xf7, 0x1e, 0xb2, 0xca, 0x66, 0x8f, 0x23, 0x40, 0xec, 0x5, 0xa9, 0xc3, 0x6f, 0x86, 0x2a, 0x49, 0xe5, 0xc, 0xa0, 0xfc, 0x50, 0xb9, 0x15, 0x76, 0xda, 0x33, 0x9f, 0xf5, 0x59, 0xb0, 0x1c, 0x7f, 0xd3, 0x3a, 0x96, 0xee, 0x42, 0xab, 0x7, 0x64, 0xc8, 0x21, 0x8d, 0xe7, 0x4b, 0xa2, 0xe, 0x6d, 0xc1, 0x28, 0x84},
+ {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65, 0x2, 0xaf, 0x45, 0xe8, 0x8c, 0x21, 0xcb, 0x66, 0x3, 0xae, 0x44, 0xe9, 0x8d, 0x20, 0xca, 0x67, 0x4, 0xa9, 0x43, 0xee, 0x8a, 0x27, 0xcd, 0x60, 0x5, 0xa8, 0x42, 0xef, 0x8b, 0x26, 0xcc, 0x61, 0x6, 0xab, 0x41, 0xec, 0x88, 0x25, 0xcf, 0x62, 0x7, 0xaa, 0x40, 0xed, 0x89, 0x24, 0xce, 0x63, 0x8, 0xa5, 0x4f, 0xe2, 0x86, 0x2b, 0xc1, 0x6c, 0x9, 0xa4, 0x4e, 0xe3, 0x87, 0x2a, 0xc0, 0x6d, 0xa, 0xa7, 0x4d, 0xe0, 0x84, 0x29, 0xc3, 0x6e, 0xb, 0xa6, 0x4c, 0xe1, 0x85, 0x28, 0xc2, 0x6f, 0xc, 0xa1, 0x4b, 0xe6, 0x82, 0x2f, 0xc5, 0x68, 0xd, 0xa0, 0x4a, 0xe7, 0x83, 0x2e, 0xc4, 0x69, 0xe, 0xa3, 0x49, 0xe4, 0x80, 0x2d, 0xc7, 0x6a, 0xf, 0xa2, 0x48, 0xe5, 0x81, 0x2c, 0xc6, 0x6b, 0x10, 0xbd, 0x57, 0xfa, 0x9e, 0x33, 0xd9, 0x74, 0x11, 0xbc, 0x56, 0xfb, 0x9f, 0x32, 0xd8, 0x75, 0x12, 0xbf, 0x55, 0xf8, 0x9c, 0x31, 0xdb, 0x76, 0x13, 0xbe, 0x54, 0xf9, 0x9d, 0x30, 0xda, 0x77, 0x14, 0xb9, 0x53, 0xfe, 0x9a, 0x37, 0xdd, 0x70, 0x15, 0xb8, 0x52, 0xff, 0x9b, 0x36, 0xdc, 0x71, 0x16, 0xbb, 0x51, 0xfc, 0x98, 0x35, 0xdf, 0x72, 0x17, 0xba, 0x50, 0xfd, 0x99, 0x34, 0xde, 0x73, 0x18, 0xb5, 0x5f, 0xf2, 0x96, 0x3b, 0xd1, 0x7c, 0x19, 0xb4, 0x5e, 0xf3, 0x97, 0x3a, 0xd0, 0x7d, 0x1a, 0xb7, 0x5d, 0xf0, 0x94, 0x39, 0xd3, 0x7e, 0x1b, 0xb6, 0x5c, 0xf1, 0x95, 0x38, 0xd2, 0x7f, 0x1c, 0xb1, 0x5b, 0xf6, 0x92, 0x3f, 0xd5, 0x78, 0x1d, 0xb0, 0x5a, 0xf7, 0x93, 0x3e, 0xd4, 0x79, 0x1e, 0xb3, 0x59, 0xf4, 0x90, 0x3d, 0xd7, 0x7a, 0x1f, 0xb2, 0x58, 0xf5, 0x91, 0x3c, 0xd6, 0x7b},
+ {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74, 0x32, 0x9c, 0x73, 0xdd, 0xb0, 0x1e, 0xf1, 0x5f, 0x2b, 0x85, 0x6a, 0xc4, 0xa9, 0x7, 0xe8, 0x46, 0x64, 0xca, 0x25, 0x8b, 0xe6, 0x48, 0xa7, 0x9, 0x7d, 0xd3, 0x3c, 0x92, 0xff, 0x51, 0xbe, 0x10, 0x56, 0xf8, 0x17, 0xb9, 0xd4, 0x7a, 0x95, 0x3b, 0x4f, 0xe1, 0xe, 0xa0, 0xcd, 0x63, 0x8c, 0x22, 0xc8, 0x66, 0x89, 0x27, 0x4a, 0xe4, 0xb, 0xa5, 0xd1, 0x7f, 0x90, 0x3e, 0x53, 0xfd, 0x12, 0xbc, 0xfa, 0x54, 0xbb, 0x15, 0x78, 0xd6, 0x39, 0x97, 0xe3, 0x4d, 0xa2, 0xc, 0x61, 0xcf, 0x20, 0x8e, 0xac, 0x2, 0xed, 0x43, 0x2e, 0x80, 0x6f, 0xc1, 0xb5, 0x1b, 0xf4, 0x5a, 0x37, 0x99, 0x76, 0xd8, 0x9e, 0x30, 0xdf, 0x71, 0x1c, 0xb2, 0x5d, 0xf3, 0x87, 0x29, 0xc6, 0x68, 0x5, 0xab, 0x44, 0xea, 0x8d, 0x23, 0xcc, 0x62, 0xf, 0xa1, 0x4e, 0xe0, 0x94, 0x3a, 0xd5, 0x7b, 0x16, 0xb8, 0x57, 0xf9, 0xbf, 0x11, 0xfe, 0x50, 0x3d, 0x93, 0x7c, 0xd2, 0xa6, 0x8, 0xe7, 0x49, 0x24, 0x8a, 0x65, 0xcb, 0xe9, 0x47, 0xa8, 0x6, 0x6b, 0xc5, 0x2a, 0x84, 0xf0, 0x5e, 0xb1, 0x1f, 0x72, 0xdc, 0x33, 0x9d, 0xdb, 0x75, 0x9a, 0x34, 0x59, 0xf7, 0x18, 0xb6, 0xc2, 0x6c, 0x83, 0x2d, 0x40, 0xee, 0x1, 0xaf, 0x45, 0xeb, 0x4, 0xaa, 0xc7, 0x69, 0x86, 0x28, 0x5c, 0xf2, 0x1d, 0xb3, 0xde, 0x70, 0x9f, 0x31, 0x77, 0xd9, 0x36, 0x98, 0xf5, 0x5b, 0xb4, 0x1a, 0x6e, 0xc0, 0x2f, 0x81, 0xec, 0x42, 0xad, 0x3, 0x21, 0x8f, 0x60, 0xce, 0xa3, 0xd, 0xe2, 0x4c, 0x38, 0x96, 0x79, 0xd7, 0xba, 0x14, 0xfb, 0x55, 0x13, 0xbd, 0x52, 0xfc, 0x91, 0x3f, 0xd0, 0x7e, 0xa, 0xa4, 0x4b, 0xe5, 0x88, 0x26, 0xc9, 0x67},
+ {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b, 0x22, 0x8d, 0x61, 0xce, 0xa4, 0xb, 0xe7, 0x48, 0x33, 0x9c, 0x70, 0xdf, 0xb5, 0x1a, 0xf6, 0x59, 0x44, 0xeb, 0x7, 0xa8, 0xc2, 0x6d, 0x81, 0x2e, 0x55, 0xfa, 0x16, 0xb9, 0xd3, 0x7c, 0x90, 0x3f, 0x66, 0xc9, 0x25, 0x8a, 0xe0, 0x4f, 0xa3, 0xc, 0x77, 0xd8, 0x34, 0x9b, 0xf1, 0x5e, 0xb2, 0x1d, 0x88, 0x27, 0xcb, 0x64, 0xe, 0xa1, 0x4d, 0xe2, 0x99, 0x36, 0xda, 0x75, 0x1f, 0xb0, 0x5c, 0xf3, 0xaa, 0x5, 0xe9, 0x46, 0x2c, 0x83, 0x6f, 0xc0, 0xbb, 0x14, 0xf8, 0x57, 0x3d, 0x92, 0x7e, 0xd1, 0xcc, 0x63, 0x8f, 0x20, 0x4a, 0xe5, 0x9, 0xa6, 0xdd, 0x72, 0x9e, 0x31, 0x5b, 0xf4, 0x18, 0xb7, 0xee, 0x41, 0xad, 0x2, 0x68, 0xc7, 0x2b, 0x84, 0xff, 0x50, 0xbc, 0x13, 0x79, 0xd6, 0x3a, 0x95, 0xd, 0xa2, 0x4e, 0xe1, 0x8b, 0x24, 0xc8, 0x67, 0x1c, 0xb3, 0x5f, 0xf0, 0x9a, 0x35, 0xd9, 0x76, 0x2f, 0x80, 0x6c, 0xc3, 0xa9, 0x6, 0xea, 0x45, 0x3e, 0x91, 0x7d, 0xd2, 0xb8, 0x17, 0xfb, 0x54, 0x49, 0xe6, 0xa, 0xa5, 0xcf, 0x60, 0x8c, 0x23, 0x58, 0xf7, 0x1b, 0xb4, 0xde, 0x71, 0x9d, 0x32, 0x6b, 0xc4, 0x28, 0x87, 0xed, 0x42, 0xae, 0x1, 0x7a, 0xd5, 0x39, 0x96, 0xfc, 0x53, 0xbf, 0x10, 0x85, 0x2a, 0xc6, 0x69, 0x3, 0xac, 0x40, 0xef, 0x94, 0x3b, 0xd7, 0x78, 0x12, 0xbd, 0x51, 0xfe, 0xa7, 0x8, 0xe4, 0x4b, 0x21, 0x8e, 0x62, 0xcd, 0xb6, 0x19, 0xf5, 0x5a, 0x30, 0x9f, 0x73, 0xdc, 0xc1, 0x6e, 0x82, 0x2d, 0x47, 0xe8, 0x4, 0xab, 0xd0, 0x7f, 0x93, 0x3c, 0x56, 0xf9, 0x15, 0xba, 0xe3, 0x4c, 0xa0, 0xf, 0x65, 0xca, 0x26, 0x89, 0xf2, 0x5d, 0xb1, 0x1e, 0x74, 0xdb, 0x37, 0x98},
+ {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde, 0xcf, 0x7f, 0xb2, 0x2, 0x35, 0x85, 0x48, 0xf8, 0x26, 0x96, 0x5b, 0xeb, 0xdc, 0x6c, 0xa1, 0x11, 0x83, 0x33, 0xfe, 0x4e, 0x79, 0xc9, 0x4, 0xb4, 0x6a, 0xda, 0x17, 0xa7, 0x90, 0x20, 0xed, 0x5d, 0x4c, 0xfc, 0x31, 0x81, 0xb6, 0x6, 0xcb, 0x7b, 0xa5, 0x15, 0xd8, 0x68, 0x5f, 0xef, 0x22, 0x92, 0x1b, 0xab, 0x66, 0xd6, 0xe1, 0x51, 0x9c, 0x2c, 0xf2, 0x42, 0x8f, 0x3f, 0x8, 0xb8, 0x75, 0xc5, 0xd4, 0x64, 0xa9, 0x19, 0x2e, 0x9e, 0x53, 0xe3, 0x3d, 0x8d, 0x40, 0xf0, 0xc7, 0x77, 0xba, 0xa, 0x98, 0x28, 0xe5, 0x55, 0x62, 0xd2, 0x1f, 0xaf, 0x71, 0xc1, 0xc, 0xbc, 0x8b, 0x3b, 0xf6, 0x46, 0x57, 0xe7, 0x2a, 0x9a, 0xad, 0x1d, 0xd0, 0x60, 0xbe, 0xe, 0xc3, 0x73, 0x44, 0xf4, 0x39, 0x89, 0x36, 0x86, 0x4b, 0xfb, 0xcc, 0x7c, 0xb1, 0x1, 0xdf, 0x6f, 0xa2, 0x12, 0x25, 0x95, 0x58, 0xe8, 0xf9, 0x49, 0x84, 0x34, 0x3, 0xb3, 0x7e, 0xce, 0x10, 0xa0, 0x6d, 0xdd, 0xea, 0x5a, 0x97, 0x27, 0xb5, 0x5, 0xc8, 0x78, 0x4f, 0xff, 0x32, 0x82, 0x5c, 0xec, 0x21, 0x91, 0xa6, 0x16, 0xdb, 0x6b, 0x7a, 0xca, 0x7, 0xb7, 0x80, 0x30, 0xfd, 0x4d, 0x93, 0x23, 0xee, 0x5e, 0x69, 0xd9, 0x14, 0xa4, 0x2d, 0x9d, 0x50, 0xe0, 0xd7, 0x67, 0xaa, 0x1a, 0xc4, 0x74, 0xb9, 0x9, 0x3e, 0x8e, 0x43, 0xf3, 0xe2, 0x52, 0x9f, 0x2f, 0x18, 0xa8, 0x65, 0xd5, 0xb, 0xbb, 0x76, 0xc6, 0xf1, 0x41, 0x8c, 0x3c, 0xae, 0x1e, 0xd3, 0x63, 0x54, 0xe4, 0x29, 0x99, 0x47, 0xf7, 0x3a, 0x8a, 0xbd, 0xd, 0xc0, 0x70, 0x61, 0xd1, 0x1c, 0xac, 0x9b, 0x2b, 0xe6, 0x56, 0x88, 0x38, 0xf5, 0x45, 0x72, 0xc2, 0xf, 0xbf},
+ {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1, 0xdf, 0x6e, 0xa0, 0x11, 0x21, 0x90, 0x5e, 0xef, 0x3e, 0x8f, 0x41, 0xf0, 0xc0, 0x71, 0xbf, 0xe, 0xa3, 0x12, 0xdc, 0x6d, 0x5d, 0xec, 0x22, 0x93, 0x42, 0xf3, 0x3d, 0x8c, 0xbc, 0xd, 0xc3, 0x72, 0x7c, 0xcd, 0x3, 0xb2, 0x82, 0x33, 0xfd, 0x4c, 0x9d, 0x2c, 0xe2, 0x53, 0x63, 0xd2, 0x1c, 0xad, 0x5b, 0xea, 0x24, 0x95, 0xa5, 0x14, 0xda, 0x6b, 0xba, 0xb, 0xc5, 0x74, 0x44, 0xf5, 0x3b, 0x8a, 0x84, 0x35, 0xfb, 0x4a, 0x7a, 0xcb, 0x5, 0xb4, 0x65, 0xd4, 0x1a, 0xab, 0x9b, 0x2a, 0xe4, 0x55, 0xf8, 0x49, 0x87, 0x36, 0x6, 0xb7, 0x79, 0xc8, 0x19, 0xa8, 0x66, 0xd7, 0xe7, 0x56, 0x98, 0x29, 0x27, 0x96, 0x58, 0xe9, 0xd9, 0x68, 0xa6, 0x17, 0xc6, 0x77, 0xb9, 0x8, 0x38, 0x89, 0x47, 0xf6, 0xb6, 0x7, 0xc9, 0x78, 0x48, 0xf9, 0x37, 0x86, 0x57, 0xe6, 0x28, 0x99, 0xa9, 0x18, 0xd6, 0x67, 0x69, 0xd8, 0x16, 0xa7, 0x97, 0x26, 0xe8, 0x59, 0x88, 0x39, 0xf7, 0x46, 0x76, 0xc7, 0x9, 0xb8, 0x15, 0xa4, 0x6a, 0xdb, 0xeb, 0x5a, 0x94, 0x25, 0xf4, 0x45, 0x8b, 0x3a, 0xa, 0xbb, 0x75, 0xc4, 0xca, 0x7b, 0xb5, 0x4, 0x34, 0x85, 0x4b, 0xfa, 0x2b, 0x9a, 0x54, 0xe5, 0xd5, 0x64, 0xaa, 0x1b, 0xed, 0x5c, 0x92, 0x23, 0x13, 0xa2, 0x6c, 0xdd, 0xc, 0xbd, 0x73, 0xc2, 0xf2, 0x43, 0x8d, 0x3c, 0x32, 0x83, 0x4d, 0xfc, 0xcc, 0x7d, 0xb3, 0x2, 0xd3, 0x62, 0xac, 0x1d, 0x2d, 0x9c, 0x52, 0xe3, 0x4e, 0xff, 0x31, 0x80, 0xb0, 0x1, 0xcf, 0x7e, 0xaf, 0x1e, 0xd0, 0x61, 0x51, 0xe0, 0x2e, 0x9f, 0x91, 0x20, 0xee, 0x5f, 0x6f, 0xde, 0x10, 0xa1, 0x70, 0xc1, 0xf, 0xbe, 0x8e, 0x3f, 0xf1, 0x40},
+ {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0, 0xef, 0x5d, 0x96, 0x24, 0x1d, 0xaf, 0x64, 0xd6, 0x16, 0xa4, 0x6f, 0xdd, 0xe4, 0x56, 0x9d, 0x2f, 0xc3, 0x71, 0xba, 0x8, 0x31, 0x83, 0x48, 0xfa, 0x3a, 0x88, 0x43, 0xf1, 0xc8, 0x7a, 0xb1, 0x3, 0x2c, 0x9e, 0x55, 0xe7, 0xde, 0x6c, 0xa7, 0x15, 0xd5, 0x67, 0xac, 0x1e, 0x27, 0x95, 0x5e, 0xec, 0x9b, 0x29, 0xe2, 0x50, 0x69, 0xdb, 0x10, 0xa2, 0x62, 0xd0, 0x1b, 0xa9, 0x90, 0x22, 0xe9, 0x5b, 0x74, 0xc6, 0xd, 0xbf, 0x86, 0x34, 0xff, 0x4d, 0x8d, 0x3f, 0xf4, 0x46, 0x7f, 0xcd, 0x6, 0xb4, 0x58, 0xea, 0x21, 0x93, 0xaa, 0x18, 0xd3, 0x61, 0xa1, 0x13, 0xd8, 0x6a, 0x53, 0xe1, 0x2a, 0x98, 0xb7, 0x5, 0xce, 0x7c, 0x45, 0xf7, 0x3c, 0x8e, 0x4e, 0xfc, 0x37, 0x85, 0xbc, 0xe, 0xc5, 0x77, 0x2b, 0x99, 0x52, 0xe0, 0xd9, 0x6b, 0xa0, 0x12, 0xd2, 0x60, 0xab, 0x19, 0x20, 0x92, 0x59, 0xeb, 0xc4, 0x76, 0xbd, 0xf, 0x36, 0x84, 0x4f, 0xfd, 0x3d, 0x8f, 0x44, 0xf6, 0xcf, 0x7d, 0xb6, 0x4, 0xe8, 0x5a, 0x91, 0x23, 0x1a, 0xa8, 0x63, 0xd1, 0x11, 0xa3, 0x68, 0xda, 0xe3, 0x51, 0x9a, 0x28, 0x7, 0xb5, 0x7e, 0xcc, 0xf5, 0x47, 0x8c, 0x3e, 0xfe, 0x4c, 0x87, 0x35, 0xc, 0xbe, 0x75, 0xc7, 0xb0, 0x2, 0xc9, 0x7b, 0x42, 0xf0, 0x3b, 0x89, 0x49, 0xfb, 0x30, 0x82, 0xbb, 0x9, 0xc2, 0x70, 0x5f, 0xed, 0x26, 0x94, 0xad, 0x1f, 0xd4, 0x66, 0xa6, 0x14, 0xdf, 0x6d, 0x54, 0xe6, 0x2d, 0x9f, 0x73, 0xc1, 0xa, 0xb8, 0x81, 0x33, 0xf8, 0x4a, 0x8a, 0x38, 0xf3, 0x41, 0x78, 0xca, 0x1, 0xb3, 0x9c, 0x2e, 0xe5, 0x57, 0x6e, 0xdc, 0x17, 0xa5, 0x65, 0xd7, 0x1c, 0xae, 0x97, 0x25, 0xee, 0x5c},
+ {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf, 0xff, 0x4c, 0x84, 0x37, 0x9, 0xba, 0x72, 0xc1, 0xe, 0xbd, 0x75, 0xc6, 0xf8, 0x4b, 0x83, 0x30, 0xe3, 0x50, 0x98, 0x2b, 0x15, 0xa6, 0x6e, 0xdd, 0x12, 0xa1, 0x69, 0xda, 0xe4, 0x57, 0x9f, 0x2c, 0x1c, 0xaf, 0x67, 0xd4, 0xea, 0x59, 0x91, 0x22, 0xed, 0x5e, 0x96, 0x25, 0x1b, 0xa8, 0x60, 0xd3, 0xdb, 0x68, 0xa0, 0x13, 0x2d, 0x9e, 0x56, 0xe5, 0x2a, 0x99, 0x51, 0xe2, 0xdc, 0x6f, 0xa7, 0x14, 0x24, 0x97, 0x5f, 0xec, 0xd2, 0x61, 0xa9, 0x1a, 0xd5, 0x66, 0xae, 0x1d, 0x23, 0x90, 0x58, 0xeb, 0x38, 0x8b, 0x43, 0xf0, 0xce, 0x7d, 0xb5, 0x6, 0xc9, 0x7a, 0xb2, 0x1, 0x3f, 0x8c, 0x44, 0xf7, 0xc7, 0x74, 0xbc, 0xf, 0x31, 0x82, 0x4a, 0xf9, 0x36, 0x85, 0x4d, 0xfe, 0xc0, 0x73, 0xbb, 0x8, 0xab, 0x18, 0xd0, 0x63, 0x5d, 0xee, 0x26, 0x95, 0x5a, 0xe9, 0x21, 0x92, 0xac, 0x1f, 0xd7, 0x64, 0x54, 0xe7, 0x2f, 0x9c, 0xa2, 0x11, 0xd9, 0x6a, 0xa5, 0x16, 0xde, 0x6d, 0x53, 0xe0, 0x28, 0x9b, 0x48, 0xfb, 0x33, 0x80, 0xbe, 0xd, 0xc5, 0x76, 0xb9, 0xa, 0xc2, 0x71, 0x4f, 0xfc, 0x34, 0x87, 0xb7, 0x4, 0xcc, 0x7f, 0x41, 0xf2, 0x3a, 0x89, 0x46, 0xf5, 0x3d, 0x8e, 0xb0, 0x3, 0xcb, 0x78, 0x70, 0xc3, 0xb, 0xb8, 0x86, 0x35, 0xfd, 0x4e, 0x81, 0x32, 0xfa, 0x49, 0x77, 0xc4, 0xc, 0xbf, 0x8f, 0x3c, 0xf4, 0x47, 0x79, 0xca, 0x2, 0xb1, 0x7e, 0xcd, 0x5, 0xb6, 0x88, 0x3b, 0xf3, 0x40, 0x93, 0x20, 0xe8, 0x5b, 0x65, 0xd6, 0x1e, 0xad, 0x62, 0xd1, 0x19, 0xaa, 0x94, 0x27, 0xef, 0x5c, 0x6c, 0xdf, 0x17, 0xa4, 0x9a, 0x29, 0xe1, 0x52, 0x9d, 0x2e, 0xe6, 0x55, 0x6b, 0xd8, 0x10, 0xa3},
+ {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2, 0x8f, 0x3b, 0xfa, 0x4e, 0x65, 0xd1, 0x10, 0xa4, 0x46, 0xf2, 0x33, 0x87, 0xac, 0x18, 0xd9, 0x6d, 0x3, 0xb7, 0x76, 0xc2, 0xe9, 0x5d, 0x9c, 0x28, 0xca, 0x7e, 0xbf, 0xb, 0x20, 0x94, 0x55, 0xe1, 0x8c, 0x38, 0xf9, 0x4d, 0x66, 0xd2, 0x13, 0xa7, 0x45, 0xf1, 0x30, 0x84, 0xaf, 0x1b, 0xda, 0x6e, 0x6, 0xb2, 0x73, 0xc7, 0xec, 0x58, 0x99, 0x2d, 0xcf, 0x7b, 0xba, 0xe, 0x25, 0x91, 0x50, 0xe4, 0x89, 0x3d, 0xfc, 0x48, 0x63, 0xd7, 0x16, 0xa2, 0x40, 0xf4, 0x35, 0x81, 0xaa, 0x1e, 0xdf, 0x6b, 0x5, 0xb1, 0x70, 0xc4, 0xef, 0x5b, 0x9a, 0x2e, 0xcc, 0x78, 0xb9, 0xd, 0x26, 0x92, 0x53, 0xe7, 0x8a, 0x3e, 0xff, 0x4b, 0x60, 0xd4, 0x15, 0xa1, 0x43, 0xf7, 0x36, 0x82, 0xa9, 0x1d, 0xdc, 0x68, 0xc, 0xb8, 0x79, 0xcd, 0xe6, 0x52, 0x93, 0x27, 0xc5, 0x71, 0xb0, 0x4, 0x2f, 0x9b, 0x5a, 0xee, 0x83, 0x37, 0xf6, 0x42, 0x69, 0xdd, 0x1c, 0xa8, 0x4a, 0xfe, 0x3f, 0x8b, 0xa0, 0x14, 0xd5, 0x61, 0xf, 0xbb, 0x7a, 0xce, 0xe5, 0x51, 0x90, 0x24, 0xc6, 0x72, 0xb3, 0x7, 0x2c, 0x98, 0x59, 0xed, 0x80, 0x34, 0xf5, 0x41, 0x6a, 0xde, 0x1f, 0xab, 0x49, 0xfd, 0x3c, 0x88, 0xa3, 0x17, 0xd6, 0x62, 0xa, 0xbe, 0x7f, 0xcb, 0xe0, 0x54, 0x95, 0x21, 0xc3, 0x77, 0xb6, 0x2, 0x29, 0x9d, 0x5c, 0xe8, 0x85, 0x31, 0xf0, 0x44, 0x6f, 0xdb, 0x1a, 0xae, 0x4c, 0xf8, 0x39, 0x8d, 0xa6, 0x12, 0xd3, 0x67, 0x9, 0xbd, 0x7c, 0xc8, 0xe3, 0x57, 0x96, 0x22, 0xc0, 0x74, 0xb5, 0x1, 0x2a, 0x9e, 0x5f, 0xeb, 0x86, 0x32, 0xf3, 0x47, 0x6c, 0xd8, 0x19, 0xad, 0x4f, 0xfb, 0x3a, 0x8e, 0xa5, 0x11, 0xd0, 0x64},
+ {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed, 0x9f, 0x2a, 0xe8, 0x5d, 0x71, 0xc4, 0x6, 0xb3, 0x5e, 0xeb, 0x29, 0x9c, 0xb0, 0x5, 0xc7, 0x72, 0x23, 0x96, 0x54, 0xe1, 0xcd, 0x78, 0xba, 0xf, 0xe2, 0x57, 0x95, 0x20, 0xc, 0xb9, 0x7b, 0xce, 0xbc, 0x9, 0xcb, 0x7e, 0x52, 0xe7, 0x25, 0x90, 0x7d, 0xc8, 0xa, 0xbf, 0x93, 0x26, 0xe4, 0x51, 0x46, 0xf3, 0x31, 0x84, 0xa8, 0x1d, 0xdf, 0x6a, 0x87, 0x32, 0xf0, 0x45, 0x69, 0xdc, 0x1e, 0xab, 0xd9, 0x6c, 0xae, 0x1b, 0x37, 0x82, 0x40, 0xf5, 0x18, 0xad, 0x6f, 0xda, 0xf6, 0x43, 0x81, 0x34, 0x65, 0xd0, 0x12, 0xa7, 0x8b, 0x3e, 0xfc, 0x49, 0xa4, 0x11, 0xd3, 0x66, 0x4a, 0xff, 0x3d, 0x88, 0xfa, 0x4f, 0x8d, 0x38, 0x14, 0xa1, 0x63, 0xd6, 0x3b, 0x8e, 0x4c, 0xf9, 0xd5, 0x60, 0xa2, 0x17, 0x8c, 0x39, 0xfb, 0x4e, 0x62, 0xd7, 0x15, 0xa0, 0x4d, 0xf8, 0x3a, 0x8f, 0xa3, 0x16, 0xd4, 0x61, 0x13, 0xa6, 0x64, 0xd1, 0xfd, 0x48, 0x8a, 0x3f, 0xd2, 0x67, 0xa5, 0x10, 0x3c, 0x89, 0x4b, 0xfe, 0xaf, 0x1a, 0xd8, 0x6d, 0x41, 0xf4, 0x36, 0x83, 0x6e, 0xdb, 0x19, 0xac, 0x80, 0x35, 0xf7, 0x42, 0x30, 0x85, 0x47, 0xf2, 0xde, 0x6b, 0xa9, 0x1c, 0xf1, 0x44, 0x86, 0x33, 0x1f, 0xaa, 0x68, 0xdd, 0xca, 0x7f, 0xbd, 0x8, 0x24, 0x91, 0x53, 0xe6, 0xb, 0xbe, 0x7c, 0xc9, 0xe5, 0x50, 0x92, 0x27, 0x55, 0xe0, 0x22, 0x97, 0xbb, 0xe, 0xcc, 0x79, 0x94, 0x21, 0xe3, 0x56, 0x7a, 0xcf, 0xd, 0xb8, 0xe9, 0x5c, 0x9e, 0x2b, 0x7, 0xb2, 0x70, 0xc5, 0x28, 0x9d, 0x5f, 0xea, 0xc6, 0x73, 0xb1, 0x4, 0x76, 0xc3, 0x1, 0xb4, 0x98, 0x2d, 0xef, 0x5a, 0xb7, 0x2, 0xc0, 0x75, 0x59, 0xec, 0x2e, 0x9b},
+ {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc, 0xaf, 0x19, 0xde, 0x68, 0x4d, 0xfb, 0x3c, 0x8a, 0x76, 0xc0, 0x7, 0xb1, 0x94, 0x22, 0xe5, 0x53, 0x43, 0xf5, 0x32, 0x84, 0xa1, 0x17, 0xd0, 0x66, 0x9a, 0x2c, 0xeb, 0x5d, 0x78, 0xce, 0x9, 0xbf, 0xec, 0x5a, 0x9d, 0x2b, 0xe, 0xb8, 0x7f, 0xc9, 0x35, 0x83, 0x44, 0xf2, 0xd7, 0x61, 0xa6, 0x10, 0x86, 0x30, 0xf7, 0x41, 0x64, 0xd2, 0x15, 0xa3, 0x5f, 0xe9, 0x2e, 0x98, 0xbd, 0xb, 0xcc, 0x7a, 0x29, 0x9f, 0x58, 0xee, 0xcb, 0x7d, 0xba, 0xc, 0xf0, 0x46, 0x81, 0x37, 0x12, 0xa4, 0x63, 0xd5, 0xc5, 0x73, 0xb4, 0x2, 0x27, 0x91, 0x56, 0xe0, 0x1c, 0xaa, 0x6d, 0xdb, 0xfe, 0x48, 0x8f, 0x39, 0x6a, 0xdc, 0x1b, 0xad, 0x88, 0x3e, 0xf9, 0x4f, 0xb3, 0x5, 0xc2, 0x74, 0x51, 0xe7, 0x20, 0x96, 0x11, 0xa7, 0x60, 0xd6, 0xf3, 0x45, 0x82, 0x34, 0xc8, 0x7e, 0xb9, 0xf, 0x2a, 0x9c, 0x5b, 0xed, 0xbe, 0x8, 0xcf, 0x79, 0x5c, 0xea, 0x2d, 0x9b, 0x67, 0xd1, 0x16, 0xa0, 0x85, 0x33, 0xf4, 0x42, 0x52, 0xe4, 0x23, 0x95, 0xb0, 0x6, 0xc1, 0x77, 0x8b, 0x3d, 0xfa, 0x4c, 0x69, 0xdf, 0x18, 0xae, 0xfd, 0x4b, 0x8c, 0x3a, 0x1f, 0xa9, 0x6e, 0xd8, 0x24, 0x92, 0x55, 0xe3, 0xc6, 0x70, 0xb7, 0x1, 0x97, 0x21, 0xe6, 0x50, 0x75, 0xc3, 0x4, 0xb2, 0x4e, 0xf8, 0x3f, 0x89, 0xac, 0x1a, 0xdd, 0x6b, 0x38, 0x8e, 0x49, 0xff, 0xda, 0x6c, 0xab, 0x1d, 0xe1, 0x57, 0x90, 0x26, 0x3, 0xb5, 0x72, 0xc4, 0xd4, 0x62, 0xa5, 0x13, 0x36, 0x80, 0x47, 0xf1, 0xd, 0xbb, 0x7c, 0xca, 0xef, 0x59, 0x9e, 0x28, 0x7b, 0xcd, 0xa, 0xbc, 0x99, 0x2f, 0xe8, 0x5e, 0xa2, 0x14, 0xd3, 0x65, 0x40, 0xf6, 0x31, 0x87},
+ {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3, 0xbf, 0x8, 0xcc, 0x7b, 0x59, 0xee, 0x2a, 0x9d, 0x6e, 0xd9, 0x1d, 0xaa, 0x88, 0x3f, 0xfb, 0x4c, 0x63, 0xd4, 0x10, 0xa7, 0x85, 0x32, 0xf6, 0x41, 0xb2, 0x5, 0xc1, 0x76, 0x54, 0xe3, 0x27, 0x90, 0xdc, 0x6b, 0xaf, 0x18, 0x3a, 0x8d, 0x49, 0xfe, 0xd, 0xba, 0x7e, 0xc9, 0xeb, 0x5c, 0x98, 0x2f, 0xc6, 0x71, 0xb5, 0x2, 0x20, 0x97, 0x53, 0xe4, 0x17, 0xa0, 0x64, 0xd3, 0xf1, 0x46, 0x82, 0x35, 0x79, 0xce, 0xa, 0xbd, 0x9f, 0x28, 0xec, 0x5b, 0xa8, 0x1f, 0xdb, 0x6c, 0x4e, 0xf9, 0x3d, 0x8a, 0xa5, 0x12, 0xd6, 0x61, 0x43, 0xf4, 0x30, 0x87, 0x74, 0xc3, 0x7, 0xb0, 0x92, 0x25, 0xe1, 0x56, 0x1a, 0xad, 0x69, 0xde, 0xfc, 0x4b, 0x8f, 0x38, 0xcb, 0x7c, 0xb8, 0xf, 0x2d, 0x9a, 0x5e, 0xe9, 0x91, 0x26, 0xe2, 0x55, 0x77, 0xc0, 0x4, 0xb3, 0x40, 0xf7, 0x33, 0x84, 0xa6, 0x11, 0xd5, 0x62, 0x2e, 0x99, 0x5d, 0xea, 0xc8, 0x7f, 0xbb, 0xc, 0xff, 0x48, 0x8c, 0x3b, 0x19, 0xae, 0x6a, 0xdd, 0xf2, 0x45, 0x81, 0x36, 0x14, 0xa3, 0x67, 0xd0, 0x23, 0x94, 0x50, 0xe7, 0xc5, 0x72, 0xb6, 0x1, 0x4d, 0xfa, 0x3e, 0x89, 0xab, 0x1c, 0xd8, 0x6f, 0x9c, 0x2b, 0xef, 0x58, 0x7a, 0xcd, 0x9, 0xbe, 0x57, 0xe0, 0x24, 0x93, 0xb1, 0x6, 0xc2, 0x75, 0x86, 0x31, 0xf5, 0x42, 0x60, 0xd7, 0x13, 0xa4, 0xe8, 0x5f, 0x9b, 0x2c, 0xe, 0xb9, 0x7d, 0xca, 0x39, 0x8e, 0x4a, 0xfd, 0xdf, 0x68, 0xac, 0x1b, 0x34, 0x83, 0x47, 0xf0, 0xd2, 0x65, 0xa1, 0x16, 0xe5, 0x52, 0x96, 0x21, 0x3, 0xb4, 0x70, 0xc7, 0x8b, 0x3c, 0xf8, 0x4f, 0x6d, 0xda, 0x1e, 0xa9, 0x5a, 0xed, 0x29, 0x9e, 0xbc, 0xb, 0xcf, 0x78},
+ {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6, 0x4f, 0xf7, 0x22, 0x9a, 0x95, 0x2d, 0xf8, 0x40, 0xe6, 0x5e, 0x8b, 0x33, 0x3c, 0x84, 0x51, 0xe9, 0x9e, 0x26, 0xf3, 0x4b, 0x44, 0xfc, 0x29, 0x91, 0x37, 0x8f, 0x5a, 0xe2, 0xed, 0x55, 0x80, 0x38, 0xd1, 0x69, 0xbc, 0x4, 0xb, 0xb3, 0x66, 0xde, 0x78, 0xc0, 0x15, 0xad, 0xa2, 0x1a, 0xcf, 0x77, 0x21, 0x99, 0x4c, 0xf4, 0xfb, 0x43, 0x96, 0x2e, 0x88, 0x30, 0xe5, 0x5d, 0x52, 0xea, 0x3f, 0x87, 0x6e, 0xd6, 0x3, 0xbb, 0xb4, 0xc, 0xd9, 0x61, 0xc7, 0x7f, 0xaa, 0x12, 0x1d, 0xa5, 0x70, 0xc8, 0xbf, 0x7, 0xd2, 0x6a, 0x65, 0xdd, 0x8, 0xb0, 0x16, 0xae, 0x7b, 0xc3, 0xcc, 0x74, 0xa1, 0x19, 0xf0, 0x48, 0x9d, 0x25, 0x2a, 0x92, 0x47, 0xff, 0x59, 0xe1, 0x34, 0x8c, 0x83, 0x3b, 0xee, 0x56, 0x42, 0xfa, 0x2f, 0x97, 0x98, 0x20, 0xf5, 0x4d, 0xeb, 0x53, 0x86, 0x3e, 0x31, 0x89, 0x5c, 0xe4, 0xd, 0xb5, 0x60, 0xd8, 0xd7, 0x6f, 0xba, 0x2, 0xa4, 0x1c, 0xc9, 0x71, 0x7e, 0xc6, 0x13, 0xab, 0xdc, 0x64, 0xb1, 0x9, 0x6, 0xbe, 0x6b, 0xd3, 0x75, 0xcd, 0x18, 0xa0, 0xaf, 0x17, 0xc2, 0x7a, 0x93, 0x2b, 0xfe, 0x46, 0x49, 0xf1, 0x24, 0x9c, 0x3a, 0x82, 0x57, 0xef, 0xe0, 0x58, 0x8d, 0x35, 0x63, 0xdb, 0xe, 0xb6, 0xb9, 0x1, 0xd4, 0x6c, 0xca, 0x72, 0xa7, 0x1f, 0x10, 0xa8, 0x7d, 0xc5, 0x2c, 0x94, 0x41, 0xf9, 0xf6, 0x4e, 0x9b, 0x23, 0x85, 0x3d, 0xe8, 0x50, 0x5f, 0xe7, 0x32, 0x8a, 0xfd, 0x45, 0x90, 0x28, 0x27, 0x9f, 0x4a, 0xf2, 0x54, 0xec, 0x39, 0x81, 0x8e, 0x36, 0xe3, 0x5b, 0xb2, 0xa, 0xdf, 0x67, 0x68, 0xd0, 0x5, 0xbd, 0x1b, 0xa3, 0x76, 0xce, 0xc1, 0x79, 0xac, 0x14},
+ {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9, 0x5f, 0xe6, 0x30, 0x89, 0x81, 0x38, 0xee, 0x57, 0xfe, 0x47, 0x91, 0x28, 0x20, 0x99, 0x4f, 0xf6, 0xbe, 0x7, 0xd1, 0x68, 0x60, 0xd9, 0xf, 0xb6, 0x1f, 0xa6, 0x70, 0xc9, 0xc1, 0x78, 0xae, 0x17, 0xe1, 0x58, 0x8e, 0x37, 0x3f, 0x86, 0x50, 0xe9, 0x40, 0xf9, 0x2f, 0x96, 0x9e, 0x27, 0xf1, 0x48, 0x61, 0xd8, 0xe, 0xb7, 0xbf, 0x6, 0xd0, 0x69, 0xc0, 0x79, 0xaf, 0x16, 0x1e, 0xa7, 0x71, 0xc8, 0x3e, 0x87, 0x51, 0xe8, 0xe0, 0x59, 0x8f, 0x36, 0x9f, 0x26, 0xf0, 0x49, 0x41, 0xf8, 0x2e, 0x97, 0xdf, 0x66, 0xb0, 0x9, 0x1, 0xb8, 0x6e, 0xd7, 0x7e, 0xc7, 0x11, 0xa8, 0xa0, 0x19, 0xcf, 0x76, 0x80, 0x39, 0xef, 0x56, 0x5e, 0xe7, 0x31, 0x88, 0x21, 0x98, 0x4e, 0xf7, 0xff, 0x46, 0x90, 0x29, 0xc2, 0x7b, 0xad, 0x14, 0x1c, 0xa5, 0x73, 0xca, 0x63, 0xda, 0xc, 0xb5, 0xbd, 0x4, 0xd2, 0x6b, 0x9d, 0x24, 0xf2, 0x4b, 0x43, 0xfa, 0x2c, 0x95, 0x3c, 0x85, 0x53, 0xea, 0xe2, 0x5b, 0x8d, 0x34, 0x7c, 0xc5, 0x13, 0xaa, 0xa2, 0x1b, 0xcd, 0x74, 0xdd, 0x64, 0xb2, 0xb, 0x3, 0xba, 0x6c, 0xd5, 0x23, 0x9a, 0x4c, 0xf5, 0xfd, 0x44, 0x92, 0x2b, 0x82, 0x3b, 0xed, 0x54, 0x5c, 0xe5, 0x33, 0x8a, 0xa3, 0x1a, 0xcc, 0x75, 0x7d, 0xc4, 0x12, 0xab, 0x2, 0xbb, 0x6d, 0xd4, 0xdc, 0x65, 0xb3, 0xa, 0xfc, 0x45, 0x93, 0x2a, 0x22, 0x9b, 0x4d, 0xf4, 0x5d, 0xe4, 0x32, 0x8b, 0x83, 0x3a, 0xec, 0x55, 0x1d, 0xa4, 0x72, 0xcb, 0xc3, 0x7a, 0xac, 0x15, 0xbc, 0x5, 0xd3, 0x6a, 0x62, 0xdb, 0xd, 0xb4, 0x42, 0xfb, 0x2d, 0x94, 0x9c, 0x25, 0xf3, 0x4a, 0xe3, 0x5a, 0x8c, 0x35, 0x3d, 0x84, 0x52, 0xeb},
+ {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8, 0x6f, 0xd5, 0x6, 0xbc, 0xbd, 0x7, 0xd4, 0x6e, 0xd6, 0x6c, 0xbf, 0x5, 0x4, 0xbe, 0x6d, 0xd7, 0xde, 0x64, 0xb7, 0xd, 0xc, 0xb6, 0x65, 0xdf, 0x67, 0xdd, 0xe, 0xb4, 0xb5, 0xf, 0xdc, 0x66, 0xb1, 0xb, 0xd8, 0x62, 0x63, 0xd9, 0xa, 0xb0, 0x8, 0xb2, 0x61, 0xdb, 0xda, 0x60, 0xb3, 0x9, 0xa1, 0x1b, 0xc8, 0x72, 0x73, 0xc9, 0x1a, 0xa0, 0x18, 0xa2, 0x71, 0xcb, 0xca, 0x70, 0xa3, 0x19, 0xce, 0x74, 0xa7, 0x1d, 0x1c, 0xa6, 0x75, 0xcf, 0x77, 0xcd, 0x1e, 0xa4, 0xa5, 0x1f, 0xcc, 0x76, 0x7f, 0xc5, 0x16, 0xac, 0xad, 0x17, 0xc4, 0x7e, 0xc6, 0x7c, 0xaf, 0x15, 0x14, 0xae, 0x7d, 0xc7, 0x10, 0xaa, 0x79, 0xc3, 0xc2, 0x78, 0xab, 0x11, 0xa9, 0x13, 0xc0, 0x7a, 0x7b, 0xc1, 0x12, 0xa8, 0x5f, 0xe5, 0x36, 0x8c, 0x8d, 0x37, 0xe4, 0x5e, 0xe6, 0x5c, 0x8f, 0x35, 0x34, 0x8e, 0x5d, 0xe7, 0x30, 0x8a, 0x59, 0xe3, 0xe2, 0x58, 0x8b, 0x31, 0x89, 0x33, 0xe0, 0x5a, 0x5b, 0xe1, 0x32, 0x88, 0x81, 0x3b, 0xe8, 0x52, 0x53, 0xe9, 0x3a, 0x80, 0x38, 0x82, 0x51, 0xeb, 0xea, 0x50, 0x83, 0x39, 0xee, 0x54, 0x87, 0x3d, 0x3c, 0x86, 0x55, 0xef, 0x57, 0xed, 0x3e, 0x84, 0x85, 0x3f, 0xec, 0x56, 0xfe, 0x44, 0x97, 0x2d, 0x2c, 0x96, 0x45, 0xff, 0x47, 0xfd, 0x2e, 0x94, 0x95, 0x2f, 0xfc, 0x46, 0x91, 0x2b, 0xf8, 0x42, 0x43, 0xf9, 0x2a, 0x90, 0x28, 0x92, 0x41, 0xfb, 0xfa, 0x40, 0x93, 0x29, 0x20, 0x9a, 0x49, 0xf3, 0xf2, 0x48, 0x9b, 0x21, 0x99, 0x23, 0xf0, 0x4a, 0x4b, 0xf1, 0x22, 0x98, 0x4f, 0xf5, 0x26, 0x9c, 0x9d, 0x27, 0xf4, 0x4e, 0xf6, 0x4c, 0x9f, 0x25, 0x24, 0x9e, 0x4d, 0xf7},
+ {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7, 0x7f, 0xc4, 0x14, 0xaf, 0xa9, 0x12, 0xc2, 0x79, 0xce, 0x75, 0xa5, 0x1e, 0x18, 0xa3, 0x73, 0xc8, 0xfe, 0x45, 0x95, 0x2e, 0x28, 0x93, 0x43, 0xf8, 0x4f, 0xf4, 0x24, 0x9f, 0x99, 0x22, 0xf2, 0x49, 0x81, 0x3a, 0xea, 0x51, 0x57, 0xec, 0x3c, 0x87, 0x30, 0x8b, 0x5b, 0xe0, 0xe6, 0x5d, 0x8d, 0x36, 0xe1, 0x5a, 0x8a, 0x31, 0x37, 0x8c, 0x5c, 0xe7, 0x50, 0xeb, 0x3b, 0x80, 0x86, 0x3d, 0xed, 0x56, 0x9e, 0x25, 0xf5, 0x4e, 0x48, 0xf3, 0x23, 0x98, 0x2f, 0x94, 0x44, 0xff, 0xf9, 0x42, 0x92, 0x29, 0x1f, 0xa4, 0x74, 0xcf, 0xc9, 0x72, 0xa2, 0x19, 0xae, 0x15, 0xc5, 0x7e, 0x78, 0xc3, 0x13, 0xa8, 0x60, 0xdb, 0xb, 0xb0, 0xb6, 0xd, 0xdd, 0x66, 0xd1, 0x6a, 0xba, 0x1, 0x7, 0xbc, 0x6c, 0xd7, 0xdf, 0x64, 0xb4, 0xf, 0x9, 0xb2, 0x62, 0xd9, 0x6e, 0xd5, 0x5, 0xbe, 0xb8, 0x3, 0xd3, 0x68, 0xa0, 0x1b, 0xcb, 0x70, 0x76, 0xcd, 0x1d, 0xa6, 0x11, 0xaa, 0x7a, 0xc1, 0xc7, 0x7c, 0xac, 0x17, 0x21, 0x9a, 0x4a, 0xf1, 0xf7, 0x4c, 0x9c, 0x27, 0x90, 0x2b, 0xfb, 0x40, 0x46, 0xfd, 0x2d, 0x96, 0x5e, 0xe5, 0x35, 0x8e, 0x88, 0x33, 0xe3, 0x58, 0xef, 0x54, 0x84, 0x3f, 0x39, 0x82, 0x52, 0xe9, 0x3e, 0x85, 0x55, 0xee, 0xe8, 0x53, 0x83, 0x38, 0x8f, 0x34, 0xe4, 0x5f, 0x59, 0xe2, 0x32, 0x89, 0x41, 0xfa, 0x2a, 0x91, 0x97, 0x2c, 0xfc, 0x47, 0xf0, 0x4b, 0x9b, 0x20, 0x26, 0x9d, 0x4d, 0xf6, 0xc0, 0x7b, 0xab, 0x10, 0x16, 0xad, 0x7d, 0xc6, 0x71, 0xca, 0x1a, 0xa1, 0xa7, 0x1c, 0xcc, 0x77, 0xbf, 0x4, 0xd4, 0x6f, 0x69, 0xd2, 0x2, 0xb9, 0xe, 0xb5, 0x65, 0xde, 0xd8, 0x63, 0xb3, 0x8},
+ {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a, 0xf, 0xb3, 0x6a, 0xd6, 0xc5, 0x79, 0xa0, 0x1c, 0x86, 0x3a, 0xe3, 0x5f, 0x4c, 0xf0, 0x29, 0x95, 0x1e, 0xa2, 0x7b, 0xc7, 0xd4, 0x68, 0xb1, 0xd, 0x97, 0x2b, 0xf2, 0x4e, 0x5d, 0xe1, 0x38, 0x84, 0x11, 0xad, 0x74, 0xc8, 0xdb, 0x67, 0xbe, 0x2, 0x98, 0x24, 0xfd, 0x41, 0x52, 0xee, 0x37, 0x8b, 0x3c, 0x80, 0x59, 0xe5, 0xf6, 0x4a, 0x93, 0x2f, 0xb5, 0x9, 0xd0, 0x6c, 0x7f, 0xc3, 0x1a, 0xa6, 0x33, 0x8f, 0x56, 0xea, 0xf9, 0x45, 0x9c, 0x20, 0xba, 0x6, 0xdf, 0x63, 0x70, 0xcc, 0x15, 0xa9, 0x22, 0x9e, 0x47, 0xfb, 0xe8, 0x54, 0x8d, 0x31, 0xab, 0x17, 0xce, 0x72, 0x61, 0xdd, 0x4, 0xb8, 0x2d, 0x91, 0x48, 0xf4, 0xe7, 0x5b, 0x82, 0x3e, 0xa4, 0x18, 0xc1, 0x7d, 0x6e, 0xd2, 0xb, 0xb7, 0x78, 0xc4, 0x1d, 0xa1, 0xb2, 0xe, 0xd7, 0x6b, 0xf1, 0x4d, 0x94, 0x28, 0x3b, 0x87, 0x5e, 0xe2, 0x77, 0xcb, 0x12, 0xae, 0xbd, 0x1, 0xd8, 0x64, 0xfe, 0x42, 0x9b, 0x27, 0x34, 0x88, 0x51, 0xed, 0x66, 0xda, 0x3, 0xbf, 0xac, 0x10, 0xc9, 0x75, 0xef, 0x53, 0x8a, 0x36, 0x25, 0x99, 0x40, 0xfc, 0x69, 0xd5, 0xc, 0xb0, 0xa3, 0x1f, 0xc6, 0x7a, 0xe0, 0x5c, 0x85, 0x39, 0x2a, 0x96, 0x4f, 0xf3, 0x44, 0xf8, 0x21, 0x9d, 0x8e, 0x32, 0xeb, 0x57, 0xcd, 0x71, 0xa8, 0x14, 0x7, 0xbb, 0x62, 0xde, 0x4b, 0xf7, 0x2e, 0x92, 0x81, 0x3d, 0xe4, 0x58, 0xc2, 0x7e, 0xa7, 0x1b, 0x8, 0xb4, 0x6d, 0xd1, 0x5a, 0xe6, 0x3f, 0x83, 0x90, 0x2c, 0xf5, 0x49, 0xd3, 0x6f, 0xb6, 0xa, 0x19, 0xa5, 0x7c, 0xc0, 0x55, 0xe9, 0x30, 0x8c, 0x9f, 0x23, 0xfa, 0x46, 0xdc, 0x60, 0xb9, 0x5, 0x16, 0xaa, 0x73, 0xcf},
+ {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95, 0x1f, 0xa2, 0x78, 0xc5, 0xd1, 0x6c, 0xb6, 0xb, 0x9e, 0x23, 0xf9, 0x44, 0x50, 0xed, 0x37, 0x8a, 0x3e, 0x83, 0x59, 0xe4, 0xf0, 0x4d, 0x97, 0x2a, 0xbf, 0x2, 0xd8, 0x65, 0x71, 0xcc, 0x16, 0xab, 0x21, 0x9c, 0x46, 0xfb, 0xef, 0x52, 0x88, 0x35, 0xa0, 0x1d, 0xc7, 0x7a, 0x6e, 0xd3, 0x9, 0xb4, 0x7c, 0xc1, 0x1b, 0xa6, 0xb2, 0xf, 0xd5, 0x68, 0xfd, 0x40, 0x9a, 0x27, 0x33, 0x8e, 0x54, 0xe9, 0x63, 0xde, 0x4, 0xb9, 0xad, 0x10, 0xca, 0x77, 0xe2, 0x5f, 0x85, 0x38, 0x2c, 0x91, 0x4b, 0xf6, 0x42, 0xff, 0x25, 0x98, 0x8c, 0x31, 0xeb, 0x56, 0xc3, 0x7e, 0xa4, 0x19, 0xd, 0xb0, 0x6a, 0xd7, 0x5d, 0xe0, 0x3a, 0x87, 0x93, 0x2e, 0xf4, 0x49, 0xdc, 0x61, 0xbb, 0x6, 0x12, 0xaf, 0x75, 0xc8, 0xf8, 0x45, 0x9f, 0x22, 0x36, 0x8b, 0x51, 0xec, 0x79, 0xc4, 0x1e, 0xa3, 0xb7, 0xa, 0xd0, 0x6d, 0xe7, 0x5a, 0x80, 0x3d, 0x29, 0x94, 0x4e, 0xf3, 0x66, 0xdb, 0x1, 0xbc, 0xa8, 0x15, 0xcf, 0x72, 0xc6, 0x7b, 0xa1, 0x1c, 0x8, 0xb5, 0x6f, 0xd2, 0x47, 0xfa, 0x20, 0x9d, 0x89, 0x34, 0xee, 0x53, 0xd9, 0x64, 0xbe, 0x3, 0x17, 0xaa, 0x70, 0xcd, 0x58, 0xe5, 0x3f, 0x82, 0x96, 0x2b, 0xf1, 0x4c, 0x84, 0x39, 0xe3, 0x5e, 0x4a, 0xf7, 0x2d, 0x90, 0x5, 0xb8, 0x62, 0xdf, 0xcb, 0x76, 0xac, 0x11, 0x9b, 0x26, 0xfc, 0x41, 0x55, 0xe8, 0x32, 0x8f, 0x1a, 0xa7, 0x7d, 0xc0, 0xd4, 0x69, 0xb3, 0xe, 0xba, 0x7, 0xdd, 0x60, 0x74, 0xc9, 0x13, 0xae, 0x3b, 0x86, 0x5c, 0xe1, 0xf5, 0x48, 0x92, 0x2f, 0xa5, 0x18, 0xc2, 0x7f, 0x6b, 0xd6, 0xc, 0xb1, 0x24, 0x99, 0x43, 0xfe, 0xea, 0x57, 0x8d, 0x30},
+ {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84, 0x2f, 0x91, 0x4e, 0xf0, 0xed, 0x53, 0x8c, 0x32, 0xb6, 0x8, 0xd7, 0x69, 0x74, 0xca, 0x15, 0xab, 0x5e, 0xe0, 0x3f, 0x81, 0x9c, 0x22, 0xfd, 0x43, 0xc7, 0x79, 0xa6, 0x18, 0x5, 0xbb, 0x64, 0xda, 0x71, 0xcf, 0x10, 0xae, 0xb3, 0xd, 0xd2, 0x6c, 0xe8, 0x56, 0x89, 0x37, 0x2a, 0x94, 0x4b, 0xf5, 0xbc, 0x2, 0xdd, 0x63, 0x7e, 0xc0, 0x1f, 0xa1, 0x25, 0x9b, 0x44, 0xfa, 0xe7, 0x59, 0x86, 0x38, 0x93, 0x2d, 0xf2, 0x4c, 0x51, 0xef, 0x30, 0x8e, 0xa, 0xb4, 0x6b, 0xd5, 0xc8, 0x76, 0xa9, 0x17, 0xe2, 0x5c, 0x83, 0x3d, 0x20, 0x9e, 0x41, 0xff, 0x7b, 0xc5, 0x1a, 0xa4, 0xb9, 0x7, 0xd8, 0x66, 0xcd, 0x73, 0xac, 0x12, 0xf, 0xb1, 0x6e, 0xd0, 0x54, 0xea, 0x35, 0x8b, 0x96, 0x28, 0xf7, 0x49, 0x65, 0xdb, 0x4, 0xba, 0xa7, 0x19, 0xc6, 0x78, 0xfc, 0x42, 0x9d, 0x23, 0x3e, 0x80, 0x5f, 0xe1, 0x4a, 0xf4, 0x2b, 0x95, 0x88, 0x36, 0xe9, 0x57, 0xd3, 0x6d, 0xb2, 0xc, 0x11, 0xaf, 0x70, 0xce, 0x3b, 0x85, 0x5a, 0xe4, 0xf9, 0x47, 0x98, 0x26, 0xa2, 0x1c, 0xc3, 0x7d, 0x60, 0xde, 0x1, 0xbf, 0x14, 0xaa, 0x75, 0xcb, 0xd6, 0x68, 0xb7, 0x9, 0x8d, 0x33, 0xec, 0x52, 0x4f, 0xf1, 0x2e, 0x90, 0xd9, 0x67, 0xb8, 0x6, 0x1b, 0xa5, 0x7a, 0xc4, 0x40, 0xfe, 0x21, 0x9f, 0x82, 0x3c, 0xe3, 0x5d, 0xf6, 0x48, 0x97, 0x29, 0x34, 0x8a, 0x55, 0xeb, 0x6f, 0xd1, 0xe, 0xb0, 0xad, 0x13, 0xcc, 0x72, 0x87, 0x39, 0xe6, 0x58, 0x45, 0xfb, 0x24, 0x9a, 0x1e, 0xa0, 0x7f, 0xc1, 0xdc, 0x62, 0xbd, 0x3, 0xa8, 0x16, 0xc9, 0x77, 0x6a, 0xd4, 0xb, 0xb5, 0x31, 0x8f, 0x50, 0xee, 0xf3, 0x4d, 0x92, 0x2c},
+ {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b, 0x3f, 0x80, 0x5c, 0xe3, 0xf9, 0x46, 0x9a, 0x25, 0xae, 0x11, 0xcd, 0x72, 0x68, 0xd7, 0xb, 0xb4, 0x7e, 0xc1, 0x1d, 0xa2, 0xb8, 0x7, 0xdb, 0x64, 0xef, 0x50, 0x8c, 0x33, 0x29, 0x96, 0x4a, 0xf5, 0x41, 0xfe, 0x22, 0x9d, 0x87, 0x38, 0xe4, 0x5b, 0xd0, 0x6f, 0xb3, 0xc, 0x16, 0xa9, 0x75, 0xca, 0xfc, 0x43, 0x9f, 0x20, 0x3a, 0x85, 0x59, 0xe6, 0x6d, 0xd2, 0xe, 0xb1, 0xab, 0x14, 0xc8, 0x77, 0xc3, 0x7c, 0xa0, 0x1f, 0x5, 0xba, 0x66, 0xd9, 0x52, 0xed, 0x31, 0x8e, 0x94, 0x2b, 0xf7, 0x48, 0x82, 0x3d, 0xe1, 0x5e, 0x44, 0xfb, 0x27, 0x98, 0x13, 0xac, 0x70, 0xcf, 0xd5, 0x6a, 0xb6, 0x9, 0xbd, 0x2, 0xde, 0x61, 0x7b, 0xc4, 0x18, 0xa7, 0x2c, 0x93, 0x4f, 0xf0, 0xea, 0x55, 0x89, 0x36, 0xe5, 0x5a, 0x86, 0x39, 0x23, 0x9c, 0x40, 0xff, 0x74, 0xcb, 0x17, 0xa8, 0xb2, 0xd, 0xd1, 0x6e, 0xda, 0x65, 0xb9, 0x6, 0x1c, 0xa3, 0x7f, 0xc0, 0x4b, 0xf4, 0x28, 0x97, 0x8d, 0x32, 0xee, 0x51, 0x9b, 0x24, 0xf8, 0x47, 0x5d, 0xe2, 0x3e, 0x81, 0xa, 0xb5, 0x69, 0xd6, 0xcc, 0x73, 0xaf, 0x10, 0xa4, 0x1b, 0xc7, 0x78, 0x62, 0xdd, 0x1, 0xbe, 0x35, 0x8a, 0x56, 0xe9, 0xf3, 0x4c, 0x90, 0x2f, 0x19, 0xa6, 0x7a, 0xc5, 0xdf, 0x60, 0xbc, 0x3, 0x88, 0x37, 0xeb, 0x54, 0x4e, 0xf1, 0x2d, 0x92, 0x26, 0x99, 0x45, 0xfa, 0xe0, 0x5f, 0x83, 0x3c, 0xb7, 0x8, 0xd4, 0x6b, 0x71, 0xce, 0x12, 0xad, 0x67, 0xd8, 0x4, 0xbb, 0xa1, 0x1e, 0xc2, 0x7d, 0xf6, 0x49, 0x95, 0x2a, 0x30, 0x8f, 0x53, 0xec, 0x58, 0xe7, 0x3b, 0x84, 0x9e, 0x21, 0xfd, 0x42, 0xc9, 0x76, 0xaa, 0x15, 0xf, 0xb0, 0x6c, 0xd3},
+ {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34, 0x9c, 0x5c, 0x1, 0xc1, 0xbb, 0x7b, 0x26, 0xe6, 0xd2, 0x12, 0x4f, 0x8f, 0xf5, 0x35, 0x68, 0xa8, 0x25, 0xe5, 0xb8, 0x78, 0x2, 0xc2, 0x9f, 0x5f, 0x6b, 0xab, 0xf6, 0x36, 0x4c, 0x8c, 0xd1, 0x11, 0xb9, 0x79, 0x24, 0xe4, 0x9e, 0x5e, 0x3, 0xc3, 0xf7, 0x37, 0x6a, 0xaa, 0xd0, 0x10, 0x4d, 0x8d, 0x4a, 0x8a, 0xd7, 0x17, 0x6d, 0xad, 0xf0, 0x30, 0x4, 0xc4, 0x99, 0x59, 0x23, 0xe3, 0xbe, 0x7e, 0xd6, 0x16, 0x4b, 0x8b, 0xf1, 0x31, 0x6c, 0xac, 0x98, 0x58, 0x5, 0xc5, 0xbf, 0x7f, 0x22, 0xe2, 0x6f, 0xaf, 0xf2, 0x32, 0x48, 0x88, 0xd5, 0x15, 0x21, 0xe1, 0xbc, 0x7c, 0x6, 0xc6, 0x9b, 0x5b, 0xf3, 0x33, 0x6e, 0xae, 0xd4, 0x14, 0x49, 0x89, 0xbd, 0x7d, 0x20, 0xe0, 0x9a, 0x5a, 0x7, 0xc7, 0x94, 0x54, 0x9, 0xc9, 0xb3, 0x73, 0x2e, 0xee, 0xda, 0x1a, 0x47, 0x87, 0xfd, 0x3d, 0x60, 0xa0, 0x8, 0xc8, 0x95, 0x55, 0x2f, 0xef, 0xb2, 0x72, 0x46, 0x86, 0xdb, 0x1b, 0x61, 0xa1, 0xfc, 0x3c, 0xb1, 0x71, 0x2c, 0xec, 0x96, 0x56, 0xb, 0xcb, 0xff, 0x3f, 0x62, 0xa2, 0xd8, 0x18, 0x45, 0x85, 0x2d, 0xed, 0xb0, 0x70, 0xa, 0xca, 0x97, 0x57, 0x63, 0xa3, 0xfe, 0x3e, 0x44, 0x84, 0xd9, 0x19, 0xde, 0x1e, 0x43, 0x83, 0xf9, 0x39, 0x64, 0xa4, 0x90, 0x50, 0xd, 0xcd, 0xb7, 0x77, 0x2a, 0xea, 0x42, 0x82, 0xdf, 0x1f, 0x65, 0xa5, 0xf8, 0x38, 0xc, 0xcc, 0x91, 0x51, 0x2b, 0xeb, 0xb6, 0x76, 0xfb, 0x3b, 0x66, 0xa6, 0xdc, 0x1c, 0x41, 0x81, 0xb5, 0x75, 0x28, 0xe8, 0x92, 0x52, 0xf, 0xcf, 0x67, 0xa7, 0xfa, 0x3a, 0x40, 0x80, 0xdd, 0x1d, 0x29, 0xe9, 0xb4, 0x74, 0xe, 0xce, 0x93, 0x53},
+ {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b, 0x8c, 0x4d, 0x13, 0xd2, 0xaf, 0x6e, 0x30, 0xf1, 0xca, 0xb, 0x55, 0x94, 0xe9, 0x28, 0x76, 0xb7, 0x5, 0xc4, 0x9a, 0x5b, 0x26, 0xe7, 0xb9, 0x78, 0x43, 0x82, 0xdc, 0x1d, 0x60, 0xa1, 0xff, 0x3e, 0x89, 0x48, 0x16, 0xd7, 0xaa, 0x6b, 0x35, 0xf4, 0xcf, 0xe, 0x50, 0x91, 0xec, 0x2d, 0x73, 0xb2, 0xa, 0xcb, 0x95, 0x54, 0x29, 0xe8, 0xb6, 0x77, 0x4c, 0x8d, 0xd3, 0x12, 0x6f, 0xae, 0xf0, 0x31, 0x86, 0x47, 0x19, 0xd8, 0xa5, 0x64, 0x3a, 0xfb, 0xc0, 0x1, 0x5f, 0x9e, 0xe3, 0x22, 0x7c, 0xbd, 0xf, 0xce, 0x90, 0x51, 0x2c, 0xed, 0xb3, 0x72, 0x49, 0x88, 0xd6, 0x17, 0x6a, 0xab, 0xf5, 0x34, 0x83, 0x42, 0x1c, 0xdd, 0xa0, 0x61, 0x3f, 0xfe, 0xc5, 0x4, 0x5a, 0x9b, 0xe6, 0x27, 0x79, 0xb8, 0x14, 0xd5, 0x8b, 0x4a, 0x37, 0xf6, 0xa8, 0x69, 0x52, 0x93, 0xcd, 0xc, 0x71, 0xb0, 0xee, 0x2f, 0x98, 0x59, 0x7, 0xc6, 0xbb, 0x7a, 0x24, 0xe5, 0xde, 0x1f, 0x41, 0x80, 0xfd, 0x3c, 0x62, 0xa3, 0x11, 0xd0, 0x8e, 0x4f, 0x32, 0xf3, 0xad, 0x6c, 0x57, 0x96, 0xc8, 0x9, 0x74, 0xb5, 0xeb, 0x2a, 0x9d, 0x5c, 0x2, 0xc3, 0xbe, 0x7f, 0x21, 0xe0, 0xdb, 0x1a, 0x44, 0x85, 0xf8, 0x39, 0x67, 0xa6, 0x1e, 0xdf, 0x81, 0x40, 0x3d, 0xfc, 0xa2, 0x63, 0x58, 0x99, 0xc7, 0x6, 0x7b, 0xba, 0xe4, 0x25, 0x92, 0x53, 0xd, 0xcc, 0xb1, 0x70, 0x2e, 0xef, 0xd4, 0x15, 0x4b, 0x8a, 0xf7, 0x36, 0x68, 0xa9, 0x1b, 0xda, 0x84, 0x45, 0x38, 0xf9, 0xa7, 0x66, 0x5d, 0x9c, 0xc2, 0x3, 0x7e, 0xbf, 0xe1, 0x20, 0x97, 0x56, 0x8, 0xc9, 0xb4, 0x75, 0x2b, 0xea, 0xd1, 0x10, 0x4e, 0x8f, 0xf2, 0x33, 0x6d, 0xac},
+ {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a, 0xbc, 0x7e, 0x25, 0xe7, 0x93, 0x51, 0xa, 0xc8, 0xe2, 0x20, 0x7b, 0xb9, 0xcd, 0xf, 0x54, 0x96, 0x65, 0xa7, 0xfc, 0x3e, 0x4a, 0x88, 0xd3, 0x11, 0x3b, 0xf9, 0xa2, 0x60, 0x14, 0xd6, 0x8d, 0x4f, 0xd9, 0x1b, 0x40, 0x82, 0xf6, 0x34, 0x6f, 0xad, 0x87, 0x45, 0x1e, 0xdc, 0xa8, 0x6a, 0x31, 0xf3, 0xca, 0x8, 0x53, 0x91, 0xe5, 0x27, 0x7c, 0xbe, 0x94, 0x56, 0xd, 0xcf, 0xbb, 0x79, 0x22, 0xe0, 0x76, 0xb4, 0xef, 0x2d, 0x59, 0x9b, 0xc0, 0x2, 0x28, 0xea, 0xb1, 0x73, 0x7, 0xc5, 0x9e, 0x5c, 0xaf, 0x6d, 0x36, 0xf4, 0x80, 0x42, 0x19, 0xdb, 0xf1, 0x33, 0x68, 0xaa, 0xde, 0x1c, 0x47, 0x85, 0x13, 0xd1, 0x8a, 0x48, 0x3c, 0xfe, 0xa5, 0x67, 0x4d, 0x8f, 0xd4, 0x16, 0x62, 0xa0, 0xfb, 0x39, 0x89, 0x4b, 0x10, 0xd2, 0xa6, 0x64, 0x3f, 0xfd, 0xd7, 0x15, 0x4e, 0x8c, 0xf8, 0x3a, 0x61, 0xa3, 0x35, 0xf7, 0xac, 0x6e, 0x1a, 0xd8, 0x83, 0x41, 0x6b, 0xa9, 0xf2, 0x30, 0x44, 0x86, 0xdd, 0x1f, 0xec, 0x2e, 0x75, 0xb7, 0xc3, 0x1, 0x5a, 0x98, 0xb2, 0x70, 0x2b, 0xe9, 0x9d, 0x5f, 0x4, 0xc6, 0x50, 0x92, 0xc9, 0xb, 0x7f, 0xbd, 0xe6, 0x24, 0xe, 0xcc, 0x97, 0x55, 0x21, 0xe3, 0xb8, 0x7a, 0x43, 0x81, 0xda, 0x18, 0x6c, 0xae, 0xf5, 0x37, 0x1d, 0xdf, 0x84, 0x46, 0x32, 0xf0, 0xab, 0x69, 0xff, 0x3d, 0x66, 0xa4, 0xd0, 0x12, 0x49, 0x8b, 0xa1, 0x63, 0x38, 0xfa, 0x8e, 0x4c, 0x17, 0xd5, 0x26, 0xe4, 0xbf, 0x7d, 0x9, 0xcb, 0x90, 0x52, 0x78, 0xba, 0xe1, 0x23, 0x57, 0x95, 0xce, 0xc, 0x9a, 0x58, 0x3, 0xc1, 0xb5, 0x77, 0x2c, 0xee, 0xc4, 0x6, 0x5d, 0x9f, 0xeb, 0x29, 0x72, 0xb0},
+ {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25, 0xac, 0x6f, 0x37, 0xf4, 0x87, 0x44, 0x1c, 0xdf, 0xfa, 0x39, 0x61, 0xa2, 0xd1, 0x12, 0x4a, 0x89, 0x45, 0x86, 0xde, 0x1d, 0x6e, 0xad, 0xf5, 0x36, 0x13, 0xd0, 0x88, 0x4b, 0x38, 0xfb, 0xa3, 0x60, 0xe9, 0x2a, 0x72, 0xb1, 0xc2, 0x1, 0x59, 0x9a, 0xbf, 0x7c, 0x24, 0xe7, 0x94, 0x57, 0xf, 0xcc, 0x8a, 0x49, 0x11, 0xd2, 0xa1, 0x62, 0x3a, 0xf9, 0xdc, 0x1f, 0x47, 0x84, 0xf7, 0x34, 0x6c, 0xaf, 0x26, 0xe5, 0xbd, 0x7e, 0xd, 0xce, 0x96, 0x55, 0x70, 0xb3, 0xeb, 0x28, 0x5b, 0x98, 0xc0, 0x3, 0xcf, 0xc, 0x54, 0x97, 0xe4, 0x27, 0x7f, 0xbc, 0x99, 0x5a, 0x2, 0xc1, 0xb2, 0x71, 0x29, 0xea, 0x63, 0xa0, 0xf8, 0x3b, 0x48, 0x8b, 0xd3, 0x10, 0x35, 0xf6, 0xae, 0x6d, 0x1e, 0xdd, 0x85, 0x46, 0x9, 0xca, 0x92, 0x51, 0x22, 0xe1, 0xb9, 0x7a, 0x5f, 0x9c, 0xc4, 0x7, 0x74, 0xb7, 0xef, 0x2c, 0xa5, 0x66, 0x3e, 0xfd, 0x8e, 0x4d, 0x15, 0xd6, 0xf3, 0x30, 0x68, 0xab, 0xd8, 0x1b, 0x43, 0x80, 0x4c, 0x8f, 0xd7, 0x14, 0x67, 0xa4, 0xfc, 0x3f, 0x1a, 0xd9, 0x81, 0x42, 0x31, 0xf2, 0xaa, 0x69, 0xe0, 0x23, 0x7b, 0xb8, 0xcb, 0x8, 0x50, 0x93, 0xb6, 0x75, 0x2d, 0xee, 0x9d, 0x5e, 0x6, 0xc5, 0x83, 0x40, 0x18, 0xdb, 0xa8, 0x6b, 0x33, 0xf0, 0xd5, 0x16, 0x4e, 0x8d, 0xfe, 0x3d, 0x65, 0xa6, 0x2f, 0xec, 0xb4, 0x77, 0x4, 0xc7, 0x9f, 0x5c, 0x79, 0xba, 0xe2, 0x21, 0x52, 0x91, 0xc9, 0xa, 0xc6, 0x5, 0x5d, 0x9e, 0xed, 0x2e, 0x76, 0xb5, 0x90, 0x53, 0xb, 0xc8, 0xbb, 0x78, 0x20, 0xe3, 0x6a, 0xa9, 0xf1, 0x32, 0x41, 0x82, 0xda, 0x19, 0x3c, 0xff, 0xa7, 0x64, 0x17, 0xd4, 0x8c, 0x4f},
+ {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8, 0xdc, 0x18, 0x49, 0x8d, 0xeb, 0x2f, 0x7e, 0xba, 0xb2, 0x76, 0x27, 0xe3, 0x85, 0x41, 0x10, 0xd4, 0xa5, 0x61, 0x30, 0xf4, 0x92, 0x56, 0x7, 0xc3, 0xcb, 0xf, 0x5e, 0x9a, 0xfc, 0x38, 0x69, 0xad, 0x79, 0xbd, 0xec, 0x28, 0x4e, 0x8a, 0xdb, 0x1f, 0x17, 0xd3, 0x82, 0x46, 0x20, 0xe4, 0xb5, 0x71, 0x57, 0x93, 0xc2, 0x6, 0x60, 0xa4, 0xf5, 0x31, 0x39, 0xfd, 0xac, 0x68, 0xe, 0xca, 0x9b, 0x5f, 0x8b, 0x4f, 0x1e, 0xda, 0xbc, 0x78, 0x29, 0xed, 0xe5, 0x21, 0x70, 0xb4, 0xd2, 0x16, 0x47, 0x83, 0xf2, 0x36, 0x67, 0xa3, 0xc5, 0x1, 0x50, 0x94, 0x9c, 0x58, 0x9, 0xcd, 0xab, 0x6f, 0x3e, 0xfa, 0x2e, 0xea, 0xbb, 0x7f, 0x19, 0xdd, 0x8c, 0x48, 0x40, 0x84, 0xd5, 0x11, 0x77, 0xb3, 0xe2, 0x26, 0xae, 0x6a, 0x3b, 0xff, 0x99, 0x5d, 0xc, 0xc8, 0xc0, 0x4, 0x55, 0x91, 0xf7, 0x33, 0x62, 0xa6, 0x72, 0xb6, 0xe7, 0x23, 0x45, 0x81, 0xd0, 0x14, 0x1c, 0xd8, 0x89, 0x4d, 0x2b, 0xef, 0xbe, 0x7a, 0xb, 0xcf, 0x9e, 0x5a, 0x3c, 0xf8, 0xa9, 0x6d, 0x65, 0xa1, 0xf0, 0x34, 0x52, 0x96, 0xc7, 0x3, 0xd7, 0x13, 0x42, 0x86, 0xe0, 0x24, 0x75, 0xb1, 0xb9, 0x7d, 0x2c, 0xe8, 0x8e, 0x4a, 0x1b, 0xdf, 0xf9, 0x3d, 0x6c, 0xa8, 0xce, 0xa, 0x5b, 0x9f, 0x97, 0x53, 0x2, 0xc6, 0xa0, 0x64, 0x35, 0xf1, 0x25, 0xe1, 0xb0, 0x74, 0x12, 0xd6, 0x87, 0x43, 0x4b, 0x8f, 0xde, 0x1a, 0x7c, 0xb8, 0xe9, 0x2d, 0x5c, 0x98, 0xc9, 0xd, 0x6b, 0xaf, 0xfe, 0x3a, 0x32, 0xf6, 0xa7, 0x63, 0x5, 0xc1, 0x90, 0x54, 0x80, 0x44, 0x15, 0xd1, 0xb7, 0x73, 0x22, 0xe6, 0xee, 0x2a, 0x7b, 0xbf, 0xd9, 0x1d, 0x4c, 0x88},
+ {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7, 0xcc, 0x9, 0x5b, 0x9e, 0xff, 0x3a, 0x68, 0xad, 0xaa, 0x6f, 0x3d, 0xf8, 0x99, 0x5c, 0xe, 0xcb, 0x85, 0x40, 0x12, 0xd7, 0xb6, 0x73, 0x21, 0xe4, 0xe3, 0x26, 0x74, 0xb1, 0xd0, 0x15, 0x47, 0x82, 0x49, 0x8c, 0xde, 0x1b, 0x7a, 0xbf, 0xed, 0x28, 0x2f, 0xea, 0xb8, 0x7d, 0x1c, 0xd9, 0x8b, 0x4e, 0x17, 0xd2, 0x80, 0x45, 0x24, 0xe1, 0xb3, 0x76, 0x71, 0xb4, 0xe6, 0x23, 0x42, 0x87, 0xd5, 0x10, 0xdb, 0x1e, 0x4c, 0x89, 0xe8, 0x2d, 0x7f, 0xba, 0xbd, 0x78, 0x2a, 0xef, 0x8e, 0x4b, 0x19, 0xdc, 0x92, 0x57, 0x5, 0xc0, 0xa1, 0x64, 0x36, 0xf3, 0xf4, 0x31, 0x63, 0xa6, 0xc7, 0x2, 0x50, 0x95, 0x5e, 0x9b, 0xc9, 0xc, 0x6d, 0xa8, 0xfa, 0x3f, 0x38, 0xfd, 0xaf, 0x6a, 0xb, 0xce, 0x9c, 0x59, 0x2e, 0xeb, 0xb9, 0x7c, 0x1d, 0xd8, 0x8a, 0x4f, 0x48, 0x8d, 0xdf, 0x1a, 0x7b, 0xbe, 0xec, 0x29, 0xe2, 0x27, 0x75, 0xb0, 0xd1, 0x14, 0x46, 0x83, 0x84, 0x41, 0x13, 0xd6, 0xb7, 0x72, 0x20, 0xe5, 0xab, 0x6e, 0x3c, 0xf9, 0x98, 0x5d, 0xf, 0xca, 0xcd, 0x8, 0x5a, 0x9f, 0xfe, 0x3b, 0x69, 0xac, 0x67, 0xa2, 0xf0, 0x35, 0x54, 0x91, 0xc3, 0x6, 0x1, 0xc4, 0x96, 0x53, 0x32, 0xf7, 0xa5, 0x60, 0x39, 0xfc, 0xae, 0x6b, 0xa, 0xcf, 0x9d, 0x58, 0x5f, 0x9a, 0xc8, 0xd, 0x6c, 0xa9, 0xfb, 0x3e, 0xf5, 0x30, 0x62, 0xa7, 0xc6, 0x3, 0x51, 0x94, 0x93, 0x56, 0x4, 0xc1, 0xa0, 0x65, 0x37, 0xf2, 0xbc, 0x79, 0x2b, 0xee, 0x8f, 0x4a, 0x18, 0xdd, 0xda, 0x1f, 0x4d, 0x88, 0xe9, 0x2c, 0x7e, 0xbb, 0x70, 0xb5, 0xe7, 0x22, 0x43, 0x86, 0xd4, 0x11, 0x16, 0xd3, 0x81, 0x44, 0x25, 0xe0, 0xb2, 0x77},
+ {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16, 0xfc, 0x3a, 0x6d, 0xab, 0xc3, 0x5, 0x52, 0x94, 0x82, 0x44, 0x13, 0xd5, 0xbd, 0x7b, 0x2c, 0xea, 0xe5, 0x23, 0x74, 0xb2, 0xda, 0x1c, 0x4b, 0x8d, 0x9b, 0x5d, 0xa, 0xcc, 0xa4, 0x62, 0x35, 0xf3, 0x19, 0xdf, 0x88, 0x4e, 0x26, 0xe0, 0xb7, 0x71, 0x67, 0xa1, 0xf6, 0x30, 0x58, 0x9e, 0xc9, 0xf, 0xd7, 0x11, 0x46, 0x80, 0xe8, 0x2e, 0x79, 0xbf, 0xa9, 0x6f, 0x38, 0xfe, 0x96, 0x50, 0x7, 0xc1, 0x2b, 0xed, 0xba, 0x7c, 0x14, 0xd2, 0x85, 0x43, 0x55, 0x93, 0xc4, 0x2, 0x6a, 0xac, 0xfb, 0x3d, 0x32, 0xf4, 0xa3, 0x65, 0xd, 0xcb, 0x9c, 0x5a, 0x4c, 0x8a, 0xdd, 0x1b, 0x73, 0xb5, 0xe2, 0x24, 0xce, 0x8, 0x5f, 0x99, 0xf1, 0x37, 0x60, 0xa6, 0xb0, 0x76, 0x21, 0xe7, 0x8f, 0x49, 0x1e, 0xd8, 0xb3, 0x75, 0x22, 0xe4, 0x8c, 0x4a, 0x1d, 0xdb, 0xcd, 0xb, 0x5c, 0x9a, 0xf2, 0x34, 0x63, 0xa5, 0x4f, 0x89, 0xde, 0x18, 0x70, 0xb6, 0xe1, 0x27, 0x31, 0xf7, 0xa0, 0x66, 0xe, 0xc8, 0x9f, 0x59, 0x56, 0x90, 0xc7, 0x1, 0x69, 0xaf, 0xf8, 0x3e, 0x28, 0xee, 0xb9, 0x7f, 0x17, 0xd1, 0x86, 0x40, 0xaa, 0x6c, 0x3b, 0xfd, 0x95, 0x53, 0x4, 0xc2, 0xd4, 0x12, 0x45, 0x83, 0xeb, 0x2d, 0x7a, 0xbc, 0x64, 0xa2, 0xf5, 0x33, 0x5b, 0x9d, 0xca, 0xc, 0x1a, 0xdc, 0x8b, 0x4d, 0x25, 0xe3, 0xb4, 0x72, 0x98, 0x5e, 0x9, 0xcf, 0xa7, 0x61, 0x36, 0xf0, 0xe6, 0x20, 0x77, 0xb1, 0xd9, 0x1f, 0x48, 0x8e, 0x81, 0x47, 0x10, 0xd6, 0xbe, 0x78, 0x2f, 0xe9, 0xff, 0x39, 0x6e, 0xa8, 0xc0, 0x6, 0x51, 0x97, 0x7d, 0xbb, 0xec, 0x2a, 0x42, 0x84, 0xd3, 0x15, 0x3, 0xc5, 0x92, 0x54, 0x3c, 0xfa, 0xad, 0x6b},
+ {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19, 0xec, 0x2b, 0x7f, 0xb8, 0xd7, 0x10, 0x44, 0x83, 0x9a, 0x5d, 0x9, 0xce, 0xa1, 0x66, 0x32, 0xf5, 0xc5, 0x2, 0x56, 0x91, 0xfe, 0x39, 0x6d, 0xaa, 0xb3, 0x74, 0x20, 0xe7, 0x88, 0x4f, 0x1b, 0xdc, 0x29, 0xee, 0xba, 0x7d, 0x12, 0xd5, 0x81, 0x46, 0x5f, 0x98, 0xcc, 0xb, 0x64, 0xa3, 0xf7, 0x30, 0x97, 0x50, 0x4, 0xc3, 0xac, 0x6b, 0x3f, 0xf8, 0xe1, 0x26, 0x72, 0xb5, 0xda, 0x1d, 0x49, 0x8e, 0x7b, 0xbc, 0xe8, 0x2f, 0x40, 0x87, 0xd3, 0x14, 0xd, 0xca, 0x9e, 0x59, 0x36, 0xf1, 0xa5, 0x62, 0x52, 0x95, 0xc1, 0x6, 0x69, 0xae, 0xfa, 0x3d, 0x24, 0xe3, 0xb7, 0x70, 0x1f, 0xd8, 0x8c, 0x4b, 0xbe, 0x79, 0x2d, 0xea, 0x85, 0x42, 0x16, 0xd1, 0xc8, 0xf, 0x5b, 0x9c, 0xf3, 0x34, 0x60, 0xa7, 0x33, 0xf4, 0xa0, 0x67, 0x8, 0xcf, 0x9b, 0x5c, 0x45, 0x82, 0xd6, 0x11, 0x7e, 0xb9, 0xed, 0x2a, 0xdf, 0x18, 0x4c, 0x8b, 0xe4, 0x23, 0x77, 0xb0, 0xa9, 0x6e, 0x3a, 0xfd, 0x92, 0x55, 0x1, 0xc6, 0xf6, 0x31, 0x65, 0xa2, 0xcd, 0xa, 0x5e, 0x99, 0x80, 0x47, 0x13, 0xd4, 0xbb, 0x7c, 0x28, 0xef, 0x1a, 0xdd, 0x89, 0x4e, 0x21, 0xe6, 0xb2, 0x75, 0x6c, 0xab, 0xff, 0x38, 0x57, 0x90, 0xc4, 0x3, 0xa4, 0x63, 0x37, 0xf0, 0x9f, 0x58, 0xc, 0xcb, 0xd2, 0x15, 0x41, 0x86, 0xe9, 0x2e, 0x7a, 0xbd, 0x48, 0x8f, 0xdb, 0x1c, 0x73, 0xb4, 0xe0, 0x27, 0x3e, 0xf9, 0xad, 0x6a, 0x5, 0xc2, 0x96, 0x51, 0x61, 0xa6, 0xf2, 0x35, 0x5a, 0x9d, 0xc9, 0xe, 0x17, 0xd0, 0x84, 0x43, 0x2c, 0xeb, 0xbf, 0x78, 0x8d, 0x4a, 0x1e, 0xd9, 0xb6, 0x71, 0x25, 0xe2, 0xfb, 0x3c, 0x68, 0xaf, 0xc0, 0x7, 0x53, 0x94},
+ {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c, 0x1c, 0xd4, 0x91, 0x59, 0x1b, 0xd3, 0x96, 0x5e, 0x12, 0xda, 0x9f, 0x57, 0x15, 0xdd, 0x98, 0x50, 0x38, 0xf0, 0xb5, 0x7d, 0x3f, 0xf7, 0xb2, 0x7a, 0x36, 0xfe, 0xbb, 0x73, 0x31, 0xf9, 0xbc, 0x74, 0x24, 0xec, 0xa9, 0x61, 0x23, 0xeb, 0xae, 0x66, 0x2a, 0xe2, 0xa7, 0x6f, 0x2d, 0xe5, 0xa0, 0x68, 0x70, 0xb8, 0xfd, 0x35, 0x77, 0xbf, 0xfa, 0x32, 0x7e, 0xb6, 0xf3, 0x3b, 0x79, 0xb1, 0xf4, 0x3c, 0x6c, 0xa4, 0xe1, 0x29, 0x6b, 0xa3, 0xe6, 0x2e, 0x62, 0xaa, 0xef, 0x27, 0x65, 0xad, 0xe8, 0x20, 0x48, 0x80, 0xc5, 0xd, 0x4f, 0x87, 0xc2, 0xa, 0x46, 0x8e, 0xcb, 0x3, 0x41, 0x89, 0xcc, 0x4, 0x54, 0x9c, 0xd9, 0x11, 0x53, 0x9b, 0xde, 0x16, 0x5a, 0x92, 0xd7, 0x1f, 0x5d, 0x95, 0xd0, 0x18, 0xe0, 0x28, 0x6d, 0xa5, 0xe7, 0x2f, 0x6a, 0xa2, 0xee, 0x26, 0x63, 0xab, 0xe9, 0x21, 0x64, 0xac, 0xfc, 0x34, 0x71, 0xb9, 0xfb, 0x33, 0x76, 0xbe, 0xf2, 0x3a, 0x7f, 0xb7, 0xf5, 0x3d, 0x78, 0xb0, 0xd8, 0x10, 0x55, 0x9d, 0xdf, 0x17, 0x52, 0x9a, 0xd6, 0x1e, 0x5b, 0x93, 0xd1, 0x19, 0x5c, 0x94, 0xc4, 0xc, 0x49, 0x81, 0xc3, 0xb, 0x4e, 0x86, 0xca, 0x2, 0x47, 0x8f, 0xcd, 0x5, 0x40, 0x88, 0x90, 0x58, 0x1d, 0xd5, 0x97, 0x5f, 0x1a, 0xd2, 0x9e, 0x56, 0x13, 0xdb, 0x99, 0x51, 0x14, 0xdc, 0x8c, 0x44, 0x1, 0xc9, 0x8b, 0x43, 0x6, 0xce, 0x82, 0x4a, 0xf, 0xc7, 0x85, 0x4d, 0x8, 0xc0, 0xa8, 0x60, 0x25, 0xed, 0xaf, 0x67, 0x22, 0xea, 0xa6, 0x6e, 0x2b, 0xe3, 0xa1, 0x69, 0x2c, 0xe4, 0xb4, 0x7c, 0x39, 0xf1, 0xb3, 0x7b, 0x3e, 0xf6, 0xba, 0x72, 0x37, 0xff, 0xbd, 0x75, 0x30, 0xf8},
+ {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43, 0xc, 0xc5, 0x83, 0x4a, 0xf, 0xc6, 0x80, 0x49, 0xa, 0xc3, 0x85, 0x4c, 0x9, 0xc0, 0x86, 0x4f, 0x18, 0xd1, 0x97, 0x5e, 0x1b, 0xd2, 0x94, 0x5d, 0x1e, 0xd7, 0x91, 0x58, 0x1d, 0xd4, 0x92, 0x5b, 0x14, 0xdd, 0x9b, 0x52, 0x17, 0xde, 0x98, 0x51, 0x12, 0xdb, 0x9d, 0x54, 0x11, 0xd8, 0x9e, 0x57, 0x30, 0xf9, 0xbf, 0x76, 0x33, 0xfa, 0xbc, 0x75, 0x36, 0xff, 0xb9, 0x70, 0x35, 0xfc, 0xba, 0x73, 0x3c, 0xf5, 0xb3, 0x7a, 0x3f, 0xf6, 0xb0, 0x79, 0x3a, 0xf3, 0xb5, 0x7c, 0x39, 0xf0, 0xb6, 0x7f, 0x28, 0xe1, 0xa7, 0x6e, 0x2b, 0xe2, 0xa4, 0x6d, 0x2e, 0xe7, 0xa1, 0x68, 0x2d, 0xe4, 0xa2, 0x6b, 0x24, 0xed, 0xab, 0x62, 0x27, 0xee, 0xa8, 0x61, 0x22, 0xeb, 0xad, 0x64, 0x21, 0xe8, 0xae, 0x67, 0x60, 0xa9, 0xef, 0x26, 0x63, 0xaa, 0xec, 0x25, 0x66, 0xaf, 0xe9, 0x20, 0x65, 0xac, 0xea, 0x23, 0x6c, 0xa5, 0xe3, 0x2a, 0x6f, 0xa6, 0xe0, 0x29, 0x6a, 0xa3, 0xe5, 0x2c, 0x69, 0xa0, 0xe6, 0x2f, 0x78, 0xb1, 0xf7, 0x3e, 0x7b, 0xb2, 0xf4, 0x3d, 0x7e, 0xb7, 0xf1, 0x38, 0x7d, 0xb4, 0xf2, 0x3b, 0x74, 0xbd, 0xfb, 0x32, 0x77, 0xbe, 0xf8, 0x31, 0x72, 0xbb, 0xfd, 0x34, 0x71, 0xb8, 0xfe, 0x37, 0x50, 0x99, 0xdf, 0x16, 0x53, 0x9a, 0xdc, 0x15, 0x56, 0x9f, 0xd9, 0x10, 0x55, 0x9c, 0xda, 0x13, 0x5c, 0x95, 0xd3, 0x1a, 0x5f, 0x96, 0xd0, 0x19, 0x5a, 0x93, 0xd5, 0x1c, 0x59, 0x90, 0xd6, 0x1f, 0x48, 0x81, 0xc7, 0xe, 0x4b, 0x82, 0xc4, 0xd, 0x4e, 0x87, 0xc1, 0x8, 0x4d, 0x84, 0xc2, 0xb, 0x44, 0x8d, 0xcb, 0x2, 0x47, 0x8e, 0xc8, 0x1, 0x42, 0x8b, 0xcd, 0x4, 0x41, 0x88, 0xce, 0x7},
+ {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52, 0x3c, 0xf6, 0xb5, 0x7f, 0x33, 0xf9, 0xba, 0x70, 0x22, 0xe8, 0xab, 0x61, 0x2d, 0xe7, 0xa4, 0x6e, 0x78, 0xb2, 0xf1, 0x3b, 0x77, 0xbd, 0xfe, 0x34, 0x66, 0xac, 0xef, 0x25, 0x69, 0xa3, 0xe0, 0x2a, 0x44, 0x8e, 0xcd, 0x7, 0x4b, 0x81, 0xc2, 0x8, 0x5a, 0x90, 0xd3, 0x19, 0x55, 0x9f, 0xdc, 0x16, 0xf0, 0x3a, 0x79, 0xb3, 0xff, 0x35, 0x76, 0xbc, 0xee, 0x24, 0x67, 0xad, 0xe1, 0x2b, 0x68, 0xa2, 0xcc, 0x6, 0x45, 0x8f, 0xc3, 0x9, 0x4a, 0x80, 0xd2, 0x18, 0x5b, 0x91, 0xdd, 0x17, 0x54, 0x9e, 0x88, 0x42, 0x1, 0xcb, 0x87, 0x4d, 0xe, 0xc4, 0x96, 0x5c, 0x1f, 0xd5, 0x99, 0x53, 0x10, 0xda, 0xb4, 0x7e, 0x3d, 0xf7, 0xbb, 0x71, 0x32, 0xf8, 0xaa, 0x60, 0x23, 0xe9, 0xa5, 0x6f, 0x2c, 0xe6, 0xfd, 0x37, 0x74, 0xbe, 0xf2, 0x38, 0x7b, 0xb1, 0xe3, 0x29, 0x6a, 0xa0, 0xec, 0x26, 0x65, 0xaf, 0xc1, 0xb, 0x48, 0x82, 0xce, 0x4, 0x47, 0x8d, 0xdf, 0x15, 0x56, 0x9c, 0xd0, 0x1a, 0x59, 0x93, 0x85, 0x4f, 0xc, 0xc6, 0x8a, 0x40, 0x3, 0xc9, 0x9b, 0x51, 0x12, 0xd8, 0x94, 0x5e, 0x1d, 0xd7, 0xb9, 0x73, 0x30, 0xfa, 0xb6, 0x7c, 0x3f, 0xf5, 0xa7, 0x6d, 0x2e, 0xe4, 0xa8, 0x62, 0x21, 0xeb, 0xd, 0xc7, 0x84, 0x4e, 0x2, 0xc8, 0x8b, 0x41, 0x13, 0xd9, 0x9a, 0x50, 0x1c, 0xd6, 0x95, 0x5f, 0x31, 0xfb, 0xb8, 0x72, 0x3e, 0xf4, 0xb7, 0x7d, 0x2f, 0xe5, 0xa6, 0x6c, 0x20, 0xea, 0xa9, 0x63, 0x75, 0xbf, 0xfc, 0x36, 0x7a, 0xb0, 0xf3, 0x39, 0x6b, 0xa1, 0xe2, 0x28, 0x64, 0xae, 0xed, 0x27, 0x49, 0x83, 0xc0, 0xa, 0x46, 0x8c, 0xcf, 0x5, 0x57, 0x9d, 0xde, 0x14, 0x58, 0x92, 0xd1, 0x1b},
+ {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d, 0x2c, 0xe7, 0xa7, 0x6c, 0x27, 0xec, 0xac, 0x67, 0x3a, 0xf1, 0xb1, 0x7a, 0x31, 0xfa, 0xba, 0x71, 0x58, 0x93, 0xd3, 0x18, 0x53, 0x98, 0xd8, 0x13, 0x4e, 0x85, 0xc5, 0xe, 0x45, 0x8e, 0xce, 0x5, 0x74, 0xbf, 0xff, 0x34, 0x7f, 0xb4, 0xf4, 0x3f, 0x62, 0xa9, 0xe9, 0x22, 0x69, 0xa2, 0xe2, 0x29, 0xb0, 0x7b, 0x3b, 0xf0, 0xbb, 0x70, 0x30, 0xfb, 0xa6, 0x6d, 0x2d, 0xe6, 0xad, 0x66, 0x26, 0xed, 0x9c, 0x57, 0x17, 0xdc, 0x97, 0x5c, 0x1c, 0xd7, 0x8a, 0x41, 0x1, 0xca, 0x81, 0x4a, 0xa, 0xc1, 0xe8, 0x23, 0x63, 0xa8, 0xe3, 0x28, 0x68, 0xa3, 0xfe, 0x35, 0x75, 0xbe, 0xf5, 0x3e, 0x7e, 0xb5, 0xc4, 0xf, 0x4f, 0x84, 0xcf, 0x4, 0x44, 0x8f, 0xd2, 0x19, 0x59, 0x92, 0xd9, 0x12, 0x52, 0x99, 0x7d, 0xb6, 0xf6, 0x3d, 0x76, 0xbd, 0xfd, 0x36, 0x6b, 0xa0, 0xe0, 0x2b, 0x60, 0xab, 0xeb, 0x20, 0x51, 0x9a, 0xda, 0x11, 0x5a, 0x91, 0xd1, 0x1a, 0x47, 0x8c, 0xcc, 0x7, 0x4c, 0x87, 0xc7, 0xc, 0x25, 0xee, 0xae, 0x65, 0x2e, 0xe5, 0xa5, 0x6e, 0x33, 0xf8, 0xb8, 0x73, 0x38, 0xf3, 0xb3, 0x78, 0x9, 0xc2, 0x82, 0x49, 0x2, 0xc9, 0x89, 0x42, 0x1f, 0xd4, 0x94, 0x5f, 0x14, 0xdf, 0x9f, 0x54, 0xcd, 0x6, 0x46, 0x8d, 0xc6, 0xd, 0x4d, 0x86, 0xdb, 0x10, 0x50, 0x9b, 0xd0, 0x1b, 0x5b, 0x90, 0xe1, 0x2a, 0x6a, 0xa1, 0xea, 0x21, 0x61, 0xaa, 0xf7, 0x3c, 0x7c, 0xb7, 0xfc, 0x37, 0x77, 0xbc, 0x95, 0x5e, 0x1e, 0xd5, 0x9e, 0x55, 0x15, 0xde, 0x83, 0x48, 0x8, 0xc3, 0x88, 0x43, 0x3, 0xc8, 0xb9, 0x72, 0x32, 0xf9, 0xb2, 0x79, 0x39, 0xf2, 0xaf, 0x64, 0x24, 0xef, 0xa4, 0x6f, 0x2f, 0xe4},
+ {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70, 0x5c, 0x90, 0xd9, 0x15, 0x4b, 0x87, 0xce, 0x2, 0x72, 0xbe, 0xf7, 0x3b, 0x65, 0xa9, 0xe0, 0x2c, 0xb8, 0x74, 0x3d, 0xf1, 0xaf, 0x63, 0x2a, 0xe6, 0x96, 0x5a, 0x13, 0xdf, 0x81, 0x4d, 0x4, 0xc8, 0xe4, 0x28, 0x61, 0xad, 0xf3, 0x3f, 0x76, 0xba, 0xca, 0x6, 0x4f, 0x83, 0xdd, 0x11, 0x58, 0x94, 0x6d, 0xa1, 0xe8, 0x24, 0x7a, 0xb6, 0xff, 0x33, 0x43, 0x8f, 0xc6, 0xa, 0x54, 0x98, 0xd1, 0x1d, 0x31, 0xfd, 0xb4, 0x78, 0x26, 0xea, 0xa3, 0x6f, 0x1f, 0xd3, 0x9a, 0x56, 0x8, 0xc4, 0x8d, 0x41, 0xd5, 0x19, 0x50, 0x9c, 0xc2, 0xe, 0x47, 0x8b, 0xfb, 0x37, 0x7e, 0xb2, 0xec, 0x20, 0x69, 0xa5, 0x89, 0x45, 0xc, 0xc0, 0x9e, 0x52, 0x1b, 0xd7, 0xa7, 0x6b, 0x22, 0xee, 0xb0, 0x7c, 0x35, 0xf9, 0xda, 0x16, 0x5f, 0x93, 0xcd, 0x1, 0x48, 0x84, 0xf4, 0x38, 0x71, 0xbd, 0xe3, 0x2f, 0x66, 0xaa, 0x86, 0x4a, 0x3, 0xcf, 0x91, 0x5d, 0x14, 0xd8, 0xa8, 0x64, 0x2d, 0xe1, 0xbf, 0x73, 0x3a, 0xf6, 0x62, 0xae, 0xe7, 0x2b, 0x75, 0xb9, 0xf0, 0x3c, 0x4c, 0x80, 0xc9, 0x5, 0x5b, 0x97, 0xde, 0x12, 0x3e, 0xf2, 0xbb, 0x77, 0x29, 0xe5, 0xac, 0x60, 0x10, 0xdc, 0x95, 0x59, 0x7, 0xcb, 0x82, 0x4e, 0xb7, 0x7b, 0x32, 0xfe, 0xa0, 0x6c, 0x25, 0xe9, 0x99, 0x55, 0x1c, 0xd0, 0x8e, 0x42, 0xb, 0xc7, 0xeb, 0x27, 0x6e, 0xa2, 0xfc, 0x30, 0x79, 0xb5, 0xc5, 0x9, 0x40, 0x8c, 0xd2, 0x1e, 0x57, 0x9b, 0xf, 0xc3, 0x8a, 0x46, 0x18, 0xd4, 0x9d, 0x51, 0x21, 0xed, 0xa4, 0x68, 0x36, 0xfa, 0xb3, 0x7f, 0x53, 0x9f, 0xd6, 0x1a, 0x44, 0x88, 0xc1, 0xd, 0x7d, 0xb1, 0xf8, 0x34, 0x6a, 0xa6, 0xef, 0x23},
+ {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f, 0x4c, 0x81, 0xcb, 0x6, 0x5f, 0x92, 0xd8, 0x15, 0x6a, 0xa7, 0xed, 0x20, 0x79, 0xb4, 0xfe, 0x33, 0x98, 0x55, 0x1f, 0xd2, 0x8b, 0x46, 0xc, 0xc1, 0xbe, 0x73, 0x39, 0xf4, 0xad, 0x60, 0x2a, 0xe7, 0xd4, 0x19, 0x53, 0x9e, 0xc7, 0xa, 0x40, 0x8d, 0xf2, 0x3f, 0x75, 0xb8, 0xe1, 0x2c, 0x66, 0xab, 0x2d, 0xe0, 0xaa, 0x67, 0x3e, 0xf3, 0xb9, 0x74, 0xb, 0xc6, 0x8c, 0x41, 0x18, 0xd5, 0x9f, 0x52, 0x61, 0xac, 0xe6, 0x2b, 0x72, 0xbf, 0xf5, 0x38, 0x47, 0x8a, 0xc0, 0xd, 0x54, 0x99, 0xd3, 0x1e, 0xb5, 0x78, 0x32, 0xff, 0xa6, 0x6b, 0x21, 0xec, 0x93, 0x5e, 0x14, 0xd9, 0x80, 0x4d, 0x7, 0xca, 0xf9, 0x34, 0x7e, 0xb3, 0xea, 0x27, 0x6d, 0xa0, 0xdf, 0x12, 0x58, 0x95, 0xcc, 0x1, 0x4b, 0x86, 0x5a, 0x97, 0xdd, 0x10, 0x49, 0x84, 0xce, 0x3, 0x7c, 0xb1, 0xfb, 0x36, 0x6f, 0xa2, 0xe8, 0x25, 0x16, 0xdb, 0x91, 0x5c, 0x5, 0xc8, 0x82, 0x4f, 0x30, 0xfd, 0xb7, 0x7a, 0x23, 0xee, 0xa4, 0x69, 0xc2, 0xf, 0x45, 0x88, 0xd1, 0x1c, 0x56, 0x9b, 0xe4, 0x29, 0x63, 0xae, 0xf7, 0x3a, 0x70, 0xbd, 0x8e, 0x43, 0x9, 0xc4, 0x9d, 0x50, 0x1a, 0xd7, 0xa8, 0x65, 0x2f, 0xe2, 0xbb, 0x76, 0x3c, 0xf1, 0x77, 0xba, 0xf0, 0x3d, 0x64, 0xa9, 0xe3, 0x2e, 0x51, 0x9c, 0xd6, 0x1b, 0x42, 0x8f, 0xc5, 0x8, 0x3b, 0xf6, 0xbc, 0x71, 0x28, 0xe5, 0xaf, 0x62, 0x1d, 0xd0, 0x9a, 0x57, 0xe, 0xc3, 0x89, 0x44, 0xef, 0x22, 0x68, 0xa5, 0xfc, 0x31, 0x7b, 0xb6, 0xc9, 0x4, 0x4e, 0x83, 0xda, 0x17, 0x5d, 0x90, 0xa3, 0x6e, 0x24, 0xe9, 0xb0, 0x7d, 0x37, 0xfa, 0x85, 0x48, 0x2, 0xcf, 0x96, 0x5b, 0x11, 0xdc},
+ {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e, 0x7c, 0xb2, 0xfd, 0x33, 0x63, 0xad, 0xe2, 0x2c, 0x42, 0x8c, 0xc3, 0xd, 0x5d, 0x93, 0xdc, 0x12, 0xf8, 0x36, 0x79, 0xb7, 0xe7, 0x29, 0x66, 0xa8, 0xc6, 0x8, 0x47, 0x89, 0xd9, 0x17, 0x58, 0x96, 0x84, 0x4a, 0x5, 0xcb, 0x9b, 0x55, 0x1a, 0xd4, 0xba, 0x74, 0x3b, 0xf5, 0xa5, 0x6b, 0x24, 0xea, 0xed, 0x23, 0x6c, 0xa2, 0xf2, 0x3c, 0x73, 0xbd, 0xd3, 0x1d, 0x52, 0x9c, 0xcc, 0x2, 0x4d, 0x83, 0x91, 0x5f, 0x10, 0xde, 0x8e, 0x40, 0xf, 0xc1, 0xaf, 0x61, 0x2e, 0xe0, 0xb0, 0x7e, 0x31, 0xff, 0x15, 0xdb, 0x94, 0x5a, 0xa, 0xc4, 0x8b, 0x45, 0x2b, 0xe5, 0xaa, 0x64, 0x34, 0xfa, 0xb5, 0x7b, 0x69, 0xa7, 0xe8, 0x26, 0x76, 0xb8, 0xf7, 0x39, 0x57, 0x99, 0xd6, 0x18, 0x48, 0x86, 0xc9, 0x7, 0xc7, 0x9, 0x46, 0x88, 0xd8, 0x16, 0x59, 0x97, 0xf9, 0x37, 0x78, 0xb6, 0xe6, 0x28, 0x67, 0xa9, 0xbb, 0x75, 0x3a, 0xf4, 0xa4, 0x6a, 0x25, 0xeb, 0x85, 0x4b, 0x4, 0xca, 0x9a, 0x54, 0x1b, 0xd5, 0x3f, 0xf1, 0xbe, 0x70, 0x20, 0xee, 0xa1, 0x6f, 0x1, 0xcf, 0x80, 0x4e, 0x1e, 0xd0, 0x9f, 0x51, 0x43, 0x8d, 0xc2, 0xc, 0x5c, 0x92, 0xdd, 0x13, 0x7d, 0xb3, 0xfc, 0x32, 0x62, 0xac, 0xe3, 0x2d, 0x2a, 0xe4, 0xab, 0x65, 0x35, 0xfb, 0xb4, 0x7a, 0x14, 0xda, 0x95, 0x5b, 0xb, 0xc5, 0x8a, 0x44, 0x56, 0x98, 0xd7, 0x19, 0x49, 0x87, 0xc8, 0x6, 0x68, 0xa6, 0xe9, 0x27, 0x77, 0xb9, 0xf6, 0x38, 0xd2, 0x1c, 0x53, 0x9d, 0xcd, 0x3, 0x4c, 0x82, 0xec, 0x22, 0x6d, 0xa3, 0xf3, 0x3d, 0x72, 0xbc, 0xae, 0x60, 0x2f, 0xe1, 0xb1, 0x7f, 0x30, 0xfe, 0x90, 0x5e, 0x11, 0xdf, 0x8f, 0x41, 0xe, 0xc0},
+ {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61, 0x6c, 0xa3, 0xef, 0x20, 0x77, 0xb8, 0xf4, 0x3b, 0x5a, 0x95, 0xd9, 0x16, 0x41, 0x8e, 0xc2, 0xd, 0xd8, 0x17, 0x5b, 0x94, 0xc3, 0xc, 0x40, 0x8f, 0xee, 0x21, 0x6d, 0xa2, 0xf5, 0x3a, 0x76, 0xb9, 0xb4, 0x7b, 0x37, 0xf8, 0xaf, 0x60, 0x2c, 0xe3, 0x82, 0x4d, 0x1, 0xce, 0x99, 0x56, 0x1a, 0xd5, 0xad, 0x62, 0x2e, 0xe1, 0xb6, 0x79, 0x35, 0xfa, 0x9b, 0x54, 0x18, 0xd7, 0x80, 0x4f, 0x3, 0xcc, 0xc1, 0xe, 0x42, 0x8d, 0xda, 0x15, 0x59, 0x96, 0xf7, 0x38, 0x74, 0xbb, 0xec, 0x23, 0x6f, 0xa0, 0x75, 0xba, 0xf6, 0x39, 0x6e, 0xa1, 0xed, 0x22, 0x43, 0x8c, 0xc0, 0xf, 0x58, 0x97, 0xdb, 0x14, 0x19, 0xd6, 0x9a, 0x55, 0x2, 0xcd, 0x81, 0x4e, 0x2f, 0xe0, 0xac, 0x63, 0x34, 0xfb, 0xb7, 0x78, 0x47, 0x88, 0xc4, 0xb, 0x5c, 0x93, 0xdf, 0x10, 0x71, 0xbe, 0xf2, 0x3d, 0x6a, 0xa5, 0xe9, 0x26, 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c, 0x1d, 0xd2, 0x9e, 0x51, 0x6, 0xc9, 0x85, 0x4a, 0x9f, 0x50, 0x1c, 0xd3, 0x84, 0x4b, 0x7, 0xc8, 0xa9, 0x66, 0x2a, 0xe5, 0xb2, 0x7d, 0x31, 0xfe, 0xf3, 0x3c, 0x70, 0xbf, 0xe8, 0x27, 0x6b, 0xa4, 0xc5, 0xa, 0x46, 0x89, 0xde, 0x11, 0x5d, 0x92, 0xea, 0x25, 0x69, 0xa6, 0xf1, 0x3e, 0x72, 0xbd, 0xdc, 0x13, 0x5f, 0x90, 0xc7, 0x8, 0x44, 0x8b, 0x86, 0x49, 0x5, 0xca, 0x9d, 0x52, 0x1e, 0xd1, 0xb0, 0x7f, 0x33, 0xfc, 0xab, 0x64, 0x28, 0xe7, 0x32, 0xfd, 0xb1, 0x7e, 0x29, 0xe6, 0xaa, 0x65, 0x4, 0xcb, 0x87, 0x48, 0x1f, 0xd0, 0x9c, 0x53, 0x5e, 0x91, 0xdd, 0x12, 0x45, 0x8a, 0xc6, 0x9, 0x68, 0xa7, 0xeb, 0x24, 0x73, 0xbc, 0xf0, 0x3f},
+ {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4, 0x81, 0x51, 0x3c, 0xec, 0xe6, 0x36, 0x5b, 0x8b, 0x4f, 0x9f, 0xf2, 0x22, 0x28, 0xf8, 0x95, 0x45, 0x1f, 0xcf, 0xa2, 0x72, 0x78, 0xa8, 0xc5, 0x15, 0xd1, 0x1, 0x6c, 0xbc, 0xb6, 0x66, 0xb, 0xdb, 0x9e, 0x4e, 0x23, 0xf3, 0xf9, 0x29, 0x44, 0x94, 0x50, 0x80, 0xed, 0x3d, 0x37, 0xe7, 0x8a, 0x5a, 0x3e, 0xee, 0x83, 0x53, 0x59, 0x89, 0xe4, 0x34, 0xf0, 0x20, 0x4d, 0x9d, 0x97, 0x47, 0x2a, 0xfa, 0xbf, 0x6f, 0x2, 0xd2, 0xd8, 0x8, 0x65, 0xb5, 0x71, 0xa1, 0xcc, 0x1c, 0x16, 0xc6, 0xab, 0x7b, 0x21, 0xf1, 0x9c, 0x4c, 0x46, 0x96, 0xfb, 0x2b, 0xef, 0x3f, 0x52, 0x82, 0x88, 0x58, 0x35, 0xe5, 0xa0, 0x70, 0x1d, 0xcd, 0xc7, 0x17, 0x7a, 0xaa, 0x6e, 0xbe, 0xd3, 0x3, 0x9, 0xd9, 0xb4, 0x64, 0x7c, 0xac, 0xc1, 0x11, 0x1b, 0xcb, 0xa6, 0x76, 0xb2, 0x62, 0xf, 0xdf, 0xd5, 0x5, 0x68, 0xb8, 0xfd, 0x2d, 0x40, 0x90, 0x9a, 0x4a, 0x27, 0xf7, 0x33, 0xe3, 0x8e, 0x5e, 0x54, 0x84, 0xe9, 0x39, 0x63, 0xb3, 0xde, 0xe, 0x4, 0xd4, 0xb9, 0x69, 0xad, 0x7d, 0x10, 0xc0, 0xca, 0x1a, 0x77, 0xa7, 0xe2, 0x32, 0x5f, 0x8f, 0x85, 0x55, 0x38, 0xe8, 0x2c, 0xfc, 0x91, 0x41, 0x4b, 0x9b, 0xf6, 0x26, 0x42, 0x92, 0xff, 0x2f, 0x25, 0xf5, 0x98, 0x48, 0x8c, 0x5c, 0x31, 0xe1, 0xeb, 0x3b, 0x56, 0x86, 0xc3, 0x13, 0x7e, 0xae, 0xa4, 0x74, 0x19, 0xc9, 0xd, 0xdd, 0xb0, 0x60, 0x6a, 0xba, 0xd7, 0x7, 0x5d, 0x8d, 0xe0, 0x30, 0x3a, 0xea, 0x87, 0x57, 0x93, 0x43, 0x2e, 0xfe, 0xf4, 0x24, 0x49, 0x99, 0xdc, 0xc, 0x61, 0xb1, 0xbb, 0x6b, 0x6, 0xd6, 0x12, 0xc2, 0xaf, 0x7f, 0x75, 0xa5, 0xc8, 0x18},
+ {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb, 0x91, 0x40, 0x2e, 0xff, 0xf2, 0x23, 0x4d, 0x9c, 0x57, 0x86, 0xe8, 0x39, 0x34, 0xe5, 0x8b, 0x5a, 0x3f, 0xee, 0x80, 0x51, 0x5c, 0x8d, 0xe3, 0x32, 0xf9, 0x28, 0x46, 0x97, 0x9a, 0x4b, 0x25, 0xf4, 0xae, 0x7f, 0x11, 0xc0, 0xcd, 0x1c, 0x72, 0xa3, 0x68, 0xb9, 0xd7, 0x6, 0xb, 0xda, 0xb4, 0x65, 0x7e, 0xaf, 0xc1, 0x10, 0x1d, 0xcc, 0xa2, 0x73, 0xb8, 0x69, 0x7, 0xd6, 0xdb, 0xa, 0x64, 0xb5, 0xef, 0x3e, 0x50, 0x81, 0x8c, 0x5d, 0x33, 0xe2, 0x29, 0xf8, 0x96, 0x47, 0x4a, 0x9b, 0xf5, 0x24, 0x41, 0x90, 0xfe, 0x2f, 0x22, 0xf3, 0x9d, 0x4c, 0x87, 0x56, 0x38, 0xe9, 0xe4, 0x35, 0x5b, 0x8a, 0xd0, 0x1, 0x6f, 0xbe, 0xb3, 0x62, 0xc, 0xdd, 0x16, 0xc7, 0xa9, 0x78, 0x75, 0xa4, 0xca, 0x1b, 0xfc, 0x2d, 0x43, 0x92, 0x9f, 0x4e, 0x20, 0xf1, 0x3a, 0xeb, 0x85, 0x54, 0x59, 0x88, 0xe6, 0x37, 0x6d, 0xbc, 0xd2, 0x3, 0xe, 0xdf, 0xb1, 0x60, 0xab, 0x7a, 0x14, 0xc5, 0xc8, 0x19, 0x77, 0xa6, 0xc3, 0x12, 0x7c, 0xad, 0xa0, 0x71, 0x1f, 0xce, 0x5, 0xd4, 0xba, 0x6b, 0x66, 0xb7, 0xd9, 0x8, 0x52, 0x83, 0xed, 0x3c, 0x31, 0xe0, 0x8e, 0x5f, 0x94, 0x45, 0x2b, 0xfa, 0xf7, 0x26, 0x48, 0x99, 0x82, 0x53, 0x3d, 0xec, 0xe1, 0x30, 0x5e, 0x8f, 0x44, 0x95, 0xfb, 0x2a, 0x27, 0xf6, 0x98, 0x49, 0x13, 0xc2, 0xac, 0x7d, 0x70, 0xa1, 0xcf, 0x1e, 0xd5, 0x4, 0x6a, 0xbb, 0xb6, 0x67, 0x9, 0xd8, 0xbd, 0x6c, 0x2, 0xd3, 0xde, 0xf, 0x61, 0xb0, 0x7b, 0xaa, 0xc4, 0x15, 0x18, 0xc9, 0xa7, 0x76, 0x2c, 0xfd, 0x93, 0x42, 0x4f, 0x9e, 0xf0, 0x21, 0xea, 0x3b, 0x55, 0x84, 0x89, 0x58, 0x36, 0xe7},
+ {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda, 0xa1, 0x73, 0x18, 0xca, 0xce, 0x1c, 0x77, 0xa5, 0x7f, 0xad, 0xc6, 0x14, 0x10, 0xc2, 0xa9, 0x7b, 0x5f, 0x8d, 0xe6, 0x34, 0x30, 0xe2, 0x89, 0x5b, 0x81, 0x53, 0x38, 0xea, 0xee, 0x3c, 0x57, 0x85, 0xfe, 0x2c, 0x47, 0x95, 0x91, 0x43, 0x28, 0xfa, 0x20, 0xf2, 0x99, 0x4b, 0x4f, 0x9d, 0xf6, 0x24, 0xbe, 0x6c, 0x7, 0xd5, 0xd1, 0x3, 0x68, 0xba, 0x60, 0xb2, 0xd9, 0xb, 0xf, 0xdd, 0xb6, 0x64, 0x1f, 0xcd, 0xa6, 0x74, 0x70, 0xa2, 0xc9, 0x1b, 0xc1, 0x13, 0x78, 0xaa, 0xae, 0x7c, 0x17, 0xc5, 0xe1, 0x33, 0x58, 0x8a, 0x8e, 0x5c, 0x37, 0xe5, 0x3f, 0xed, 0x86, 0x54, 0x50, 0x82, 0xe9, 0x3b, 0x40, 0x92, 0xf9, 0x2b, 0x2f, 0xfd, 0x96, 0x44, 0x9e, 0x4c, 0x27, 0xf5, 0xf1, 0x23, 0x48, 0x9a, 0x61, 0xb3, 0xd8, 0xa, 0xe, 0xdc, 0xb7, 0x65, 0xbf, 0x6d, 0x6, 0xd4, 0xd0, 0x2, 0x69, 0xbb, 0xc0, 0x12, 0x79, 0xab, 0xaf, 0x7d, 0x16, 0xc4, 0x1e, 0xcc, 0xa7, 0x75, 0x71, 0xa3, 0xc8, 0x1a, 0x3e, 0xec, 0x87, 0x55, 0x51, 0x83, 0xe8, 0x3a, 0xe0, 0x32, 0x59, 0x8b, 0x8f, 0x5d, 0x36, 0xe4, 0x9f, 0x4d, 0x26, 0xf4, 0xf0, 0x22, 0x49, 0x9b, 0x41, 0x93, 0xf8, 0x2a, 0x2e, 0xfc, 0x97, 0x45, 0xdf, 0xd, 0x66, 0xb4, 0xb0, 0x62, 0x9, 0xdb, 0x1, 0xd3, 0xb8, 0x6a, 0x6e, 0xbc, 0xd7, 0x5, 0x7e, 0xac, 0xc7, 0x15, 0x11, 0xc3, 0xa8, 0x7a, 0xa0, 0x72, 0x19, 0xcb, 0xcf, 0x1d, 0x76, 0xa4, 0x80, 0x52, 0x39, 0xeb, 0xef, 0x3d, 0x56, 0x84, 0x5e, 0x8c, 0xe7, 0x35, 0x31, 0xe3, 0x88, 0x5a, 0x21, 0xf3, 0x98, 0x4a, 0x4e, 0x9c, 0xf7, 0x25, 0xff, 0x2d, 0x46, 0x94, 0x90, 0x42, 0x29, 0xfb},
+ {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5, 0xb1, 0x62, 0xa, 0xd9, 0xda, 0x9, 0x61, 0xb2, 0x67, 0xb4, 0xdc, 0xf, 0xc, 0xdf, 0xb7, 0x64, 0x7f, 0xac, 0xc4, 0x17, 0x14, 0xc7, 0xaf, 0x7c, 0xa9, 0x7a, 0x12, 0xc1, 0xc2, 0x11, 0x79, 0xaa, 0xce, 0x1d, 0x75, 0xa6, 0xa5, 0x76, 0x1e, 0xcd, 0x18, 0xcb, 0xa3, 0x70, 0x73, 0xa0, 0xc8, 0x1b, 0xfe, 0x2d, 0x45, 0x96, 0x95, 0x46, 0x2e, 0xfd, 0x28, 0xfb, 0x93, 0x40, 0x43, 0x90, 0xf8, 0x2b, 0x4f, 0x9c, 0xf4, 0x27, 0x24, 0xf7, 0x9f, 0x4c, 0x99, 0x4a, 0x22, 0xf1, 0xf2, 0x21, 0x49, 0x9a, 0x81, 0x52, 0x3a, 0xe9, 0xea, 0x39, 0x51, 0x82, 0x57, 0x84, 0xec, 0x3f, 0x3c, 0xef, 0x87, 0x54, 0x30, 0xe3, 0x8b, 0x58, 0x5b, 0x88, 0xe0, 0x33, 0xe6, 0x35, 0x5d, 0x8e, 0x8d, 0x5e, 0x36, 0xe5, 0xe1, 0x32, 0x5a, 0x89, 0x8a, 0x59, 0x31, 0xe2, 0x37, 0xe4, 0x8c, 0x5f, 0x5c, 0x8f, 0xe7, 0x34, 0x50, 0x83, 0xeb, 0x38, 0x3b, 0xe8, 0x80, 0x53, 0x86, 0x55, 0x3d, 0xee, 0xed, 0x3e, 0x56, 0x85, 0x9e, 0x4d, 0x25, 0xf6, 0xf5, 0x26, 0x4e, 0x9d, 0x48, 0x9b, 0xf3, 0x20, 0x23, 0xf0, 0x98, 0x4b, 0x2f, 0xfc, 0x94, 0x47, 0x44, 0x97, 0xff, 0x2c, 0xf9, 0x2a, 0x42, 0x91, 0x92, 0x41, 0x29, 0xfa, 0x1f, 0xcc, 0xa4, 0x77, 0x74, 0xa7, 0xcf, 0x1c, 0xc9, 0x1a, 0x72, 0xa1, 0xa2, 0x71, 0x19, 0xca, 0xae, 0x7d, 0x15, 0xc6, 0xc5, 0x16, 0x7e, 0xad, 0x78, 0xab, 0xc3, 0x10, 0x13, 0xc0, 0xa8, 0x7b, 0x60, 0xb3, 0xdb, 0x8, 0xb, 0xd8, 0xb0, 0x63, 0xb6, 0x65, 0xd, 0xde, 0xdd, 0xe, 0x66, 0xb5, 0xd1, 0x2, 0x6a, 0xb9, 0xba, 0x69, 0x1, 0xd2, 0x7, 0xd4, 0xbc, 0x6f, 0x6c, 0xbf, 0xd7, 0x4},
+ {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8, 0xc1, 0x15, 0x74, 0xa0, 0xb6, 0x62, 0x3, 0xd7, 0x2f, 0xfb, 0x9a, 0x4e, 0x58, 0x8c, 0xed, 0x39, 0x9f, 0x4b, 0x2a, 0xfe, 0xe8, 0x3c, 0x5d, 0x89, 0x71, 0xa5, 0xc4, 0x10, 0x6, 0xd2, 0xb3, 0x67, 0x5e, 0x8a, 0xeb, 0x3f, 0x29, 0xfd, 0x9c, 0x48, 0xb0, 0x64, 0x5, 0xd1, 0xc7, 0x13, 0x72, 0xa6, 0x23, 0xf7, 0x96, 0x42, 0x54, 0x80, 0xe1, 0x35, 0xcd, 0x19, 0x78, 0xac, 0xba, 0x6e, 0xf, 0xdb, 0xe2, 0x36, 0x57, 0x83, 0x95, 0x41, 0x20, 0xf4, 0xc, 0xd8, 0xb9, 0x6d, 0x7b, 0xaf, 0xce, 0x1a, 0xbc, 0x68, 0x9, 0xdd, 0xcb, 0x1f, 0x7e, 0xaa, 0x52, 0x86, 0xe7, 0x33, 0x25, 0xf1, 0x90, 0x44, 0x7d, 0xa9, 0xc8, 0x1c, 0xa, 0xde, 0xbf, 0x6b, 0x93, 0x47, 0x26, 0xf2, 0xe4, 0x30, 0x51, 0x85, 0x46, 0x92, 0xf3, 0x27, 0x31, 0xe5, 0x84, 0x50, 0xa8, 0x7c, 0x1d, 0xc9, 0xdf, 0xb, 0x6a, 0xbe, 0x87, 0x53, 0x32, 0xe6, 0xf0, 0x24, 0x45, 0x91, 0x69, 0xbd, 0xdc, 0x8, 0x1e, 0xca, 0xab, 0x7f, 0xd9, 0xd, 0x6c, 0xb8, 0xae, 0x7a, 0x1b, 0xcf, 0x37, 0xe3, 0x82, 0x56, 0x40, 0x94, 0xf5, 0x21, 0x18, 0xcc, 0xad, 0x79, 0x6f, 0xbb, 0xda, 0xe, 0xf6, 0x22, 0x43, 0x97, 0x81, 0x55, 0x34, 0xe0, 0x65, 0xb1, 0xd0, 0x4, 0x12, 0xc6, 0xa7, 0x73, 0x8b, 0x5f, 0x3e, 0xea, 0xfc, 0x28, 0x49, 0x9d, 0xa4, 0x70, 0x11, 0xc5, 0xd3, 0x7, 0x66, 0xb2, 0x4a, 0x9e, 0xff, 0x2b, 0x3d, 0xe9, 0x88, 0x5c, 0xfa, 0x2e, 0x4f, 0x9b, 0x8d, 0x59, 0x38, 0xec, 0x14, 0xc0, 0xa1, 0x75, 0x63, 0xb7, 0xd6, 0x2, 0x3b, 0xef, 0x8e, 0x5a, 0x4c, 0x98, 0xf9, 0x2d, 0xd5, 0x1, 0x60, 0xb4, 0xa2, 0x76, 0x17, 0xc3},
+ {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7, 0xd1, 0x4, 0x66, 0xb3, 0xa2, 0x77, 0x15, 0xc0, 0x37, 0xe2, 0x80, 0x55, 0x44, 0x91, 0xf3, 0x26, 0xbf, 0x6a, 0x8, 0xdd, 0xcc, 0x19, 0x7b, 0xae, 0x59, 0x8c, 0xee, 0x3b, 0x2a, 0xff, 0x9d, 0x48, 0x6e, 0xbb, 0xd9, 0xc, 0x1d, 0xc8, 0xaa, 0x7f, 0x88, 0x5d, 0x3f, 0xea, 0xfb, 0x2e, 0x4c, 0x99, 0x63, 0xb6, 0xd4, 0x1, 0x10, 0xc5, 0xa7, 0x72, 0x85, 0x50, 0x32, 0xe7, 0xf6, 0x23, 0x41, 0x94, 0xb2, 0x67, 0x5, 0xd0, 0xc1, 0x14, 0x76, 0xa3, 0x54, 0x81, 0xe3, 0x36, 0x27, 0xf2, 0x90, 0x45, 0xdc, 0x9, 0x6b, 0xbe, 0xaf, 0x7a, 0x18, 0xcd, 0x3a, 0xef, 0x8d, 0x58, 0x49, 0x9c, 0xfe, 0x2b, 0xd, 0xd8, 0xba, 0x6f, 0x7e, 0xab, 0xc9, 0x1c, 0xeb, 0x3e, 0x5c, 0x89, 0x98, 0x4d, 0x2f, 0xfa, 0xc6, 0x13, 0x71, 0xa4, 0xb5, 0x60, 0x2, 0xd7, 0x20, 0xf5, 0x97, 0x42, 0x53, 0x86, 0xe4, 0x31, 0x17, 0xc2, 0xa0, 0x75, 0x64, 0xb1, 0xd3, 0x6, 0xf1, 0x24, 0x46, 0x93, 0x82, 0x57, 0x35, 0xe0, 0x79, 0xac, 0xce, 0x1b, 0xa, 0xdf, 0xbd, 0x68, 0x9f, 0x4a, 0x28, 0xfd, 0xec, 0x39, 0x5b, 0x8e, 0xa8, 0x7d, 0x1f, 0xca, 0xdb, 0xe, 0x6c, 0xb9, 0x4e, 0x9b, 0xf9, 0x2c, 0x3d, 0xe8, 0x8a, 0x5f, 0xa5, 0x70, 0x12, 0xc7, 0xd6, 0x3, 0x61, 0xb4, 0x43, 0x96, 0xf4, 0x21, 0x30, 0xe5, 0x87, 0x52, 0x74, 0xa1, 0xc3, 0x16, 0x7, 0xd2, 0xb0, 0x65, 0x92, 0x47, 0x25, 0xf0, 0xe1, 0x34, 0x56, 0x83, 0x1a, 0xcf, 0xad, 0x78, 0x69, 0xbc, 0xde, 0xb, 0xfc, 0x29, 0x4b, 0x9e, 0x8f, 0x5a, 0x38, 0xed, 0xcb, 0x1e, 0x7c, 0xa9, 0xb8, 0x6d, 0xf, 0xda, 0x2d, 0xf8, 0x9a, 0x4f, 0x5e, 0x8b, 0xe9, 0x3c},
+ {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6, 0xe1, 0x37, 0x50, 0x86, 0x9e, 0x48, 0x2f, 0xf9, 0x1f, 0xc9, 0xae, 0x78, 0x60, 0xb6, 0xd1, 0x7, 0xdf, 0x9, 0x6e, 0xb8, 0xa0, 0x76, 0x11, 0xc7, 0x21, 0xf7, 0x90, 0x46, 0x5e, 0x88, 0xef, 0x39, 0x3e, 0xe8, 0x8f, 0x59, 0x41, 0x97, 0xf0, 0x26, 0xc0, 0x16, 0x71, 0xa7, 0xbf, 0x69, 0xe, 0xd8, 0xa3, 0x75, 0x12, 0xc4, 0xdc, 0xa, 0x6d, 0xbb, 0x5d, 0x8b, 0xec, 0x3a, 0x22, 0xf4, 0x93, 0x45, 0x42, 0x94, 0xf3, 0x25, 0x3d, 0xeb, 0x8c, 0x5a, 0xbc, 0x6a, 0xd, 0xdb, 0xc3, 0x15, 0x72, 0xa4, 0x7c, 0xaa, 0xcd, 0x1b, 0x3, 0xd5, 0xb2, 0x64, 0x82, 0x54, 0x33, 0xe5, 0xfd, 0x2b, 0x4c, 0x9a, 0x9d, 0x4b, 0x2c, 0xfa, 0xe2, 0x34, 0x53, 0x85, 0x63, 0xb5, 0xd2, 0x4, 0x1c, 0xca, 0xad, 0x7b, 0x5b, 0x8d, 0xea, 0x3c, 0x24, 0xf2, 0x95, 0x43, 0xa5, 0x73, 0x14, 0xc2, 0xda, 0xc, 0x6b, 0xbd, 0xba, 0x6c, 0xb, 0xdd, 0xc5, 0x13, 0x74, 0xa2, 0x44, 0x92, 0xf5, 0x23, 0x3b, 0xed, 0x8a, 0x5c, 0x84, 0x52, 0x35, 0xe3, 0xfb, 0x2d, 0x4a, 0x9c, 0x7a, 0xac, 0xcb, 0x1d, 0x5, 0xd3, 0xb4, 0x62, 0x65, 0xb3, 0xd4, 0x2, 0x1a, 0xcc, 0xab, 0x7d, 0x9b, 0x4d, 0x2a, 0xfc, 0xe4, 0x32, 0x55, 0x83, 0xf8, 0x2e, 0x49, 0x9f, 0x87, 0x51, 0x36, 0xe0, 0x6, 0xd0, 0xb7, 0x61, 0x79, 0xaf, 0xc8, 0x1e, 0x19, 0xcf, 0xa8, 0x7e, 0x66, 0xb0, 0xd7, 0x1, 0xe7, 0x31, 0x56, 0x80, 0x98, 0x4e, 0x29, 0xff, 0x27, 0xf1, 0x96, 0x40, 0x58, 0x8e, 0xe9, 0x3f, 0xd9, 0xf, 0x68, 0xbe, 0xa6, 0x70, 0x17, 0xc1, 0xc6, 0x10, 0x77, 0xa1, 0xb9, 0x6f, 0x8, 0xde, 0x38, 0xee, 0x89, 0x5f, 0x47, 0x91, 0xf6, 0x20},
+ {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9, 0xf1, 0x26, 0x42, 0x95, 0x8a, 0x5d, 0x39, 0xee, 0x7, 0xd0, 0xb4, 0x63, 0x7c, 0xab, 0xcf, 0x18, 0xff, 0x28, 0x4c, 0x9b, 0x84, 0x53, 0x37, 0xe0, 0x9, 0xde, 0xba, 0x6d, 0x72, 0xa5, 0xc1, 0x16, 0xe, 0xd9, 0xbd, 0x6a, 0x75, 0xa2, 0xc6, 0x11, 0xf8, 0x2f, 0x4b, 0x9c, 0x83, 0x54, 0x30, 0xe7, 0xe3, 0x34, 0x50, 0x87, 0x98, 0x4f, 0x2b, 0xfc, 0x15, 0xc2, 0xa6, 0x71, 0x6e, 0xb9, 0xdd, 0xa, 0x12, 0xc5, 0xa1, 0x76, 0x69, 0xbe, 0xda, 0xd, 0xe4, 0x33, 0x57, 0x80, 0x9f, 0x48, 0x2c, 0xfb, 0x1c, 0xcb, 0xaf, 0x78, 0x67, 0xb0, 0xd4, 0x3, 0xea, 0x3d, 0x59, 0x8e, 0x91, 0x46, 0x22, 0xf5, 0xed, 0x3a, 0x5e, 0x89, 0x96, 0x41, 0x25, 0xf2, 0x1b, 0xcc, 0xa8, 0x7f, 0x60, 0xb7, 0xd3, 0x4, 0xdb, 0xc, 0x68, 0xbf, 0xa0, 0x77, 0x13, 0xc4, 0x2d, 0xfa, 0x9e, 0x49, 0x56, 0x81, 0xe5, 0x32, 0x2a, 0xfd, 0x99, 0x4e, 0x51, 0x86, 0xe2, 0x35, 0xdc, 0xb, 0x6f, 0xb8, 0xa7, 0x70, 0x14, 0xc3, 0x24, 0xf3, 0x97, 0x40, 0x5f, 0x88, 0xec, 0x3b, 0xd2, 0x5, 0x61, 0xb6, 0xa9, 0x7e, 0x1a, 0xcd, 0xd5, 0x2, 0x66, 0xb1, 0xae, 0x79, 0x1d, 0xca, 0x23, 0xf4, 0x90, 0x47, 0x58, 0x8f, 0xeb, 0x3c, 0x38, 0xef, 0x8b, 0x5c, 0x43, 0x94, 0xf0, 0x27, 0xce, 0x19, 0x7d, 0xaa, 0xb5, 0x62, 0x6, 0xd1, 0xc9, 0x1e, 0x7a, 0xad, 0xb2, 0x65, 0x1, 0xd6, 0x3f, 0xe8, 0x8c, 0x5b, 0x44, 0x93, 0xf7, 0x20, 0xc7, 0x10, 0x74, 0xa3, 0xbc, 0x6b, 0xf, 0xd8, 0x31, 0xe6, 0x82, 0x55, 0x4a, 0x9d, 0xf9, 0x2e, 0x36, 0xe1, 0x85, 0x52, 0x4d, 0x9a, 0xfe, 0x29, 0xc0, 0x17, 0x73, 0xa4, 0xbb, 0x6c, 0x8, 0xdf},
+ {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc, 0x1, 0xd9, 0xac, 0x74, 0x46, 0x9e, 0xeb, 0x33, 0x8f, 0x57, 0x22, 0xfa, 0xc8, 0x10, 0x65, 0xbd, 0x2, 0xda, 0xaf, 0x77, 0x45, 0x9d, 0xe8, 0x30, 0x8c, 0x54, 0x21, 0xf9, 0xcb, 0x13, 0x66, 0xbe, 0x3, 0xdb, 0xae, 0x76, 0x44, 0x9c, 0xe9, 0x31, 0x8d, 0x55, 0x20, 0xf8, 0xca, 0x12, 0x67, 0xbf, 0x4, 0xdc, 0xa9, 0x71, 0x43, 0x9b, 0xee, 0x36, 0x8a, 0x52, 0x27, 0xff, 0xcd, 0x15, 0x60, 0xb8, 0x5, 0xdd, 0xa8, 0x70, 0x42, 0x9a, 0xef, 0x37, 0x8b, 0x53, 0x26, 0xfe, 0xcc, 0x14, 0x61, 0xb9, 0x6, 0xde, 0xab, 0x73, 0x41, 0x99, 0xec, 0x34, 0x88, 0x50, 0x25, 0xfd, 0xcf, 0x17, 0x62, 0xba, 0x7, 0xdf, 0xaa, 0x72, 0x40, 0x98, 0xed, 0x35, 0x89, 0x51, 0x24, 0xfc, 0xce, 0x16, 0x63, 0xbb, 0x8, 0xd0, 0xa5, 0x7d, 0x4f, 0x97, 0xe2, 0x3a, 0x86, 0x5e, 0x2b, 0xf3, 0xc1, 0x19, 0x6c, 0xb4, 0x9, 0xd1, 0xa4, 0x7c, 0x4e, 0x96, 0xe3, 0x3b, 0x87, 0x5f, 0x2a, 0xf2, 0xc0, 0x18, 0x6d, 0xb5, 0xa, 0xd2, 0xa7, 0x7f, 0x4d, 0x95, 0xe0, 0x38, 0x84, 0x5c, 0x29, 0xf1, 0xc3, 0x1b, 0x6e, 0xb6, 0xb, 0xd3, 0xa6, 0x7e, 0x4c, 0x94, 0xe1, 0x39, 0x85, 0x5d, 0x28, 0xf0, 0xc2, 0x1a, 0x6f, 0xb7, 0xc, 0xd4, 0xa1, 0x79, 0x4b, 0x93, 0xe6, 0x3e, 0x82, 0x5a, 0x2f, 0xf7, 0xc5, 0x1d, 0x68, 0xb0, 0xd, 0xd5, 0xa0, 0x78, 0x4a, 0x92, 0xe7, 0x3f, 0x83, 0x5b, 0x2e, 0xf6, 0xc4, 0x1c, 0x69, 0xb1, 0xe, 0xd6, 0xa3, 0x7b, 0x49, 0x91, 0xe4, 0x3c, 0x80, 0x58, 0x2d, 0xf5, 0xc7, 0x1f, 0x6a, 0xb2, 0xf, 0xd7, 0xa2, 0x7a, 0x48, 0x90, 0xe5, 0x3d, 0x81, 0x59, 0x2c, 0xf4, 0xc6, 0x1e, 0x6b, 0xb3},
+ {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3, 0x11, 0xc8, 0xbe, 0x67, 0x52, 0x8b, 0xfd, 0x24, 0x97, 0x4e, 0x38, 0xe1, 0xd4, 0xd, 0x7b, 0xa2, 0x22, 0xfb, 0x8d, 0x54, 0x61, 0xb8, 0xce, 0x17, 0xa4, 0x7d, 0xb, 0xd2, 0xe7, 0x3e, 0x48, 0x91, 0x33, 0xea, 0x9c, 0x45, 0x70, 0xa9, 0xdf, 0x6, 0xb5, 0x6c, 0x1a, 0xc3, 0xf6, 0x2f, 0x59, 0x80, 0x44, 0x9d, 0xeb, 0x32, 0x7, 0xde, 0xa8, 0x71, 0xc2, 0x1b, 0x6d, 0xb4, 0x81, 0x58, 0x2e, 0xf7, 0x55, 0x8c, 0xfa, 0x23, 0x16, 0xcf, 0xb9, 0x60, 0xd3, 0xa, 0x7c, 0xa5, 0x90, 0x49, 0x3f, 0xe6, 0x66, 0xbf, 0xc9, 0x10, 0x25, 0xfc, 0x8a, 0x53, 0xe0, 0x39, 0x4f, 0x96, 0xa3, 0x7a, 0xc, 0xd5, 0x77, 0xae, 0xd8, 0x1, 0x34, 0xed, 0x9b, 0x42, 0xf1, 0x28, 0x5e, 0x87, 0xb2, 0x6b, 0x1d, 0xc4, 0x88, 0x51, 0x27, 0xfe, 0xcb, 0x12, 0x64, 0xbd, 0xe, 0xd7, 0xa1, 0x78, 0x4d, 0x94, 0xe2, 0x3b, 0x99, 0x40, 0x36, 0xef, 0xda, 0x3, 0x75, 0xac, 0x1f, 0xc6, 0xb0, 0x69, 0x5c, 0x85, 0xf3, 0x2a, 0xaa, 0x73, 0x5, 0xdc, 0xe9, 0x30, 0x46, 0x9f, 0x2c, 0xf5, 0x83, 0x5a, 0x6f, 0xb6, 0xc0, 0x19, 0xbb, 0x62, 0x14, 0xcd, 0xf8, 0x21, 0x57, 0x8e, 0x3d, 0xe4, 0x92, 0x4b, 0x7e, 0xa7, 0xd1, 0x8, 0xcc, 0x15, 0x63, 0xba, 0x8f, 0x56, 0x20, 0xf9, 0x4a, 0x93, 0xe5, 0x3c, 0x9, 0xd0, 0xa6, 0x7f, 0xdd, 0x4, 0x72, 0xab, 0x9e, 0x47, 0x31, 0xe8, 0x5b, 0x82, 0xf4, 0x2d, 0x18, 0xc1, 0xb7, 0x6e, 0xee, 0x37, 0x41, 0x98, 0xad, 0x74, 0x2, 0xdb, 0x68, 0xb1, 0xc7, 0x1e, 0x2b, 0xf2, 0x84, 0x5d, 0xff, 0x26, 0x50, 0x89, 0xbc, 0x65, 0x13, 0xca, 0x79, 0xa0, 0xd6, 0xf, 0x3a, 0xe3, 0x95, 0x4c},
+ {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2, 0x21, 0xfb, 0x88, 0x52, 0x6e, 0xb4, 0xc7, 0x1d, 0xbf, 0x65, 0x16, 0xcc, 0xf0, 0x2a, 0x59, 0x83, 0x42, 0x98, 0xeb, 0x31, 0xd, 0xd7, 0xa4, 0x7e, 0xdc, 0x6, 0x75, 0xaf, 0x93, 0x49, 0x3a, 0xe0, 0x63, 0xb9, 0xca, 0x10, 0x2c, 0xf6, 0x85, 0x5f, 0xfd, 0x27, 0x54, 0x8e, 0xb2, 0x68, 0x1b, 0xc1, 0x84, 0x5e, 0x2d, 0xf7, 0xcb, 0x11, 0x62, 0xb8, 0x1a, 0xc0, 0xb3, 0x69, 0x55, 0x8f, 0xfc, 0x26, 0xa5, 0x7f, 0xc, 0xd6, 0xea, 0x30, 0x43, 0x99, 0x3b, 0xe1, 0x92, 0x48, 0x74, 0xae, 0xdd, 0x7, 0xc6, 0x1c, 0x6f, 0xb5, 0x89, 0x53, 0x20, 0xfa, 0x58, 0x82, 0xf1, 0x2b, 0x17, 0xcd, 0xbe, 0x64, 0xe7, 0x3d, 0x4e, 0x94, 0xa8, 0x72, 0x1, 0xdb, 0x79, 0xa3, 0xd0, 0xa, 0x36, 0xec, 0x9f, 0x45, 0x15, 0xcf, 0xbc, 0x66, 0x5a, 0x80, 0xf3, 0x29, 0x8b, 0x51, 0x22, 0xf8, 0xc4, 0x1e, 0x6d, 0xb7, 0x34, 0xee, 0x9d, 0x47, 0x7b, 0xa1, 0xd2, 0x8, 0xaa, 0x70, 0x3, 0xd9, 0xe5, 0x3f, 0x4c, 0x96, 0x57, 0x8d, 0xfe, 0x24, 0x18, 0xc2, 0xb1, 0x6b, 0xc9, 0x13, 0x60, 0xba, 0x86, 0x5c, 0x2f, 0xf5, 0x76, 0xac, 0xdf, 0x5, 0x39, 0xe3, 0x90, 0x4a, 0xe8, 0x32, 0x41, 0x9b, 0xa7, 0x7d, 0xe, 0xd4, 0x91, 0x4b, 0x38, 0xe2, 0xde, 0x4, 0x77, 0xad, 0xf, 0xd5, 0xa6, 0x7c, 0x40, 0x9a, 0xe9, 0x33, 0xb0, 0x6a, 0x19, 0xc3, 0xff, 0x25, 0x56, 0x8c, 0x2e, 0xf4, 0x87, 0x5d, 0x61, 0xbb, 0xc8, 0x12, 0xd3, 0x9, 0x7a, 0xa0, 0x9c, 0x46, 0x35, 0xef, 0x4d, 0x97, 0xe4, 0x3e, 0x2, 0xd8, 0xab, 0x71, 0xf2, 0x28, 0x5b, 0x81, 0xbd, 0x67, 0x14, 0xce, 0x6c, 0xb6, 0xc5, 0x1f, 0x23, 0xf9, 0x8a, 0x50},
+ {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad, 0x31, 0xea, 0x9a, 0x41, 0x7a, 0xa1, 0xd1, 0xa, 0xa7, 0x7c, 0xc, 0xd7, 0xec, 0x37, 0x47, 0x9c, 0x62, 0xb9, 0xc9, 0x12, 0x29, 0xf2, 0x82, 0x59, 0xf4, 0x2f, 0x5f, 0x84, 0xbf, 0x64, 0x14, 0xcf, 0x53, 0x88, 0xf8, 0x23, 0x18, 0xc3, 0xb3, 0x68, 0xc5, 0x1e, 0x6e, 0xb5, 0x8e, 0x55, 0x25, 0xfe, 0xc4, 0x1f, 0x6f, 0xb4, 0x8f, 0x54, 0x24, 0xff, 0x52, 0x89, 0xf9, 0x22, 0x19, 0xc2, 0xb2, 0x69, 0xf5, 0x2e, 0x5e, 0x85, 0xbe, 0x65, 0x15, 0xce, 0x63, 0xb8, 0xc8, 0x13, 0x28, 0xf3, 0x83, 0x58, 0xa6, 0x7d, 0xd, 0xd6, 0xed, 0x36, 0x46, 0x9d, 0x30, 0xeb, 0x9b, 0x40, 0x7b, 0xa0, 0xd0, 0xb, 0x97, 0x4c, 0x3c, 0xe7, 0xdc, 0x7, 0x77, 0xac, 0x1, 0xda, 0xaa, 0x71, 0x4a, 0x91, 0xe1, 0x3a, 0x95, 0x4e, 0x3e, 0xe5, 0xde, 0x5, 0x75, 0xae, 0x3, 0xd8, 0xa8, 0x73, 0x48, 0x93, 0xe3, 0x38, 0xa4, 0x7f, 0xf, 0xd4, 0xef, 0x34, 0x44, 0x9f, 0x32, 0xe9, 0x99, 0x42, 0x79, 0xa2, 0xd2, 0x9, 0xf7, 0x2c, 0x5c, 0x87, 0xbc, 0x67, 0x17, 0xcc, 0x61, 0xba, 0xca, 0x11, 0x2a, 0xf1, 0x81, 0x5a, 0xc6, 0x1d, 0x6d, 0xb6, 0x8d, 0x56, 0x26, 0xfd, 0x50, 0x8b, 0xfb, 0x20, 0x1b, 0xc0, 0xb0, 0x6b, 0x51, 0x8a, 0xfa, 0x21, 0x1a, 0xc1, 0xb1, 0x6a, 0xc7, 0x1c, 0x6c, 0xb7, 0x8c, 0x57, 0x27, 0xfc, 0x60, 0xbb, 0xcb, 0x10, 0x2b, 0xf0, 0x80, 0x5b, 0xf6, 0x2d, 0x5d, 0x86, 0xbd, 0x66, 0x16, 0xcd, 0x33, 0xe8, 0x98, 0x43, 0x78, 0xa3, 0xd3, 0x8, 0xa5, 0x7e, 0xe, 0xd5, 0xee, 0x35, 0x45, 0x9e, 0x2, 0xd9, 0xa9, 0x72, 0x49, 0x92, 0xe2, 0x39, 0x94, 0x4f, 0x3f, 0xe4, 0xdf, 0x4, 0x74, 0xaf},
+ {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80, 0x41, 0x9d, 0xe4, 0x38, 0x16, 0xca, 0xb3, 0x6f, 0xef, 0x33, 0x4a, 0x96, 0xb8, 0x64, 0x1d, 0xc1, 0x82, 0x5e, 0x27, 0xfb, 0xd5, 0x9, 0x70, 0xac, 0x2c, 0xf0, 0x89, 0x55, 0x7b, 0xa7, 0xde, 0x2, 0xc3, 0x1f, 0x66, 0xba, 0x94, 0x48, 0x31, 0xed, 0x6d, 0xb1, 0xc8, 0x14, 0x3a, 0xe6, 0x9f, 0x43, 0x19, 0xc5, 0xbc, 0x60, 0x4e, 0x92, 0xeb, 0x37, 0xb7, 0x6b, 0x12, 0xce, 0xe0, 0x3c, 0x45, 0x99, 0x58, 0x84, 0xfd, 0x21, 0xf, 0xd3, 0xaa, 0x76, 0xf6, 0x2a, 0x53, 0x8f, 0xa1, 0x7d, 0x4, 0xd8, 0x9b, 0x47, 0x3e, 0xe2, 0xcc, 0x10, 0x69, 0xb5, 0x35, 0xe9, 0x90, 0x4c, 0x62, 0xbe, 0xc7, 0x1b, 0xda, 0x6, 0x7f, 0xa3, 0x8d, 0x51, 0x28, 0xf4, 0x74, 0xa8, 0xd1, 0xd, 0x23, 0xff, 0x86, 0x5a, 0x32, 0xee, 0x97, 0x4b, 0x65, 0xb9, 0xc0, 0x1c, 0x9c, 0x40, 0x39, 0xe5, 0xcb, 0x17, 0x6e, 0xb2, 0x73, 0xaf, 0xd6, 0xa, 0x24, 0xf8, 0x81, 0x5d, 0xdd, 0x1, 0x78, 0xa4, 0x8a, 0x56, 0x2f, 0xf3, 0xb0, 0x6c, 0x15, 0xc9, 0xe7, 0x3b, 0x42, 0x9e, 0x1e, 0xc2, 0xbb, 0x67, 0x49, 0x95, 0xec, 0x30, 0xf1, 0x2d, 0x54, 0x88, 0xa6, 0x7a, 0x3, 0xdf, 0x5f, 0x83, 0xfa, 0x26, 0x8, 0xd4, 0xad, 0x71, 0x2b, 0xf7, 0x8e, 0x52, 0x7c, 0xa0, 0xd9, 0x5, 0x85, 0x59, 0x20, 0xfc, 0xd2, 0xe, 0x77, 0xab, 0x6a, 0xb6, 0xcf, 0x13, 0x3d, 0xe1, 0x98, 0x44, 0xc4, 0x18, 0x61, 0xbd, 0x93, 0x4f, 0x36, 0xea, 0xa9, 0x75, 0xc, 0xd0, 0xfe, 0x22, 0x5b, 0x87, 0x7, 0xdb, 0xa2, 0x7e, 0x50, 0x8c, 0xf5, 0x29, 0xe8, 0x34, 0x4d, 0x91, 0xbf, 0x63, 0x1a, 0xc6, 0x46, 0x9a, 0xe3, 0x3f, 0x11, 0xcd, 0xb4, 0x68},
+ {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f, 0x51, 0x8c, 0xf6, 0x2b, 0x2, 0xdf, 0xa5, 0x78, 0xf7, 0x2a, 0x50, 0x8d, 0xa4, 0x79, 0x3, 0xde, 0xa2, 0x7f, 0x5, 0xd8, 0xf1, 0x2c, 0x56, 0x8b, 0x4, 0xd9, 0xa3, 0x7e, 0x57, 0x8a, 0xf0, 0x2d, 0xf3, 0x2e, 0x54, 0x89, 0xa0, 0x7d, 0x7, 0xda, 0x55, 0x88, 0xf2, 0x2f, 0x6, 0xdb, 0xa1, 0x7c, 0x59, 0x84, 0xfe, 0x23, 0xa, 0xd7, 0xad, 0x70, 0xff, 0x22, 0x58, 0x85, 0xac, 0x71, 0xb, 0xd6, 0x8, 0xd5, 0xaf, 0x72, 0x5b, 0x86, 0xfc, 0x21, 0xae, 0x73, 0x9, 0xd4, 0xfd, 0x20, 0x5a, 0x87, 0xfb, 0x26, 0x5c, 0x81, 0xa8, 0x75, 0xf, 0xd2, 0x5d, 0x80, 0xfa, 0x27, 0xe, 0xd3, 0xa9, 0x74, 0xaa, 0x77, 0xd, 0xd0, 0xf9, 0x24, 0x5e, 0x83, 0xc, 0xd1, 0xab, 0x76, 0x5f, 0x82, 0xf8, 0x25, 0xb2, 0x6f, 0x15, 0xc8, 0xe1, 0x3c, 0x46, 0x9b, 0x14, 0xc9, 0xb3, 0x6e, 0x47, 0x9a, 0xe0, 0x3d, 0xe3, 0x3e, 0x44, 0x99, 0xb0, 0x6d, 0x17, 0xca, 0x45, 0x98, 0xe2, 0x3f, 0x16, 0xcb, 0xb1, 0x6c, 0x10, 0xcd, 0xb7, 0x6a, 0x43, 0x9e, 0xe4, 0x39, 0xb6, 0x6b, 0x11, 0xcc, 0xe5, 0x38, 0x42, 0x9f, 0x41, 0x9c, 0xe6, 0x3b, 0x12, 0xcf, 0xb5, 0x68, 0xe7, 0x3a, 0x40, 0x9d, 0xb4, 0x69, 0x13, 0xce, 0xeb, 0x36, 0x4c, 0x91, 0xb8, 0x65, 0x1f, 0xc2, 0x4d, 0x90, 0xea, 0x37, 0x1e, 0xc3, 0xb9, 0x64, 0xba, 0x67, 0x1d, 0xc0, 0xe9, 0x34, 0x4e, 0x93, 0x1c, 0xc1, 0xbb, 0x66, 0x4f, 0x92, 0xe8, 0x35, 0x49, 0x94, 0xee, 0x33, 0x1a, 0xc7, 0xbd, 0x60, 0xef, 0x32, 0x48, 0x95, 0xbc, 0x61, 0x1b, 0xc6, 0x18, 0xc5, 0xbf, 0x62, 0x4b, 0x96, 0xec, 0x31, 0xbe, 0x63, 0x19, 0xc4, 0xed, 0x30, 0x4a, 0x97},
+ {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e, 0x61, 0xbf, 0xc0, 0x1e, 0x3e, 0xe0, 0x9f, 0x41, 0xdf, 0x1, 0x7e, 0xa0, 0x80, 0x5e, 0x21, 0xff, 0xc2, 0x1c, 0x63, 0xbd, 0x9d, 0x43, 0x3c, 0xe2, 0x7c, 0xa2, 0xdd, 0x3, 0x23, 0xfd, 0x82, 0x5c, 0xa3, 0x7d, 0x2, 0xdc, 0xfc, 0x22, 0x5d, 0x83, 0x1d, 0xc3, 0xbc, 0x62, 0x42, 0x9c, 0xe3, 0x3d, 0x99, 0x47, 0x38, 0xe6, 0xc6, 0x18, 0x67, 0xb9, 0x27, 0xf9, 0x86, 0x58, 0x78, 0xa6, 0xd9, 0x7, 0xf8, 0x26, 0x59, 0x87, 0xa7, 0x79, 0x6, 0xd8, 0x46, 0x98, 0xe7, 0x39, 0x19, 0xc7, 0xb8, 0x66, 0x5b, 0x85, 0xfa, 0x24, 0x4, 0xda, 0xa5, 0x7b, 0xe5, 0x3b, 0x44, 0x9a, 0xba, 0x64, 0x1b, 0xc5, 0x3a, 0xe4, 0x9b, 0x45, 0x65, 0xbb, 0xc4, 0x1a, 0x84, 0x5a, 0x25, 0xfb, 0xdb, 0x5, 0x7a, 0xa4, 0x2f, 0xf1, 0x8e, 0x50, 0x70, 0xae, 0xd1, 0xf, 0x91, 0x4f, 0x30, 0xee, 0xce, 0x10, 0x6f, 0xb1, 0x4e, 0x90, 0xef, 0x31, 0x11, 0xcf, 0xb0, 0x6e, 0xf0, 0x2e, 0x51, 0x8f, 0xaf, 0x71, 0xe, 0xd0, 0xed, 0x33, 0x4c, 0x92, 0xb2, 0x6c, 0x13, 0xcd, 0x53, 0x8d, 0xf2, 0x2c, 0xc, 0xd2, 0xad, 0x73, 0x8c, 0x52, 0x2d, 0xf3, 0xd3, 0xd, 0x72, 0xac, 0x32, 0xec, 0x93, 0x4d, 0x6d, 0xb3, 0xcc, 0x12, 0xb6, 0x68, 0x17, 0xc9, 0xe9, 0x37, 0x48, 0x96, 0x8, 0xd6, 0xa9, 0x77, 0x57, 0x89, 0xf6, 0x28, 0xd7, 0x9, 0x76, 0xa8, 0x88, 0x56, 0x29, 0xf7, 0x69, 0xb7, 0xc8, 0x16, 0x36, 0xe8, 0x97, 0x49, 0x74, 0xaa, 0xd5, 0xb, 0x2b, 0xf5, 0x8a, 0x54, 0xca, 0x14, 0x6b, 0xb5, 0x95, 0x4b, 0x34, 0xea, 0x15, 0xcb, 0xb4, 0x6a, 0x4a, 0x94, 0xeb, 0x35, 0xab, 0x75, 0xa, 0xd4, 0xf4, 0x2a, 0x55, 0x8b},
+ {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91, 0x71, 0xae, 0xd2, 0xd, 0x2a, 0xf5, 0x89, 0x56, 0xc7, 0x18, 0x64, 0xbb, 0x9c, 0x43, 0x3f, 0xe0, 0xe2, 0x3d, 0x41, 0x9e, 0xb9, 0x66, 0x1a, 0xc5, 0x54, 0x8b, 0xf7, 0x28, 0xf, 0xd0, 0xac, 0x73, 0x93, 0x4c, 0x30, 0xef, 0xc8, 0x17, 0x6b, 0xb4, 0x25, 0xfa, 0x86, 0x59, 0x7e, 0xa1, 0xdd, 0x2, 0xd9, 0x6, 0x7a, 0xa5, 0x82, 0x5d, 0x21, 0xfe, 0x6f, 0xb0, 0xcc, 0x13, 0x34, 0xeb, 0x97, 0x48, 0xa8, 0x77, 0xb, 0xd4, 0xf3, 0x2c, 0x50, 0x8f, 0x1e, 0xc1, 0xbd, 0x62, 0x45, 0x9a, 0xe6, 0x39, 0x3b, 0xe4, 0x98, 0x47, 0x60, 0xbf, 0xc3, 0x1c, 0x8d, 0x52, 0x2e, 0xf1, 0xd6, 0x9, 0x75, 0xaa, 0x4a, 0x95, 0xe9, 0x36, 0x11, 0xce, 0xb2, 0x6d, 0xfc, 0x23, 0x5f, 0x80, 0xa7, 0x78, 0x4, 0xdb, 0xaf, 0x70, 0xc, 0xd3, 0xf4, 0x2b, 0x57, 0x88, 0x19, 0xc6, 0xba, 0x65, 0x42, 0x9d, 0xe1, 0x3e, 0xde, 0x1, 0x7d, 0xa2, 0x85, 0x5a, 0x26, 0xf9, 0x68, 0xb7, 0xcb, 0x14, 0x33, 0xec, 0x90, 0x4f, 0x4d, 0x92, 0xee, 0x31, 0x16, 0xc9, 0xb5, 0x6a, 0xfb, 0x24, 0x58, 0x87, 0xa0, 0x7f, 0x3, 0xdc, 0x3c, 0xe3, 0x9f, 0x40, 0x67, 0xb8, 0xc4, 0x1b, 0x8a, 0x55, 0x29, 0xf6, 0xd1, 0xe, 0x72, 0xad, 0x76, 0xa9, 0xd5, 0xa, 0x2d, 0xf2, 0x8e, 0x51, 0xc0, 0x1f, 0x63, 0xbc, 0x9b, 0x44, 0x38, 0xe7, 0x7, 0xd8, 0xa4, 0x7b, 0x5c, 0x83, 0xff, 0x20, 0xb1, 0x6e, 0x12, 0xcd, 0xea, 0x35, 0x49, 0x96, 0x94, 0x4b, 0x37, 0xe8, 0xcf, 0x10, 0x6c, 0xb3, 0x22, 0xfd, 0x81, 0x5e, 0x79, 0xa6, 0xda, 0x5, 0xe5, 0x3a, 0x46, 0x99, 0xbe, 0x61, 0x1d, 0xc2, 0x53, 0x8c, 0xf0, 0x2f, 0x8, 0xd7, 0xab, 0x74},
+ {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9, 0xa6, 0x46, 0x7b, 0x9b, 0x1, 0xe1, 0xdc, 0x3c, 0xf5, 0x15, 0x28, 0xc8, 0x52, 0xb2, 0x8f, 0x6f, 0x51, 0xb1, 0x8c, 0x6c, 0xf6, 0x16, 0x2b, 0xcb, 0x2, 0xe2, 0xdf, 0x3f, 0xa5, 0x45, 0x78, 0x98, 0xf7, 0x17, 0x2a, 0xca, 0x50, 0xb0, 0x8d, 0x6d, 0xa4, 0x44, 0x79, 0x99, 0x3, 0xe3, 0xde, 0x3e, 0xa2, 0x42, 0x7f, 0x9f, 0x5, 0xe5, 0xd8, 0x38, 0xf1, 0x11, 0x2c, 0xcc, 0x56, 0xb6, 0x8b, 0x6b, 0x4, 0xe4, 0xd9, 0x39, 0xa3, 0x43, 0x7e, 0x9e, 0x57, 0xb7, 0x8a, 0x6a, 0xf0, 0x10, 0x2d, 0xcd, 0xf3, 0x13, 0x2e, 0xce, 0x54, 0xb4, 0x89, 0x69, 0xa0, 0x40, 0x7d, 0x9d, 0x7, 0xe7, 0xda, 0x3a, 0x55, 0xb5, 0x88, 0x68, 0xf2, 0x12, 0x2f, 0xcf, 0x6, 0xe6, 0xdb, 0x3b, 0xa1, 0x41, 0x7c, 0x9c, 0x59, 0xb9, 0x84, 0x64, 0xfe, 0x1e, 0x23, 0xc3, 0xa, 0xea, 0xd7, 0x37, 0xad, 0x4d, 0x70, 0x90, 0xff, 0x1f, 0x22, 0xc2, 0x58, 0xb8, 0x85, 0x65, 0xac, 0x4c, 0x71, 0x91, 0xb, 0xeb, 0xd6, 0x36, 0x8, 0xe8, 0xd5, 0x35, 0xaf, 0x4f, 0x72, 0x92, 0x5b, 0xbb, 0x86, 0x66, 0xfc, 0x1c, 0x21, 0xc1, 0xae, 0x4e, 0x73, 0x93, 0x9, 0xe9, 0xd4, 0x34, 0xfd, 0x1d, 0x20, 0xc0, 0x5a, 0xba, 0x87, 0x67, 0xfb, 0x1b, 0x26, 0xc6, 0x5c, 0xbc, 0x81, 0x61, 0xa8, 0x48, 0x75, 0x95, 0xf, 0xef, 0xd2, 0x32, 0x5d, 0xbd, 0x80, 0x60, 0xfa, 0x1a, 0x27, 0xc7, 0xe, 0xee, 0xd3, 0x33, 0xa9, 0x49, 0x74, 0x94, 0xaa, 0x4a, 0x77, 0x97, 0xd, 0xed, 0xd0, 0x30, 0xf9, 0x19, 0x24, 0xc4, 0x5e, 0xbe, 0x83, 0x63, 0xc, 0xec, 0xd1, 0x31, 0xab, 0x4b, 0x76, 0x96, 0x5f, 0xbf, 0x82, 0x62, 0xf8, 0x18, 0x25, 0xc5},
+ {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6, 0xb6, 0x57, 0x69, 0x88, 0x15, 0xf4, 0xca, 0x2b, 0xed, 0xc, 0x32, 0xd3, 0x4e, 0xaf, 0x91, 0x70, 0x71, 0x90, 0xae, 0x4f, 0xd2, 0x33, 0xd, 0xec, 0x2a, 0xcb, 0xf5, 0x14, 0x89, 0x68, 0x56, 0xb7, 0xc7, 0x26, 0x18, 0xf9, 0x64, 0x85, 0xbb, 0x5a, 0x9c, 0x7d, 0x43, 0xa2, 0x3f, 0xde, 0xe0, 0x1, 0xe2, 0x3, 0x3d, 0xdc, 0x41, 0xa0, 0x9e, 0x7f, 0xb9, 0x58, 0x66, 0x87, 0x1a, 0xfb, 0xc5, 0x24, 0x54, 0xb5, 0x8b, 0x6a, 0xf7, 0x16, 0x28, 0xc9, 0xf, 0xee, 0xd0, 0x31, 0xac, 0x4d, 0x73, 0x92, 0x93, 0x72, 0x4c, 0xad, 0x30, 0xd1, 0xef, 0xe, 0xc8, 0x29, 0x17, 0xf6, 0x6b, 0x8a, 0xb4, 0x55, 0x25, 0xc4, 0xfa, 0x1b, 0x86, 0x67, 0x59, 0xb8, 0x7e, 0x9f, 0xa1, 0x40, 0xdd, 0x3c, 0x2, 0xe3, 0xd9, 0x38, 0x6, 0xe7, 0x7a, 0x9b, 0xa5, 0x44, 0x82, 0x63, 0x5d, 0xbc, 0x21, 0xc0, 0xfe, 0x1f, 0x6f, 0x8e, 0xb0, 0x51, 0xcc, 0x2d, 0x13, 0xf2, 0x34, 0xd5, 0xeb, 0xa, 0x97, 0x76, 0x48, 0xa9, 0xa8, 0x49, 0x77, 0x96, 0xb, 0xea, 0xd4, 0x35, 0xf3, 0x12, 0x2c, 0xcd, 0x50, 0xb1, 0x8f, 0x6e, 0x1e, 0xff, 0xc1, 0x20, 0xbd, 0x5c, 0x62, 0x83, 0x45, 0xa4, 0x9a, 0x7b, 0xe6, 0x7, 0x39, 0xd8, 0x3b, 0xda, 0xe4, 0x5, 0x98, 0x79, 0x47, 0xa6, 0x60, 0x81, 0xbf, 0x5e, 0xc3, 0x22, 0x1c, 0xfd, 0x8d, 0x6c, 0x52, 0xb3, 0x2e, 0xcf, 0xf1, 0x10, 0xd6, 0x37, 0x9, 0xe8, 0x75, 0x94, 0xaa, 0x4b, 0x4a, 0xab, 0x95, 0x74, 0xe9, 0x8, 0x36, 0xd7, 0x11, 0xf0, 0xce, 0x2f, 0xb2, 0x53, 0x6d, 0x8c, 0xfc, 0x1d, 0x23, 0xc2, 0x5f, 0xbe, 0x80, 0x61, 0xa7, 0x46, 0x78, 0x99, 0x4, 0xe5, 0xdb, 0x3a},
+ {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7, 0x86, 0x64, 0x5f, 0xbd, 0x29, 0xcb, 0xf0, 0x12, 0xc5, 0x27, 0x1c, 0xfe, 0x6a, 0x88, 0xb3, 0x51, 0x11, 0xf3, 0xc8, 0x2a, 0xbe, 0x5c, 0x67, 0x85, 0x52, 0xb0, 0x8b, 0x69, 0xfd, 0x1f, 0x24, 0xc6, 0x97, 0x75, 0x4e, 0xac, 0x38, 0xda, 0xe1, 0x3, 0xd4, 0x36, 0xd, 0xef, 0x7b, 0x99, 0xa2, 0x40, 0x22, 0xc0, 0xfb, 0x19, 0x8d, 0x6f, 0x54, 0xb6, 0x61, 0x83, 0xb8, 0x5a, 0xce, 0x2c, 0x17, 0xf5, 0xa4, 0x46, 0x7d, 0x9f, 0xb, 0xe9, 0xd2, 0x30, 0xe7, 0x5, 0x3e, 0xdc, 0x48, 0xaa, 0x91, 0x73, 0x33, 0xd1, 0xea, 0x8, 0x9c, 0x7e, 0x45, 0xa7, 0x70, 0x92, 0xa9, 0x4b, 0xdf, 0x3d, 0x6, 0xe4, 0xb5, 0x57, 0x6c, 0x8e, 0x1a, 0xf8, 0xc3, 0x21, 0xf6, 0x14, 0x2f, 0xcd, 0x59, 0xbb, 0x80, 0x62, 0x44, 0xa6, 0x9d, 0x7f, 0xeb, 0x9, 0x32, 0xd0, 0x7, 0xe5, 0xde, 0x3c, 0xa8, 0x4a, 0x71, 0x93, 0xc2, 0x20, 0x1b, 0xf9, 0x6d, 0x8f, 0xb4, 0x56, 0x81, 0x63, 0x58, 0xba, 0x2e, 0xcc, 0xf7, 0x15, 0x55, 0xb7, 0x8c, 0x6e, 0xfa, 0x18, 0x23, 0xc1, 0x16, 0xf4, 0xcf, 0x2d, 0xb9, 0x5b, 0x60, 0x82, 0xd3, 0x31, 0xa, 0xe8, 0x7c, 0x9e, 0xa5, 0x47, 0x90, 0x72, 0x49, 0xab, 0x3f, 0xdd, 0xe6, 0x4, 0x66, 0x84, 0xbf, 0x5d, 0xc9, 0x2b, 0x10, 0xf2, 0x25, 0xc7, 0xfc, 0x1e, 0x8a, 0x68, 0x53, 0xb1, 0xe0, 0x2, 0x39, 0xdb, 0x4f, 0xad, 0x96, 0x74, 0xa3, 0x41, 0x7a, 0x98, 0xc, 0xee, 0xd5, 0x37, 0x77, 0x95, 0xae, 0x4c, 0xd8, 0x3a, 0x1, 0xe3, 0x34, 0xd6, 0xed, 0xf, 0x9b, 0x79, 0x42, 0xa0, 0xf1, 0x13, 0x28, 0xca, 0x5e, 0xbc, 0x87, 0x65, 0xb2, 0x50, 0x6b, 0x89, 0x1d, 0xff, 0xc4, 0x26},
+ {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8, 0x96, 0x75, 0x4d, 0xae, 0x3d, 0xde, 0xe6, 0x5, 0xdd, 0x3e, 0x6, 0xe5, 0x76, 0x95, 0xad, 0x4e, 0x31, 0xd2, 0xea, 0x9, 0x9a, 0x79, 0x41, 0xa2, 0x7a, 0x99, 0xa1, 0x42, 0xd1, 0x32, 0xa, 0xe9, 0xa7, 0x44, 0x7c, 0x9f, 0xc, 0xef, 0xd7, 0x34, 0xec, 0xf, 0x37, 0xd4, 0x47, 0xa4, 0x9c, 0x7f, 0x62, 0x81, 0xb9, 0x5a, 0xc9, 0x2a, 0x12, 0xf1, 0x29, 0xca, 0xf2, 0x11, 0x82, 0x61, 0x59, 0xba, 0xf4, 0x17, 0x2f, 0xcc, 0x5f, 0xbc, 0x84, 0x67, 0xbf, 0x5c, 0x64, 0x87, 0x14, 0xf7, 0xcf, 0x2c, 0x53, 0xb0, 0x88, 0x6b, 0xf8, 0x1b, 0x23, 0xc0, 0x18, 0xfb, 0xc3, 0x20, 0xb3, 0x50, 0x68, 0x8b, 0xc5, 0x26, 0x1e, 0xfd, 0x6e, 0x8d, 0xb5, 0x56, 0x8e, 0x6d, 0x55, 0xb6, 0x25, 0xc6, 0xfe, 0x1d, 0xc4, 0x27, 0x1f, 0xfc, 0x6f, 0x8c, 0xb4, 0x57, 0x8f, 0x6c, 0x54, 0xb7, 0x24, 0xc7, 0xff, 0x1c, 0x52, 0xb1, 0x89, 0x6a, 0xf9, 0x1a, 0x22, 0xc1, 0x19, 0xfa, 0xc2, 0x21, 0xb2, 0x51, 0x69, 0x8a, 0xf5, 0x16, 0x2e, 0xcd, 0x5e, 0xbd, 0x85, 0x66, 0xbe, 0x5d, 0x65, 0x86, 0x15, 0xf6, 0xce, 0x2d, 0x63, 0x80, 0xb8, 0x5b, 0xc8, 0x2b, 0x13, 0xf0, 0x28, 0xcb, 0xf3, 0x10, 0x83, 0x60, 0x58, 0xbb, 0xa6, 0x45, 0x7d, 0x9e, 0xd, 0xee, 0xd6, 0x35, 0xed, 0xe, 0x36, 0xd5, 0x46, 0xa5, 0x9d, 0x7e, 0x30, 0xd3, 0xeb, 0x8, 0x9b, 0x78, 0x40, 0xa3, 0x7b, 0x98, 0xa0, 0x43, 0xd0, 0x33, 0xb, 0xe8, 0x97, 0x74, 0x4c, 0xaf, 0x3c, 0xdf, 0xe7, 0x4, 0xdc, 0x3f, 0x7, 0xe4, 0x77, 0x94, 0xac, 0x4f, 0x1, 0xe2, 0xda, 0x39, 0xaa, 0x49, 0x71, 0x92, 0x4a, 0xa9, 0x91, 0x72, 0xe1, 0x2, 0x3a, 0xd9},
+ {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5, 0xe6, 0x2, 0x33, 0xd7, 0x51, 0xb5, 0x84, 0x60, 0x95, 0x71, 0x40, 0xa4, 0x22, 0xc6, 0xf7, 0x13, 0xd1, 0x35, 0x4, 0xe0, 0x66, 0x82, 0xb3, 0x57, 0xa2, 0x46, 0x77, 0x93, 0x15, 0xf1, 0xc0, 0x24, 0x37, 0xd3, 0xe2, 0x6, 0x80, 0x64, 0x55, 0xb1, 0x44, 0xa0, 0x91, 0x75, 0xf3, 0x17, 0x26, 0xc2, 0xbf, 0x5b, 0x6a, 0x8e, 0x8, 0xec, 0xdd, 0x39, 0xcc, 0x28, 0x19, 0xfd, 0x7b, 0x9f, 0xae, 0x4a, 0x59, 0xbd, 0x8c, 0x68, 0xee, 0xa, 0x3b, 0xdf, 0x2a, 0xce, 0xff, 0x1b, 0x9d, 0x79, 0x48, 0xac, 0x6e, 0x8a, 0xbb, 0x5f, 0xd9, 0x3d, 0xc, 0xe8, 0x1d, 0xf9, 0xc8, 0x2c, 0xaa, 0x4e, 0x7f, 0x9b, 0x88, 0x6c, 0x5d, 0xb9, 0x3f, 0xdb, 0xea, 0xe, 0xfb, 0x1f, 0x2e, 0xca, 0x4c, 0xa8, 0x99, 0x7d, 0x63, 0x87, 0xb6, 0x52, 0xd4, 0x30, 0x1, 0xe5, 0x10, 0xf4, 0xc5, 0x21, 0xa7, 0x43, 0x72, 0x96, 0x85, 0x61, 0x50, 0xb4, 0x32, 0xd6, 0xe7, 0x3, 0xf6, 0x12, 0x23, 0xc7, 0x41, 0xa5, 0x94, 0x70, 0xb2, 0x56, 0x67, 0x83, 0x5, 0xe1, 0xd0, 0x34, 0xc1, 0x25, 0x14, 0xf0, 0x76, 0x92, 0xa3, 0x47, 0x54, 0xb0, 0x81, 0x65, 0xe3, 0x7, 0x36, 0xd2, 0x27, 0xc3, 0xf2, 0x16, 0x90, 0x74, 0x45, 0xa1, 0xdc, 0x38, 0x9, 0xed, 0x6b, 0x8f, 0xbe, 0x5a, 0xaf, 0x4b, 0x7a, 0x9e, 0x18, 0xfc, 0xcd, 0x29, 0x3a, 0xde, 0xef, 0xb, 0x8d, 0x69, 0x58, 0xbc, 0x49, 0xad, 0x9c, 0x78, 0xfe, 0x1a, 0x2b, 0xcf, 0xd, 0xe9, 0xd8, 0x3c, 0xba, 0x5e, 0x6f, 0x8b, 0x7e, 0x9a, 0xab, 0x4f, 0xc9, 0x2d, 0x1c, 0xf8, 0xeb, 0xf, 0x3e, 0xda, 0x5c, 0xb8, 0x89, 0x6d, 0x98, 0x7c, 0x4d, 0xa9, 0x2f, 0xcb, 0xfa, 0x1e},
+ {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa, 0xf6, 0x13, 0x21, 0xc4, 0x45, 0xa0, 0x92, 0x77, 0x8d, 0x68, 0x5a, 0xbf, 0x3e, 0xdb, 0xe9, 0xc, 0xf1, 0x14, 0x26, 0xc3, 0x42, 0xa7, 0x95, 0x70, 0x8a, 0x6f, 0x5d, 0xb8, 0x39, 0xdc, 0xee, 0xb, 0x7, 0xe2, 0xd0, 0x35, 0xb4, 0x51, 0x63, 0x86, 0x7c, 0x99, 0xab, 0x4e, 0xcf, 0x2a, 0x18, 0xfd, 0xff, 0x1a, 0x28, 0xcd, 0x4c, 0xa9, 0x9b, 0x7e, 0x84, 0x61, 0x53, 0xb6, 0x37, 0xd2, 0xe0, 0x5, 0x9, 0xec, 0xde, 0x3b, 0xba, 0x5f, 0x6d, 0x88, 0x72, 0x97, 0xa5, 0x40, 0xc1, 0x24, 0x16, 0xf3, 0xe, 0xeb, 0xd9, 0x3c, 0xbd, 0x58, 0x6a, 0x8f, 0x75, 0x90, 0xa2, 0x47, 0xc6, 0x23, 0x11, 0xf4, 0xf8, 0x1d, 0x2f, 0xca, 0x4b, 0xae, 0x9c, 0x79, 0x83, 0x66, 0x54, 0xb1, 0x30, 0xd5, 0xe7, 0x2, 0xe3, 0x6, 0x34, 0xd1, 0x50, 0xb5, 0x87, 0x62, 0x98, 0x7d, 0x4f, 0xaa, 0x2b, 0xce, 0xfc, 0x19, 0x15, 0xf0, 0xc2, 0x27, 0xa6, 0x43, 0x71, 0x94, 0x6e, 0x8b, 0xb9, 0x5c, 0xdd, 0x38, 0xa, 0xef, 0x12, 0xf7, 0xc5, 0x20, 0xa1, 0x44, 0x76, 0x93, 0x69, 0x8c, 0xbe, 0x5b, 0xda, 0x3f, 0xd, 0xe8, 0xe4, 0x1, 0x33, 0xd6, 0x57, 0xb2, 0x80, 0x65, 0x9f, 0x7a, 0x48, 0xad, 0x2c, 0xc9, 0xfb, 0x1e, 0x1c, 0xf9, 0xcb, 0x2e, 0xaf, 0x4a, 0x78, 0x9d, 0x67, 0x82, 0xb0, 0x55, 0xd4, 0x31, 0x3, 0xe6, 0xea, 0xf, 0x3d, 0xd8, 0x59, 0xbc, 0x8e, 0x6b, 0x91, 0x74, 0x46, 0xa3, 0x22, 0xc7, 0xf5, 0x10, 0xed, 0x8, 0x3a, 0xdf, 0x5e, 0xbb, 0x89, 0x6c, 0x96, 0x73, 0x41, 0xa4, 0x25, 0xc0, 0xf2, 0x17, 0x1b, 0xfe, 0xcc, 0x29, 0xa8, 0x4d, 0x7f, 0x9a, 0x60, 0x85, 0xb7, 0x52, 0xd3, 0x36, 0x4, 0xe1},
+ {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb, 0xc6, 0x20, 0x17, 0xf1, 0x79, 0x9f, 0xa8, 0x4e, 0xa5, 0x43, 0x74, 0x92, 0x1a, 0xfc, 0xcb, 0x2d, 0x91, 0x77, 0x40, 0xa6, 0x2e, 0xc8, 0xff, 0x19, 0xf2, 0x14, 0x23, 0xc5, 0x4d, 0xab, 0x9c, 0x7a, 0x57, 0xb1, 0x86, 0x60, 0xe8, 0xe, 0x39, 0xdf, 0x34, 0xd2, 0xe5, 0x3, 0x8b, 0x6d, 0x5a, 0xbc, 0x3f, 0xd9, 0xee, 0x8, 0x80, 0x66, 0x51, 0xb7, 0x5c, 0xba, 0x8d, 0x6b, 0xe3, 0x5, 0x32, 0xd4, 0xf9, 0x1f, 0x28, 0xce, 0x46, 0xa0, 0x97, 0x71, 0x9a, 0x7c, 0x4b, 0xad, 0x25, 0xc3, 0xf4, 0x12, 0xae, 0x48, 0x7f, 0x99, 0x11, 0xf7, 0xc0, 0x26, 0xcd, 0x2b, 0x1c, 0xfa, 0x72, 0x94, 0xa3, 0x45, 0x68, 0x8e, 0xb9, 0x5f, 0xd7, 0x31, 0x6, 0xe0, 0xb, 0xed, 0xda, 0x3c, 0xb4, 0x52, 0x65, 0x83, 0x7e, 0x98, 0xaf, 0x49, 0xc1, 0x27, 0x10, 0xf6, 0x1d, 0xfb, 0xcc, 0x2a, 0xa2, 0x44, 0x73, 0x95, 0xb8, 0x5e, 0x69, 0x8f, 0x7, 0xe1, 0xd6, 0x30, 0xdb, 0x3d, 0xa, 0xec, 0x64, 0x82, 0xb5, 0x53, 0xef, 0x9, 0x3e, 0xd8, 0x50, 0xb6, 0x81, 0x67, 0x8c, 0x6a, 0x5d, 0xbb, 0x33, 0xd5, 0xe2, 0x4, 0x29, 0xcf, 0xf8, 0x1e, 0x96, 0x70, 0x47, 0xa1, 0x4a, 0xac, 0x9b, 0x7d, 0xf5, 0x13, 0x24, 0xc2, 0x41, 0xa7, 0x90, 0x76, 0xfe, 0x18, 0x2f, 0xc9, 0x22, 0xc4, 0xf3, 0x15, 0x9d, 0x7b, 0x4c, 0xaa, 0x87, 0x61, 0x56, 0xb0, 0x38, 0xde, 0xe9, 0xf, 0xe4, 0x2, 0x35, 0xd3, 0x5b, 0xbd, 0x8a, 0x6c, 0xd0, 0x36, 0x1, 0xe7, 0x6f, 0x89, 0xbe, 0x58, 0xb3, 0x55, 0x62, 0x84, 0xc, 0xea, 0xdd, 0x3b, 0x16, 0xf0, 0xc7, 0x21, 0xa9, 0x4f, 0x78, 0x9e, 0x75, 0x93, 0xa4, 0x42, 0xca, 0x2c, 0x1b, 0xfd},
+ {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4, 0xd6, 0x31, 0x5, 0xe2, 0x6d, 0x8a, 0xbe, 0x59, 0xbd, 0x5a, 0x6e, 0x89, 0x6, 0xe1, 0xd5, 0x32, 0xb1, 0x56, 0x62, 0x85, 0xa, 0xed, 0xd9, 0x3e, 0xda, 0x3d, 0x9, 0xee, 0x61, 0x86, 0xb2, 0x55, 0x67, 0x80, 0xb4, 0x53, 0xdc, 0x3b, 0xf, 0xe8, 0xc, 0xeb, 0xdf, 0x38, 0xb7, 0x50, 0x64, 0x83, 0x7f, 0x98, 0xac, 0x4b, 0xc4, 0x23, 0x17, 0xf0, 0x14, 0xf3, 0xc7, 0x20, 0xaf, 0x48, 0x7c, 0x9b, 0xa9, 0x4e, 0x7a, 0x9d, 0x12, 0xf5, 0xc1, 0x26, 0xc2, 0x25, 0x11, 0xf6, 0x79, 0x9e, 0xaa, 0x4d, 0xce, 0x29, 0x1d, 0xfa, 0x75, 0x92, 0xa6, 0x41, 0xa5, 0x42, 0x76, 0x91, 0x1e, 0xf9, 0xcd, 0x2a, 0x18, 0xff, 0xcb, 0x2c, 0xa3, 0x44, 0x70, 0x97, 0x73, 0x94, 0xa0, 0x47, 0xc8, 0x2f, 0x1b, 0xfc, 0xfe, 0x19, 0x2d, 0xca, 0x45, 0xa2, 0x96, 0x71, 0x95, 0x72, 0x46, 0xa1, 0x2e, 0xc9, 0xfd, 0x1a, 0x28, 0xcf, 0xfb, 0x1c, 0x93, 0x74, 0x40, 0xa7, 0x43, 0xa4, 0x90, 0x77, 0xf8, 0x1f, 0x2b, 0xcc, 0x4f, 0xa8, 0x9c, 0x7b, 0xf4, 0x13, 0x27, 0xc0, 0x24, 0xc3, 0xf7, 0x10, 0x9f, 0x78, 0x4c, 0xab, 0x99, 0x7e, 0x4a, 0xad, 0x22, 0xc5, 0xf1, 0x16, 0xf2, 0x15, 0x21, 0xc6, 0x49, 0xae, 0x9a, 0x7d, 0x81, 0x66, 0x52, 0xb5, 0x3a, 0xdd, 0xe9, 0xe, 0xea, 0xd, 0x39, 0xde, 0x51, 0xb6, 0x82, 0x65, 0x57, 0xb0, 0x84, 0x63, 0xec, 0xb, 0x3f, 0xd8, 0x3c, 0xdb, 0xef, 0x8, 0x87, 0x60, 0x54, 0xb3, 0x30, 0xd7, 0xe3, 0x4, 0x8b, 0x6c, 0x58, 0xbf, 0x5b, 0xbc, 0x88, 0x6f, 0xe0, 0x7, 0x33, 0xd4, 0xe6, 0x1, 0x35, 0xd2, 0x5d, 0xba, 0x8e, 0x69, 0x8d, 0x6a, 0x5e, 0xb9, 0x36, 0xd1, 0xe5, 0x2},
+ {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1, 0x26, 0xce, 0xeb, 0x3, 0xa1, 0x49, 0x6c, 0x84, 0x35, 0xdd, 0xf8, 0x10, 0xb2, 0x5a, 0x7f, 0x97, 0x4c, 0xa4, 0x81, 0x69, 0xcb, 0x23, 0x6, 0xee, 0x5f, 0xb7, 0x92, 0x7a, 0xd8, 0x30, 0x15, 0xfd, 0x6a, 0x82, 0xa7, 0x4f, 0xed, 0x5, 0x20, 0xc8, 0x79, 0x91, 0xb4, 0x5c, 0xfe, 0x16, 0x33, 0xdb, 0x98, 0x70, 0x55, 0xbd, 0x1f, 0xf7, 0xd2, 0x3a, 0x8b, 0x63, 0x46, 0xae, 0xc, 0xe4, 0xc1, 0x29, 0xbe, 0x56, 0x73, 0x9b, 0x39, 0xd1, 0xf4, 0x1c, 0xad, 0x45, 0x60, 0x88, 0x2a, 0xc2, 0xe7, 0xf, 0xd4, 0x3c, 0x19, 0xf1, 0x53, 0xbb, 0x9e, 0x76, 0xc7, 0x2f, 0xa, 0xe2, 0x40, 0xa8, 0x8d, 0x65, 0xf2, 0x1a, 0x3f, 0xd7, 0x75, 0x9d, 0xb8, 0x50, 0xe1, 0x9, 0x2c, 0xc4, 0x66, 0x8e, 0xab, 0x43, 0x2d, 0xc5, 0xe0, 0x8, 0xaa, 0x42, 0x67, 0x8f, 0x3e, 0xd6, 0xf3, 0x1b, 0xb9, 0x51, 0x74, 0x9c, 0xb, 0xe3, 0xc6, 0x2e, 0x8c, 0x64, 0x41, 0xa9, 0x18, 0xf0, 0xd5, 0x3d, 0x9f, 0x77, 0x52, 0xba, 0x61, 0x89, 0xac, 0x44, 0xe6, 0xe, 0x2b, 0xc3, 0x72, 0x9a, 0xbf, 0x57, 0xf5, 0x1d, 0x38, 0xd0, 0x47, 0xaf, 0x8a, 0x62, 0xc0, 0x28, 0xd, 0xe5, 0x54, 0xbc, 0x99, 0x71, 0xd3, 0x3b, 0x1e, 0xf6, 0xb5, 0x5d, 0x78, 0x90, 0x32, 0xda, 0xff, 0x17, 0xa6, 0x4e, 0x6b, 0x83, 0x21, 0xc9, 0xec, 0x4, 0x93, 0x7b, 0x5e, 0xb6, 0x14, 0xfc, 0xd9, 0x31, 0x80, 0x68, 0x4d, 0xa5, 0x7, 0xef, 0xca, 0x22, 0xf9, 0x11, 0x34, 0xdc, 0x7e, 0x96, 0xb3, 0x5b, 0xea, 0x2, 0x27, 0xcf, 0x6d, 0x85, 0xa0, 0x48, 0xdf, 0x37, 0x12, 0xfa, 0x58, 0xb0, 0x95, 0x7d, 0xcc, 0x24, 0x1, 0xe9, 0x4b, 0xa3, 0x86, 0x6e},
+ {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe, 0x36, 0xdf, 0xf9, 0x10, 0xb5, 0x5c, 0x7a, 0x93, 0x2d, 0xc4, 0xe2, 0xb, 0xae, 0x47, 0x61, 0x88, 0x6c, 0x85, 0xa3, 0x4a, 0xef, 0x6, 0x20, 0xc9, 0x77, 0x9e, 0xb8, 0x51, 0xf4, 0x1d, 0x3b, 0xd2, 0x5a, 0xb3, 0x95, 0x7c, 0xd9, 0x30, 0x16, 0xff, 0x41, 0xa8, 0x8e, 0x67, 0xc2, 0x2b, 0xd, 0xe4, 0xd8, 0x31, 0x17, 0xfe, 0x5b, 0xb2, 0x94, 0x7d, 0xc3, 0x2a, 0xc, 0xe5, 0x40, 0xa9, 0x8f, 0x66, 0xee, 0x7, 0x21, 0xc8, 0x6d, 0x84, 0xa2, 0x4b, 0xf5, 0x1c, 0x3a, 0xd3, 0x76, 0x9f, 0xb9, 0x50, 0xb4, 0x5d, 0x7b, 0x92, 0x37, 0xde, 0xf8, 0x11, 0xaf, 0x46, 0x60, 0x89, 0x2c, 0xc5, 0xe3, 0xa, 0x82, 0x6b, 0x4d, 0xa4, 0x1, 0xe8, 0xce, 0x27, 0x99, 0x70, 0x56, 0xbf, 0x1a, 0xf3, 0xd5, 0x3c, 0xad, 0x44, 0x62, 0x8b, 0x2e, 0xc7, 0xe1, 0x8, 0xb6, 0x5f, 0x79, 0x90, 0x35, 0xdc, 0xfa, 0x13, 0x9b, 0x72, 0x54, 0xbd, 0x18, 0xf1, 0xd7, 0x3e, 0x80, 0x69, 0x4f, 0xa6, 0x3, 0xea, 0xcc, 0x25, 0xc1, 0x28, 0xe, 0xe7, 0x42, 0xab, 0x8d, 0x64, 0xda, 0x33, 0x15, 0xfc, 0x59, 0xb0, 0x96, 0x7f, 0xf7, 0x1e, 0x38, 0xd1, 0x74, 0x9d, 0xbb, 0x52, 0xec, 0x5, 0x23, 0xca, 0x6f, 0x86, 0xa0, 0x49, 0x75, 0x9c, 0xba, 0x53, 0xf6, 0x1f, 0x39, 0xd0, 0x6e, 0x87, 0xa1, 0x48, 0xed, 0x4, 0x22, 0xcb, 0x43, 0xaa, 0x8c, 0x65, 0xc0, 0x29, 0xf, 0xe6, 0x58, 0xb1, 0x97, 0x7e, 0xdb, 0x32, 0x14, 0xfd, 0x19, 0xf0, 0xd6, 0x3f, 0x9a, 0x73, 0x55, 0xbc, 0x2, 0xeb, 0xcd, 0x24, 0x81, 0x68, 0x4e, 0xa7, 0x2f, 0xc6, 0xe0, 0x9, 0xac, 0x45, 0x63, 0x8a, 0x34, 0xdd, 0xfb, 0x12, 0xb7, 0x5e, 0x78, 0x91},
+ {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf, 0x6, 0xec, 0xcf, 0x25, 0x89, 0x63, 0x40, 0xaa, 0x5, 0xef, 0xcc, 0x26, 0x8a, 0x60, 0x43, 0xa9, 0xc, 0xe6, 0xc5, 0x2f, 0x83, 0x69, 0x4a, 0xa0, 0xf, 0xe5, 0xc6, 0x2c, 0x80, 0x6a, 0x49, 0xa3, 0xa, 0xe0, 0xc3, 0x29, 0x85, 0x6f, 0x4c, 0xa6, 0x9, 0xe3, 0xc0, 0x2a, 0x86, 0x6c, 0x4f, 0xa5, 0x18, 0xf2, 0xd1, 0x3b, 0x97, 0x7d, 0x5e, 0xb4, 0x1b, 0xf1, 0xd2, 0x38, 0x94, 0x7e, 0x5d, 0xb7, 0x1e, 0xf4, 0xd7, 0x3d, 0x91, 0x7b, 0x58, 0xb2, 0x1d, 0xf7, 0xd4, 0x3e, 0x92, 0x78, 0x5b, 0xb1, 0x14, 0xfe, 0xdd, 0x37, 0x9b, 0x71, 0x52, 0xb8, 0x17, 0xfd, 0xde, 0x34, 0x98, 0x72, 0x51, 0xbb, 0x12, 0xf8, 0xdb, 0x31, 0x9d, 0x77, 0x54, 0xbe, 0x11, 0xfb, 0xd8, 0x32, 0x9e, 0x74, 0x57, 0xbd, 0x30, 0xda, 0xf9, 0x13, 0xbf, 0x55, 0x76, 0x9c, 0x33, 0xd9, 0xfa, 0x10, 0xbc, 0x56, 0x75, 0x9f, 0x36, 0xdc, 0xff, 0x15, 0xb9, 0x53, 0x70, 0x9a, 0x35, 0xdf, 0xfc, 0x16, 0xba, 0x50, 0x73, 0x99, 0x3c, 0xd6, 0xf5, 0x1f, 0xb3, 0x59, 0x7a, 0x90, 0x3f, 0xd5, 0xf6, 0x1c, 0xb0, 0x5a, 0x79, 0x93, 0x3a, 0xd0, 0xf3, 0x19, 0xb5, 0x5f, 0x7c, 0x96, 0x39, 0xd3, 0xf0, 0x1a, 0xb6, 0x5c, 0x7f, 0x95, 0x28, 0xc2, 0xe1, 0xb, 0xa7, 0x4d, 0x6e, 0x84, 0x2b, 0xc1, 0xe2, 0x8, 0xa4, 0x4e, 0x6d, 0x87, 0x2e, 0xc4, 0xe7, 0xd, 0xa1, 0x4b, 0x68, 0x82, 0x2d, 0xc7, 0xe4, 0xe, 0xa2, 0x48, 0x6b, 0x81, 0x24, 0xce, 0xed, 0x7, 0xab, 0x41, 0x62, 0x88, 0x27, 0xcd, 0xee, 0x4, 0xa8, 0x42, 0x61, 0x8b, 0x22, 0xc8, 0xeb, 0x1, 0xad, 0x47, 0x64, 0x8e, 0x21, 0xcb, 0xe8, 0x2, 0xae, 0x44, 0x67, 0x8d},
+ {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0, 0x16, 0xfd, 0xdd, 0x36, 0x9d, 0x76, 0x56, 0xbd, 0x1d, 0xf6, 0xd6, 0x3d, 0x96, 0x7d, 0x5d, 0xb6, 0x2c, 0xc7, 0xe7, 0xc, 0xa7, 0x4c, 0x6c, 0x87, 0x27, 0xcc, 0xec, 0x7, 0xac, 0x47, 0x67, 0x8c, 0x3a, 0xd1, 0xf1, 0x1a, 0xb1, 0x5a, 0x7a, 0x91, 0x31, 0xda, 0xfa, 0x11, 0xba, 0x51, 0x71, 0x9a, 0x58, 0xb3, 0x93, 0x78, 0xd3, 0x38, 0x18, 0xf3, 0x53, 0xb8, 0x98, 0x73, 0xd8, 0x33, 0x13, 0xf8, 0x4e, 0xa5, 0x85, 0x6e, 0xc5, 0x2e, 0xe, 0xe5, 0x45, 0xae, 0x8e, 0x65, 0xce, 0x25, 0x5, 0xee, 0x74, 0x9f, 0xbf, 0x54, 0xff, 0x14, 0x34, 0xdf, 0x7f, 0x94, 0xb4, 0x5f, 0xf4, 0x1f, 0x3f, 0xd4, 0x62, 0x89, 0xa9, 0x42, 0xe9, 0x2, 0x22, 0xc9, 0x69, 0x82, 0xa2, 0x49, 0xe2, 0x9, 0x29, 0xc2, 0xb0, 0x5b, 0x7b, 0x90, 0x3b, 0xd0, 0xf0, 0x1b, 0xbb, 0x50, 0x70, 0x9b, 0x30, 0xdb, 0xfb, 0x10, 0xa6, 0x4d, 0x6d, 0x86, 0x2d, 0xc6, 0xe6, 0xd, 0xad, 0x46, 0x66, 0x8d, 0x26, 0xcd, 0xed, 0x6, 0x9c, 0x77, 0x57, 0xbc, 0x17, 0xfc, 0xdc, 0x37, 0x97, 0x7c, 0x5c, 0xb7, 0x1c, 0xf7, 0xd7, 0x3c, 0x8a, 0x61, 0x41, 0xaa, 0x1, 0xea, 0xca, 0x21, 0x81, 0x6a, 0x4a, 0xa1, 0xa, 0xe1, 0xc1, 0x2a, 0xe8, 0x3, 0x23, 0xc8, 0x63, 0x88, 0xa8, 0x43, 0xe3, 0x8, 0x28, 0xc3, 0x68, 0x83, 0xa3, 0x48, 0xfe, 0x15, 0x35, 0xde, 0x75, 0x9e, 0xbe, 0x55, 0xf5, 0x1e, 0x3e, 0xd5, 0x7e, 0x95, 0xb5, 0x5e, 0xc4, 0x2f, 0xf, 0xe4, 0x4f, 0xa4, 0x84, 0x6f, 0xcf, 0x24, 0x4, 0xef, 0x44, 0xaf, 0x8f, 0x64, 0xd2, 0x39, 0x19, 0xf2, 0x59, 0xb2, 0x92, 0x79, 0xd9, 0x32, 0x12, 0xf9, 0x52, 0xb9, 0x99, 0x72},
+ {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d, 0x66, 0x8a, 0xa3, 0x4f, 0xf1, 0x1d, 0x34, 0xd8, 0x55, 0xb9, 0x90, 0x7c, 0xc2, 0x2e, 0x7, 0xeb, 0xcc, 0x20, 0x9, 0xe5, 0x5b, 0xb7, 0x9e, 0x72, 0xff, 0x13, 0x3a, 0xd6, 0x68, 0x84, 0xad, 0x41, 0xaa, 0x46, 0x6f, 0x83, 0x3d, 0xd1, 0xf8, 0x14, 0x99, 0x75, 0x5c, 0xb0, 0xe, 0xe2, 0xcb, 0x27, 0x85, 0x69, 0x40, 0xac, 0x12, 0xfe, 0xd7, 0x3b, 0xb6, 0x5a, 0x73, 0x9f, 0x21, 0xcd, 0xe4, 0x8, 0xe3, 0xf, 0x26, 0xca, 0x74, 0x98, 0xb1, 0x5d, 0xd0, 0x3c, 0x15, 0xf9, 0x47, 0xab, 0x82, 0x6e, 0x49, 0xa5, 0x8c, 0x60, 0xde, 0x32, 0x1b, 0xf7, 0x7a, 0x96, 0xbf, 0x53, 0xed, 0x1, 0x28, 0xc4, 0x2f, 0xc3, 0xea, 0x6, 0xb8, 0x54, 0x7d, 0x91, 0x1c, 0xf0, 0xd9, 0x35, 0x8b, 0x67, 0x4e, 0xa2, 0x17, 0xfb, 0xd2, 0x3e, 0x80, 0x6c, 0x45, 0xa9, 0x24, 0xc8, 0xe1, 0xd, 0xb3, 0x5f, 0x76, 0x9a, 0x71, 0x9d, 0xb4, 0x58, 0xe6, 0xa, 0x23, 0xcf, 0x42, 0xae, 0x87, 0x6b, 0xd5, 0x39, 0x10, 0xfc, 0xdb, 0x37, 0x1e, 0xf2, 0x4c, 0xa0, 0x89, 0x65, 0xe8, 0x4, 0x2d, 0xc1, 0x7f, 0x93, 0xba, 0x56, 0xbd, 0x51, 0x78, 0x94, 0x2a, 0xc6, 0xef, 0x3, 0x8e, 0x62, 0x4b, 0xa7, 0x19, 0xf5, 0xdc, 0x30, 0x92, 0x7e, 0x57, 0xbb, 0x5, 0xe9, 0xc0, 0x2c, 0xa1, 0x4d, 0x64, 0x88, 0x36, 0xda, 0xf3, 0x1f, 0xf4, 0x18, 0x31, 0xdd, 0x63, 0x8f, 0xa6, 0x4a, 0xc7, 0x2b, 0x2, 0xee, 0x50, 0xbc, 0x95, 0x79, 0x5e, 0xb2, 0x9b, 0x77, 0xc9, 0x25, 0xc, 0xe0, 0x6d, 0x81, 0xa8, 0x44, 0xfa, 0x16, 0x3f, 0xd3, 0x38, 0xd4, 0xfd, 0x11, 0xaf, 0x43, 0x6a, 0x86, 0xb, 0xe7, 0xce, 0x22, 0x9c, 0x70, 0x59, 0xb5},
+ {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82, 0x76, 0x9b, 0xb1, 0x5c, 0xe5, 0x8, 0x22, 0xcf, 0x4d, 0xa0, 0x8a, 0x67, 0xde, 0x33, 0x19, 0xf4, 0xec, 0x1, 0x2b, 0xc6, 0x7f, 0x92, 0xb8, 0x55, 0xd7, 0x3a, 0x10, 0xfd, 0x44, 0xa9, 0x83, 0x6e, 0x9a, 0x77, 0x5d, 0xb0, 0x9, 0xe4, 0xce, 0x23, 0xa1, 0x4c, 0x66, 0x8b, 0x32, 0xdf, 0xf5, 0x18, 0xc5, 0x28, 0x2, 0xef, 0x56, 0xbb, 0x91, 0x7c, 0xfe, 0x13, 0x39, 0xd4, 0x6d, 0x80, 0xaa, 0x47, 0xb3, 0x5e, 0x74, 0x99, 0x20, 0xcd, 0xe7, 0xa, 0x88, 0x65, 0x4f, 0xa2, 0x1b, 0xf6, 0xdc, 0x31, 0x29, 0xc4, 0xee, 0x3, 0xba, 0x57, 0x7d, 0x90, 0x12, 0xff, 0xd5, 0x38, 0x81, 0x6c, 0x46, 0xab, 0x5f, 0xb2, 0x98, 0x75, 0xcc, 0x21, 0xb, 0xe6, 0x64, 0x89, 0xa3, 0x4e, 0xf7, 0x1a, 0x30, 0xdd, 0x97, 0x7a, 0x50, 0xbd, 0x4, 0xe9, 0xc3, 0x2e, 0xac, 0x41, 0x6b, 0x86, 0x3f, 0xd2, 0xf8, 0x15, 0xe1, 0xc, 0x26, 0xcb, 0x72, 0x9f, 0xb5, 0x58, 0xda, 0x37, 0x1d, 0xf0, 0x49, 0xa4, 0x8e, 0x63, 0x7b, 0x96, 0xbc, 0x51, 0xe8, 0x5, 0x2f, 0xc2, 0x40, 0xad, 0x87, 0x6a, 0xd3, 0x3e, 0x14, 0xf9, 0xd, 0xe0, 0xca, 0x27, 0x9e, 0x73, 0x59, 0xb4, 0x36, 0xdb, 0xf1, 0x1c, 0xa5, 0x48, 0x62, 0x8f, 0x52, 0xbf, 0x95, 0x78, 0xc1, 0x2c, 0x6, 0xeb, 0x69, 0x84, 0xae, 0x43, 0xfa, 0x17, 0x3d, 0xd0, 0x24, 0xc9, 0xe3, 0xe, 0xb7, 0x5a, 0x70, 0x9d, 0x1f, 0xf2, 0xd8, 0x35, 0x8c, 0x61, 0x4b, 0xa6, 0xbe, 0x53, 0x79, 0x94, 0x2d, 0xc0, 0xea, 0x7, 0x85, 0x68, 0x42, 0xaf, 0x16, 0xfb, 0xd1, 0x3c, 0xc8, 0x25, 0xf, 0xe2, 0x5b, 0xb6, 0x9c, 0x71, 0xf3, 0x1e, 0x34, 0xd9, 0x60, 0x8d, 0xa7, 0x4a},
+ {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93, 0x46, 0xa8, 0x87, 0x69, 0xd9, 0x37, 0x18, 0xf6, 0x65, 0x8b, 0xa4, 0x4a, 0xfa, 0x14, 0x3b, 0xd5, 0x8c, 0x62, 0x4d, 0xa3, 0x13, 0xfd, 0xd2, 0x3c, 0xaf, 0x41, 0x6e, 0x80, 0x30, 0xde, 0xf1, 0x1f, 0xca, 0x24, 0xb, 0xe5, 0x55, 0xbb, 0x94, 0x7a, 0xe9, 0x7, 0x28, 0xc6, 0x76, 0x98, 0xb7, 0x59, 0x5, 0xeb, 0xc4, 0x2a, 0x9a, 0x74, 0x5b, 0xb5, 0x26, 0xc8, 0xe7, 0x9, 0xb9, 0x57, 0x78, 0x96, 0x43, 0xad, 0x82, 0x6c, 0xdc, 0x32, 0x1d, 0xf3, 0x60, 0x8e, 0xa1, 0x4f, 0xff, 0x11, 0x3e, 0xd0, 0x89, 0x67, 0x48, 0xa6, 0x16, 0xf8, 0xd7, 0x39, 0xaa, 0x44, 0x6b, 0x85, 0x35, 0xdb, 0xf4, 0x1a, 0xcf, 0x21, 0xe, 0xe0, 0x50, 0xbe, 0x91, 0x7f, 0xec, 0x2, 0x2d, 0xc3, 0x73, 0x9d, 0xb2, 0x5c, 0xa, 0xe4, 0xcb, 0x25, 0x95, 0x7b, 0x54, 0xba, 0x29, 0xc7, 0xe8, 0x6, 0xb6, 0x58, 0x77, 0x99, 0x4c, 0xa2, 0x8d, 0x63, 0xd3, 0x3d, 0x12, 0xfc, 0x6f, 0x81, 0xae, 0x40, 0xf0, 0x1e, 0x31, 0xdf, 0x86, 0x68, 0x47, 0xa9, 0x19, 0xf7, 0xd8, 0x36, 0xa5, 0x4b, 0x64, 0x8a, 0x3a, 0xd4, 0xfb, 0x15, 0xc0, 0x2e, 0x1, 0xef, 0x5f, 0xb1, 0x9e, 0x70, 0xe3, 0xd, 0x22, 0xcc, 0x7c, 0x92, 0xbd, 0x53, 0xf, 0xe1, 0xce, 0x20, 0x90, 0x7e, 0x51, 0xbf, 0x2c, 0xc2, 0xed, 0x3, 0xb3, 0x5d, 0x72, 0x9c, 0x49, 0xa7, 0x88, 0x66, 0xd6, 0x38, 0x17, 0xf9, 0x6a, 0x84, 0xab, 0x45, 0xf5, 0x1b, 0x34, 0xda, 0x83, 0x6d, 0x42, 0xac, 0x1c, 0xf2, 0xdd, 0x33, 0xa0, 0x4e, 0x61, 0x8f, 0x3f, 0xd1, 0xfe, 0x10, 0xc5, 0x2b, 0x4, 0xea, 0x5a, 0xb4, 0x9b, 0x75, 0xe6, 0x8, 0x27, 0xc9, 0x79, 0x97, 0xb8, 0x56},
+ {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c, 0x56, 0xb9, 0x95, 0x7a, 0xcd, 0x22, 0xe, 0xe1, 0x7d, 0x92, 0xbe, 0x51, 0xe6, 0x9, 0x25, 0xca, 0xac, 0x43, 0x6f, 0x80, 0x37, 0xd8, 0xf4, 0x1b, 0x87, 0x68, 0x44, 0xab, 0x1c, 0xf3, 0xdf, 0x30, 0xfa, 0x15, 0x39, 0xd6, 0x61, 0x8e, 0xa2, 0x4d, 0xd1, 0x3e, 0x12, 0xfd, 0x4a, 0xa5, 0x89, 0x66, 0x45, 0xaa, 0x86, 0x69, 0xde, 0x31, 0x1d, 0xf2, 0x6e, 0x81, 0xad, 0x42, 0xf5, 0x1a, 0x36, 0xd9, 0x13, 0xfc, 0xd0, 0x3f, 0x88, 0x67, 0x4b, 0xa4, 0x38, 0xd7, 0xfb, 0x14, 0xa3, 0x4c, 0x60, 0x8f, 0xe9, 0x6, 0x2a, 0xc5, 0x72, 0x9d, 0xb1, 0x5e, 0xc2, 0x2d, 0x1, 0xee, 0x59, 0xb6, 0x9a, 0x75, 0xbf, 0x50, 0x7c, 0x93, 0x24, 0xcb, 0xe7, 0x8, 0x94, 0x7b, 0x57, 0xb8, 0xf, 0xe0, 0xcc, 0x23, 0x8a, 0x65, 0x49, 0xa6, 0x11, 0xfe, 0xd2, 0x3d, 0xa1, 0x4e, 0x62, 0x8d, 0x3a, 0xd5, 0xf9, 0x16, 0xdc, 0x33, 0x1f, 0xf0, 0x47, 0xa8, 0x84, 0x6b, 0xf7, 0x18, 0x34, 0xdb, 0x6c, 0x83, 0xaf, 0x40, 0x26, 0xc9, 0xe5, 0xa, 0xbd, 0x52, 0x7e, 0x91, 0xd, 0xe2, 0xce, 0x21, 0x96, 0x79, 0x55, 0xba, 0x70, 0x9f, 0xb3, 0x5c, 0xeb, 0x4, 0x28, 0xc7, 0x5b, 0xb4, 0x98, 0x77, 0xc0, 0x2f, 0x3, 0xec, 0xcf, 0x20, 0xc, 0xe3, 0x54, 0xbb, 0x97, 0x78, 0xe4, 0xb, 0x27, 0xc8, 0x7f, 0x90, 0xbc, 0x53, 0x99, 0x76, 0x5a, 0xb5, 0x2, 0xed, 0xc1, 0x2e, 0xb2, 0x5d, 0x71, 0x9e, 0x29, 0xc6, 0xea, 0x5, 0x63, 0x8c, 0xa0, 0x4f, 0xf8, 0x17, 0x3b, 0xd4, 0x48, 0xa7, 0x8b, 0x64, 0xd3, 0x3c, 0x10, 0xff, 0x35, 0xda, 0xf6, 0x19, 0xae, 0x41, 0x6d, 0x82, 0x1e, 0xf1, 0xdd, 0x32, 0x85, 0x6a, 0x46, 0xa9},
+ {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39, 0xbb, 0x4b, 0x46, 0xb6, 0x5c, 0xac, 0xa1, 0x51, 0x68, 0x98, 0x95, 0x65, 0x8f, 0x7f, 0x72, 0x82, 0x6b, 0x9b, 0x96, 0x66, 0x8c, 0x7c, 0x71, 0x81, 0xb8, 0x48, 0x45, 0xb5, 0x5f, 0xaf, 0xa2, 0x52, 0xd0, 0x20, 0x2d, 0xdd, 0x37, 0xc7, 0xca, 0x3a, 0x3, 0xf3, 0xfe, 0xe, 0xe4, 0x14, 0x19, 0xe9, 0xd6, 0x26, 0x2b, 0xdb, 0x31, 0xc1, 0xcc, 0x3c, 0x5, 0xf5, 0xf8, 0x8, 0xe2, 0x12, 0x1f, 0xef, 0x6d, 0x9d, 0x90, 0x60, 0x8a, 0x7a, 0x77, 0x87, 0xbe, 0x4e, 0x43, 0xb3, 0x59, 0xa9, 0xa4, 0x54, 0xbd, 0x4d, 0x40, 0xb0, 0x5a, 0xaa, 0xa7, 0x57, 0x6e, 0x9e, 0x93, 0x63, 0x89, 0x79, 0x74, 0x84, 0x6, 0xf6, 0xfb, 0xb, 0xe1, 0x11, 0x1c, 0xec, 0xd5, 0x25, 0x28, 0xd8, 0x32, 0xc2, 0xcf, 0x3f, 0xb1, 0x41, 0x4c, 0xbc, 0x56, 0xa6, 0xab, 0x5b, 0x62, 0x92, 0x9f, 0x6f, 0x85, 0x75, 0x78, 0x88, 0xa, 0xfa, 0xf7, 0x7, 0xed, 0x1d, 0x10, 0xe0, 0xd9, 0x29, 0x24, 0xd4, 0x3e, 0xce, 0xc3, 0x33, 0xda, 0x2a, 0x27, 0xd7, 0x3d, 0xcd, 0xc0, 0x30, 0x9, 0xf9, 0xf4, 0x4, 0xee, 0x1e, 0x13, 0xe3, 0x61, 0x91, 0x9c, 0x6c, 0x86, 0x76, 0x7b, 0x8b, 0xb2, 0x42, 0x4f, 0xbf, 0x55, 0xa5, 0xa8, 0x58, 0x67, 0x97, 0x9a, 0x6a, 0x80, 0x70, 0x7d, 0x8d, 0xb4, 0x44, 0x49, 0xb9, 0x53, 0xa3, 0xae, 0x5e, 0xdc, 0x2c, 0x21, 0xd1, 0x3b, 0xcb, 0xc6, 0x36, 0xf, 0xff, 0xf2, 0x2, 0xe8, 0x18, 0x15, 0xe5, 0xc, 0xfc, 0xf1, 0x1, 0xeb, 0x1b, 0x16, 0xe6, 0xdf, 0x2f, 0x22, 0xd2, 0x38, 0xc8, 0xc5, 0x35, 0xb7, 0x47, 0x4a, 0xba, 0x50, 0xa0, 0xad, 0x5d, 0x64, 0x94, 0x99, 0x69, 0x83, 0x73, 0x7e, 0x8e},
+ {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36, 0xab, 0x5a, 0x54, 0xa5, 0x48, 0xb9, 0xb7, 0x46, 0x70, 0x81, 0x8f, 0x7e, 0x93, 0x62, 0x6c, 0x9d, 0x4b, 0xba, 0xb4, 0x45, 0xa8, 0x59, 0x57, 0xa6, 0x90, 0x61, 0x6f, 0x9e, 0x73, 0x82, 0x8c, 0x7d, 0xe0, 0x11, 0x1f, 0xee, 0x3, 0xf2, 0xfc, 0xd, 0x3b, 0xca, 0xc4, 0x35, 0xd8, 0x29, 0x27, 0xd6, 0x96, 0x67, 0x69, 0x98, 0x75, 0x84, 0x8a, 0x7b, 0x4d, 0xbc, 0xb2, 0x43, 0xae, 0x5f, 0x51, 0xa0, 0x3d, 0xcc, 0xc2, 0x33, 0xde, 0x2f, 0x21, 0xd0, 0xe6, 0x17, 0x19, 0xe8, 0x5, 0xf4, 0xfa, 0xb, 0xdd, 0x2c, 0x22, 0xd3, 0x3e, 0xcf, 0xc1, 0x30, 0x6, 0xf7, 0xf9, 0x8, 0xe5, 0x14, 0x1a, 0xeb, 0x76, 0x87, 0x89, 0x78, 0x95, 0x64, 0x6a, 0x9b, 0xad, 0x5c, 0x52, 0xa3, 0x4e, 0xbf, 0xb1, 0x40, 0x31, 0xc0, 0xce, 0x3f, 0xd2, 0x23, 0x2d, 0xdc, 0xea, 0x1b, 0x15, 0xe4, 0x9, 0xf8, 0xf6, 0x7, 0x9a, 0x6b, 0x65, 0x94, 0x79, 0x88, 0x86, 0x77, 0x41, 0xb0, 0xbe, 0x4f, 0xa2, 0x53, 0x5d, 0xac, 0x7a, 0x8b, 0x85, 0x74, 0x99, 0x68, 0x66, 0x97, 0xa1, 0x50, 0x5e, 0xaf, 0x42, 0xb3, 0xbd, 0x4c, 0xd1, 0x20, 0x2e, 0xdf, 0x32, 0xc3, 0xcd, 0x3c, 0xa, 0xfb, 0xf5, 0x4, 0xe9, 0x18, 0x16, 0xe7, 0xa7, 0x56, 0x58, 0xa9, 0x44, 0xb5, 0xbb, 0x4a, 0x7c, 0x8d, 0x83, 0x72, 0x9f, 0x6e, 0x60, 0x91, 0xc, 0xfd, 0xf3, 0x2, 0xef, 0x1e, 0x10, 0xe1, 0xd7, 0x26, 0x28, 0xd9, 0x34, 0xc5, 0xcb, 0x3a, 0xec, 0x1d, 0x13, 0xe2, 0xf, 0xfe, 0xf0, 0x1, 0x37, 0xc6, 0xc8, 0x39, 0xd4, 0x25, 0x2b, 0xda, 0x47, 0xb6, 0xb8, 0x49, 0xa4, 0x55, 0x5b, 0xaa, 0x9c, 0x6d, 0x63, 0x92, 0x7f, 0x8e, 0x80, 0x71},
+ {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27, 0x9b, 0x69, 0x62, 0x90, 0x74, 0x86, 0x8d, 0x7f, 0x58, 0xaa, 0xa1, 0x53, 0xb7, 0x45, 0x4e, 0xbc, 0x2b, 0xd9, 0xd2, 0x20, 0xc4, 0x36, 0x3d, 0xcf, 0xe8, 0x1a, 0x11, 0xe3, 0x7, 0xf5, 0xfe, 0xc, 0xb0, 0x42, 0x49, 0xbb, 0x5f, 0xad, 0xa6, 0x54, 0x73, 0x81, 0x8a, 0x78, 0x9c, 0x6e, 0x65, 0x97, 0x56, 0xa4, 0xaf, 0x5d, 0xb9, 0x4b, 0x40, 0xb2, 0x95, 0x67, 0x6c, 0x9e, 0x7a, 0x88, 0x83, 0x71, 0xcd, 0x3f, 0x34, 0xc6, 0x22, 0xd0, 0xdb, 0x29, 0xe, 0xfc, 0xf7, 0x5, 0xe1, 0x13, 0x18, 0xea, 0x7d, 0x8f, 0x84, 0x76, 0x92, 0x60, 0x6b, 0x99, 0xbe, 0x4c, 0x47, 0xb5, 0x51, 0xa3, 0xa8, 0x5a, 0xe6, 0x14, 0x1f, 0xed, 0x9, 0xfb, 0xf0, 0x2, 0x25, 0xd7, 0xdc, 0x2e, 0xca, 0x38, 0x33, 0xc1, 0xac, 0x5e, 0x55, 0xa7, 0x43, 0xb1, 0xba, 0x48, 0x6f, 0x9d, 0x96, 0x64, 0x80, 0x72, 0x79, 0x8b, 0x37, 0xc5, 0xce, 0x3c, 0xd8, 0x2a, 0x21, 0xd3, 0xf4, 0x6, 0xd, 0xff, 0x1b, 0xe9, 0xe2, 0x10, 0x87, 0x75, 0x7e, 0x8c, 0x68, 0x9a, 0x91, 0x63, 0x44, 0xb6, 0xbd, 0x4f, 0xab, 0x59, 0x52, 0xa0, 0x1c, 0xee, 0xe5, 0x17, 0xf3, 0x1, 0xa, 0xf8, 0xdf, 0x2d, 0x26, 0xd4, 0x30, 0xc2, 0xc9, 0x3b, 0xfa, 0x8, 0x3, 0xf1, 0x15, 0xe7, 0xec, 0x1e, 0x39, 0xcb, 0xc0, 0x32, 0xd6, 0x24, 0x2f, 0xdd, 0x61, 0x93, 0x98, 0x6a, 0x8e, 0x7c, 0x77, 0x85, 0xa2, 0x50, 0x5b, 0xa9, 0x4d, 0xbf, 0xb4, 0x46, 0xd1, 0x23, 0x28, 0xda, 0x3e, 0xcc, 0xc7, 0x35, 0x12, 0xe0, 0xeb, 0x19, 0xfd, 0xf, 0x4, 0xf6, 0x4a, 0xb8, 0xb3, 0x41, 0xa5, 0x57, 0x5c, 0xae, 0x89, 0x7b, 0x70, 0x82, 0x66, 0x94, 0x9f, 0x6d},
+ {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28, 0x8b, 0x78, 0x70, 0x83, 0x60, 0x93, 0x9b, 0x68, 0x40, 0xb3, 0xbb, 0x48, 0xab, 0x58, 0x50, 0xa3, 0xb, 0xf8, 0xf0, 0x3, 0xe0, 0x13, 0x1b, 0xe8, 0xc0, 0x33, 0x3b, 0xc8, 0x2b, 0xd8, 0xd0, 0x23, 0x80, 0x73, 0x7b, 0x88, 0x6b, 0x98, 0x90, 0x63, 0x4b, 0xb8, 0xb0, 0x43, 0xa0, 0x53, 0x5b, 0xa8, 0x16, 0xe5, 0xed, 0x1e, 0xfd, 0xe, 0x6, 0xf5, 0xdd, 0x2e, 0x26, 0xd5, 0x36, 0xc5, 0xcd, 0x3e, 0x9d, 0x6e, 0x66, 0x95, 0x76, 0x85, 0x8d, 0x7e, 0x56, 0xa5, 0xad, 0x5e, 0xbd, 0x4e, 0x46, 0xb5, 0x1d, 0xee, 0xe6, 0x15, 0xf6, 0x5, 0xd, 0xfe, 0xd6, 0x25, 0x2d, 0xde, 0x3d, 0xce, 0xc6, 0x35, 0x96, 0x65, 0x6d, 0x9e, 0x7d, 0x8e, 0x86, 0x75, 0x5d, 0xae, 0xa6, 0x55, 0xb6, 0x45, 0x4d, 0xbe, 0x2c, 0xdf, 0xd7, 0x24, 0xc7, 0x34, 0x3c, 0xcf, 0xe7, 0x14, 0x1c, 0xef, 0xc, 0xff, 0xf7, 0x4, 0xa7, 0x54, 0x5c, 0xaf, 0x4c, 0xbf, 0xb7, 0x44, 0x6c, 0x9f, 0x97, 0x64, 0x87, 0x74, 0x7c, 0x8f, 0x27, 0xd4, 0xdc, 0x2f, 0xcc, 0x3f, 0x37, 0xc4, 0xec, 0x1f, 0x17, 0xe4, 0x7, 0xf4, 0xfc, 0xf, 0xac, 0x5f, 0x57, 0xa4, 0x47, 0xb4, 0xbc, 0x4f, 0x67, 0x94, 0x9c, 0x6f, 0x8c, 0x7f, 0x77, 0x84, 0x3a, 0xc9, 0xc1, 0x32, 0xd1, 0x22, 0x2a, 0xd9, 0xf1, 0x2, 0xa, 0xf9, 0x1a, 0xe9, 0xe1, 0x12, 0xb1, 0x42, 0x4a, 0xb9, 0x5a, 0xa9, 0xa1, 0x52, 0x7a, 0x89, 0x81, 0x72, 0x91, 0x62, 0x6a, 0x99, 0x31, 0xc2, 0xca, 0x39, 0xda, 0x29, 0x21, 0xd2, 0xfa, 0x9, 0x1, 0xf2, 0x11, 0xe2, 0xea, 0x19, 0xba, 0x49, 0x41, 0xb2, 0x51, 0xa2, 0xaa, 0x59, 0x71, 0x82, 0x8a, 0x79, 0x9a, 0x69, 0x61, 0x92},
+ {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5, 0xfb, 0xf, 0xe, 0xfa, 0xc, 0xf8, 0xf9, 0xd, 0x8, 0xfc, 0xfd, 0x9, 0xff, 0xb, 0xa, 0xfe, 0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee, 0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15, 0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce, 0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35, 0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25, 0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde, 0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e, 0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75, 0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65, 0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e, 0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45, 0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe, 0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae, 0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55},
+ {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa, 0xeb, 0x1e, 0x1c, 0xe9, 0x18, 0xed, 0xef, 0x1a, 0x10, 0xe5, 0xe7, 0x12, 0xe3, 0x16, 0x14, 0xe1, 0xcb, 0x3e, 0x3c, 0xc9, 0x38, 0xcd, 0xcf, 0x3a, 0x30, 0xc5, 0xc7, 0x32, 0xc3, 0x36, 0x34, 0xc1, 0x20, 0xd5, 0xd7, 0x22, 0xd3, 0x26, 0x24, 0xd1, 0xdb, 0x2e, 0x2c, 0xd9, 0x28, 0xdd, 0xdf, 0x2a, 0x8b, 0x7e, 0x7c, 0x89, 0x78, 0x8d, 0x8f, 0x7a, 0x70, 0x85, 0x87, 0x72, 0x83, 0x76, 0x74, 0x81, 0x60, 0x95, 0x97, 0x62, 0x93, 0x66, 0x64, 0x91, 0x9b, 0x6e, 0x6c, 0x99, 0x68, 0x9d, 0x9f, 0x6a, 0x40, 0xb5, 0xb7, 0x42, 0xb3, 0x46, 0x44, 0xb1, 0xbb, 0x4e, 0x4c, 0xb9, 0x48, 0xbd, 0xbf, 0x4a, 0xab, 0x5e, 0x5c, 0xa9, 0x58, 0xad, 0xaf, 0x5a, 0x50, 0xa5, 0xa7, 0x52, 0xa3, 0x56, 0x54, 0xa1, 0xb, 0xfe, 0xfc, 0x9, 0xf8, 0xd, 0xf, 0xfa, 0xf0, 0x5, 0x7, 0xf2, 0x3, 0xf6, 0xf4, 0x1, 0xe0, 0x15, 0x17, 0xe2, 0x13, 0xe6, 0xe4, 0x11, 0x1b, 0xee, 0xec, 0x19, 0xe8, 0x1d, 0x1f, 0xea, 0xc0, 0x35, 0x37, 0xc2, 0x33, 0xc6, 0xc4, 0x31, 0x3b, 0xce, 0xcc, 0x39, 0xc8, 0x3d, 0x3f, 0xca, 0x2b, 0xde, 0xdc, 0x29, 0xd8, 0x2d, 0x2f, 0xda, 0xd0, 0x25, 0x27, 0xd2, 0x23, 0xd6, 0xd4, 0x21, 0x80, 0x75, 0x77, 0x82, 0x73, 0x86, 0x84, 0x71, 0x7b, 0x8e, 0x8c, 0x79, 0x88, 0x7d, 0x7f, 0x8a, 0x6b, 0x9e, 0x9c, 0x69, 0x98, 0x6d, 0x6f, 0x9a, 0x90, 0x65, 0x67, 0x92, 0x63, 0x96, 0x94, 0x61, 0x4b, 0xbe, 0xbc, 0x49, 0xb8, 0x4d, 0x4f, 0xba, 0xb0, 0x45, 0x47, 0xb2, 0x43, 0xb6, 0xb4, 0x41, 0xa0, 0x55, 0x57, 0xa2, 0x53, 0xa6, 0xa4, 0x51, 0x5b, 0xae, 0xac, 0x59, 0xa8, 0x5d, 0x5f, 0xaa},
+ {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b, 0xdb, 0x2d, 0x2a, 0xdc, 0x24, 0xd2, 0xd5, 0x23, 0x38, 0xce, 0xc9, 0x3f, 0xc7, 0x31, 0x36, 0xc0, 0xab, 0x5d, 0x5a, 0xac, 0x54, 0xa2, 0xa5, 0x53, 0x48, 0xbe, 0xb9, 0x4f, 0xb7, 0x41, 0x46, 0xb0, 0x70, 0x86, 0x81, 0x77, 0x8f, 0x79, 0x7e, 0x88, 0x93, 0x65, 0x62, 0x94, 0x6c, 0x9a, 0x9d, 0x6b, 0x4b, 0xbd, 0xba, 0x4c, 0xb4, 0x42, 0x45, 0xb3, 0xa8, 0x5e, 0x59, 0xaf, 0x57, 0xa1, 0xa6, 0x50, 0x90, 0x66, 0x61, 0x97, 0x6f, 0x99, 0x9e, 0x68, 0x73, 0x85, 0x82, 0x74, 0x8c, 0x7a, 0x7d, 0x8b, 0xe0, 0x16, 0x11, 0xe7, 0x1f, 0xe9, 0xee, 0x18, 0x3, 0xf5, 0xf2, 0x4, 0xfc, 0xa, 0xd, 0xfb, 0x3b, 0xcd, 0xca, 0x3c, 0xc4, 0x32, 0x35, 0xc3, 0xd8, 0x2e, 0x29, 0xdf, 0x27, 0xd1, 0xd6, 0x20, 0x96, 0x60, 0x67, 0x91, 0x69, 0x9f, 0x98, 0x6e, 0x75, 0x83, 0x84, 0x72, 0x8a, 0x7c, 0x7b, 0x8d, 0x4d, 0xbb, 0xbc, 0x4a, 0xb2, 0x44, 0x43, 0xb5, 0xae, 0x58, 0x5f, 0xa9, 0x51, 0xa7, 0xa0, 0x56, 0x3d, 0xcb, 0xcc, 0x3a, 0xc2, 0x34, 0x33, 0xc5, 0xde, 0x28, 0x2f, 0xd9, 0x21, 0xd7, 0xd0, 0x26, 0xe6, 0x10, 0x17, 0xe1, 0x19, 0xef, 0xe8, 0x1e, 0x5, 0xf3, 0xf4, 0x2, 0xfa, 0xc, 0xb, 0xfd, 0xdd, 0x2b, 0x2c, 0xda, 0x22, 0xd4, 0xd3, 0x25, 0x3e, 0xc8, 0xcf, 0x39, 0xc1, 0x37, 0x30, 0xc6, 0x6, 0xf0, 0xf7, 0x1, 0xf9, 0xf, 0x8, 0xfe, 0xe5, 0x13, 0x14, 0xe2, 0x1a, 0xec, 0xeb, 0x1d, 0x76, 0x80, 0x87, 0x71, 0x89, 0x7f, 0x78, 0x8e, 0x95, 0x63, 0x64, 0x92, 0x6a, 0x9c, 0x9b, 0x6d, 0xad, 0x5b, 0x5c, 0xaa, 0x52, 0xa4, 0xa3, 0x55, 0x4e, 0xb8, 0xbf, 0x49, 0xb1, 0x47, 0x40, 0xb6},
+ {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14, 0xcb, 0x3c, 0x38, 0xcf, 0x30, 0xc7, 0xc3, 0x34, 0x20, 0xd7, 0xd3, 0x24, 0xdb, 0x2c, 0x28, 0xdf, 0x8b, 0x7c, 0x78, 0x8f, 0x70, 0x87, 0x83, 0x74, 0x60, 0x97, 0x93, 0x64, 0x9b, 0x6c, 0x68, 0x9f, 0x40, 0xb7, 0xb3, 0x44, 0xbb, 0x4c, 0x48, 0xbf, 0xab, 0x5c, 0x58, 0xaf, 0x50, 0xa7, 0xa3, 0x54, 0xb, 0xfc, 0xf8, 0xf, 0xf0, 0x7, 0x3, 0xf4, 0xe0, 0x17, 0x13, 0xe4, 0x1b, 0xec, 0xe8, 0x1f, 0xc0, 0x37, 0x33, 0xc4, 0x3b, 0xcc, 0xc8, 0x3f, 0x2b, 0xdc, 0xd8, 0x2f, 0xd0, 0x27, 0x23, 0xd4, 0x80, 0x77, 0x73, 0x84, 0x7b, 0x8c, 0x88, 0x7f, 0x6b, 0x9c, 0x98, 0x6f, 0x90, 0x67, 0x63, 0x94, 0x4b, 0xbc, 0xb8, 0x4f, 0xb0, 0x47, 0x43, 0xb4, 0xa0, 0x57, 0x53, 0xa4, 0x5b, 0xac, 0xa8, 0x5f, 0x16, 0xe1, 0xe5, 0x12, 0xed, 0x1a, 0x1e, 0xe9, 0xfd, 0xa, 0xe, 0xf9, 0x6, 0xf1, 0xf5, 0x2, 0xdd, 0x2a, 0x2e, 0xd9, 0x26, 0xd1, 0xd5, 0x22, 0x36, 0xc1, 0xc5, 0x32, 0xcd, 0x3a, 0x3e, 0xc9, 0x9d, 0x6a, 0x6e, 0x99, 0x66, 0x91, 0x95, 0x62, 0x76, 0x81, 0x85, 0x72, 0x8d, 0x7a, 0x7e, 0x89, 0x56, 0xa1, 0xa5, 0x52, 0xad, 0x5a, 0x5e, 0xa9, 0xbd, 0x4a, 0x4e, 0xb9, 0x46, 0xb1, 0xb5, 0x42, 0x1d, 0xea, 0xee, 0x19, 0xe6, 0x11, 0x15, 0xe2, 0xf6, 0x1, 0x5, 0xf2, 0xd, 0xfa, 0xfe, 0x9, 0xd6, 0x21, 0x25, 0xd2, 0x2d, 0xda, 0xde, 0x29, 0x3d, 0xca, 0xce, 0x39, 0xc6, 0x31, 0x35, 0xc2, 0x96, 0x61, 0x65, 0x92, 0x6d, 0x9a, 0x9e, 0x69, 0x7d, 0x8a, 0x8e, 0x79, 0x86, 0x71, 0x75, 0x82, 0x5d, 0xaa, 0xae, 0x59, 0xa6, 0x51, 0x55, 0xa2, 0xb6, 0x41, 0x45, 0xb2, 0x4d, 0xba, 0xbe, 0x49},
+ {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41, 0x3b, 0xc3, 0xd6, 0x2e, 0xfc, 0x4, 0x11, 0xe9, 0xa8, 0x50, 0x45, 0xbd, 0x6f, 0x97, 0x82, 0x7a, 0x76, 0x8e, 0x9b, 0x63, 0xb1, 0x49, 0x5c, 0xa4, 0xe5, 0x1d, 0x8, 0xf0, 0x22, 0xda, 0xcf, 0x37, 0x4d, 0xb5, 0xa0, 0x58, 0x8a, 0x72, 0x67, 0x9f, 0xde, 0x26, 0x33, 0xcb, 0x19, 0xe1, 0xf4, 0xc, 0xec, 0x14, 0x1, 0xf9, 0x2b, 0xd3, 0xc6, 0x3e, 0x7f, 0x87, 0x92, 0x6a, 0xb8, 0x40, 0x55, 0xad, 0xd7, 0x2f, 0x3a, 0xc2, 0x10, 0xe8, 0xfd, 0x5, 0x44, 0xbc, 0xa9, 0x51, 0x83, 0x7b, 0x6e, 0x96, 0x9a, 0x62, 0x77, 0x8f, 0x5d, 0xa5, 0xb0, 0x48, 0x9, 0xf1, 0xe4, 0x1c, 0xce, 0x36, 0x23, 0xdb, 0xa1, 0x59, 0x4c, 0xb4, 0x66, 0x9e, 0x8b, 0x73, 0x32, 0xca, 0xdf, 0x27, 0xf5, 0xd, 0x18, 0xe0, 0xc5, 0x3d, 0x28, 0xd0, 0x2, 0xfa, 0xef, 0x17, 0x56, 0xae, 0xbb, 0x43, 0x91, 0x69, 0x7c, 0x84, 0xfe, 0x6, 0x13, 0xeb, 0x39, 0xc1, 0xd4, 0x2c, 0x6d, 0x95, 0x80, 0x78, 0xaa, 0x52, 0x47, 0xbf, 0xb3, 0x4b, 0x5e, 0xa6, 0x74, 0x8c, 0x99, 0x61, 0x20, 0xd8, 0xcd, 0x35, 0xe7, 0x1f, 0xa, 0xf2, 0x88, 0x70, 0x65, 0x9d, 0x4f, 0xb7, 0xa2, 0x5a, 0x1b, 0xe3, 0xf6, 0xe, 0xdc, 0x24, 0x31, 0xc9, 0x29, 0xd1, 0xc4, 0x3c, 0xee, 0x16, 0x3, 0xfb, 0xba, 0x42, 0x57, 0xaf, 0x7d, 0x85, 0x90, 0x68, 0x12, 0xea, 0xff, 0x7, 0xd5, 0x2d, 0x38, 0xc0, 0x81, 0x79, 0x6c, 0x94, 0x46, 0xbe, 0xab, 0x53, 0x5f, 0xa7, 0xb2, 0x4a, 0x98, 0x60, 0x75, 0x8d, 0xcc, 0x34, 0x21, 0xd9, 0xb, 0xf3, 0xe6, 0x1e, 0x64, 0x9c, 0x89, 0x71, 0xa3, 0x5b, 0x4e, 0xb6, 0xf7, 0xf, 0x1a, 0xe2, 0x30, 0xc8, 0xdd, 0x25},
+ {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e, 0x2b, 0xd2, 0xc4, 0x3d, 0xe8, 0x11, 0x7, 0xfe, 0xb0, 0x49, 0x5f, 0xa6, 0x73, 0x8a, 0x9c, 0x65, 0x56, 0xaf, 0xb9, 0x40, 0x95, 0x6c, 0x7a, 0x83, 0xcd, 0x34, 0x22, 0xdb, 0xe, 0xf7, 0xe1, 0x18, 0x7d, 0x84, 0x92, 0x6b, 0xbe, 0x47, 0x51, 0xa8, 0xe6, 0x1f, 0x9, 0xf0, 0x25, 0xdc, 0xca, 0x33, 0xac, 0x55, 0x43, 0xba, 0x6f, 0x96, 0x80, 0x79, 0x37, 0xce, 0xd8, 0x21, 0xf4, 0xd, 0x1b, 0xe2, 0x87, 0x7e, 0x68, 0x91, 0x44, 0xbd, 0xab, 0x52, 0x1c, 0xe5, 0xf3, 0xa, 0xdf, 0x26, 0x30, 0xc9, 0xfa, 0x3, 0x15, 0xec, 0x39, 0xc0, 0xd6, 0x2f, 0x61, 0x98, 0x8e, 0x77, 0xa2, 0x5b, 0x4d, 0xb4, 0xd1, 0x28, 0x3e, 0xc7, 0x12, 0xeb, 0xfd, 0x4, 0x4a, 0xb3, 0xa5, 0x5c, 0x89, 0x70, 0x66, 0x9f, 0x45, 0xbc, 0xaa, 0x53, 0x86, 0x7f, 0x69, 0x90, 0xde, 0x27, 0x31, 0xc8, 0x1d, 0xe4, 0xf2, 0xb, 0x6e, 0x97, 0x81, 0x78, 0xad, 0x54, 0x42, 0xbb, 0xf5, 0xc, 0x1a, 0xe3, 0x36, 0xcf, 0xd9, 0x20, 0x13, 0xea, 0xfc, 0x5, 0xd0, 0x29, 0x3f, 0xc6, 0x88, 0x71, 0x67, 0x9e, 0x4b, 0xb2, 0xa4, 0x5d, 0x38, 0xc1, 0xd7, 0x2e, 0xfb, 0x2, 0x14, 0xed, 0xa3, 0x5a, 0x4c, 0xb5, 0x60, 0x99, 0x8f, 0x76, 0xe9, 0x10, 0x6, 0xff, 0x2a, 0xd3, 0xc5, 0x3c, 0x72, 0x8b, 0x9d, 0x64, 0xb1, 0x48, 0x5e, 0xa7, 0xc2, 0x3b, 0x2d, 0xd4, 0x1, 0xf8, 0xee, 0x17, 0x59, 0xa0, 0xb6, 0x4f, 0x9a, 0x63, 0x75, 0x8c, 0xbf, 0x46, 0x50, 0xa9, 0x7c, 0x85, 0x93, 0x6a, 0x24, 0xdd, 0xcb, 0x32, 0xe7, 0x1e, 0x8, 0xf1, 0x94, 0x6d, 0x7b, 0x82, 0x57, 0xae, 0xb8, 0x41, 0xf, 0xf6, 0xe0, 0x19, 0xcc, 0x35, 0x23, 0xda},
+ {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f, 0x1b, 0xe1, 0xf2, 0x8, 0xd4, 0x2e, 0x3d, 0xc7, 0x98, 0x62, 0x71, 0x8b, 0x57, 0xad, 0xbe, 0x44, 0x36, 0xcc, 0xdf, 0x25, 0xf9, 0x3, 0x10, 0xea, 0xb5, 0x4f, 0x5c, 0xa6, 0x7a, 0x80, 0x93, 0x69, 0x2d, 0xd7, 0xc4, 0x3e, 0xe2, 0x18, 0xb, 0xf1, 0xae, 0x54, 0x47, 0xbd, 0x61, 0x9b, 0x88, 0x72, 0x6c, 0x96, 0x85, 0x7f, 0xa3, 0x59, 0x4a, 0xb0, 0xef, 0x15, 0x6, 0xfc, 0x20, 0xda, 0xc9, 0x33, 0x77, 0x8d, 0x9e, 0x64, 0xb8, 0x42, 0x51, 0xab, 0xf4, 0xe, 0x1d, 0xe7, 0x3b, 0xc1, 0xd2, 0x28, 0x5a, 0xa0, 0xb3, 0x49, 0x95, 0x6f, 0x7c, 0x86, 0xd9, 0x23, 0x30, 0xca, 0x16, 0xec, 0xff, 0x5, 0x41, 0xbb, 0xa8, 0x52, 0x8e, 0x74, 0x67, 0x9d, 0xc2, 0x38, 0x2b, 0xd1, 0xd, 0xf7, 0xe4, 0x1e, 0xd8, 0x22, 0x31, 0xcb, 0x17, 0xed, 0xfe, 0x4, 0x5b, 0xa1, 0xb2, 0x48, 0x94, 0x6e, 0x7d, 0x87, 0xc3, 0x39, 0x2a, 0xd0, 0xc, 0xf6, 0xe5, 0x1f, 0x40, 0xba, 0xa9, 0x53, 0x8f, 0x75, 0x66, 0x9c, 0xee, 0x14, 0x7, 0xfd, 0x21, 0xdb, 0xc8, 0x32, 0x6d, 0x97, 0x84, 0x7e, 0xa2, 0x58, 0x4b, 0xb1, 0xf5, 0xf, 0x1c, 0xe6, 0x3a, 0xc0, 0xd3, 0x29, 0x76, 0x8c, 0x9f, 0x65, 0xb9, 0x43, 0x50, 0xaa, 0xb4, 0x4e, 0x5d, 0xa7, 0x7b, 0x81, 0x92, 0x68, 0x37, 0xcd, 0xde, 0x24, 0xf8, 0x2, 0x11, 0xeb, 0xaf, 0x55, 0x46, 0xbc, 0x60, 0x9a, 0x89, 0x73, 0x2c, 0xd6, 0xc5, 0x3f, 0xe3, 0x19, 0xa, 0xf0, 0x82, 0x78, 0x6b, 0x91, 0x4d, 0xb7, 0xa4, 0x5e, 0x1, 0xfb, 0xe8, 0x12, 0xce, 0x34, 0x27, 0xdd, 0x99, 0x63, 0x70, 0x8a, 0x56, 0xac, 0xbf, 0x45, 0x1a, 0xe0, 0xf3, 0x9, 0xd5, 0x2f, 0x3c, 0xc6},
+ {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50, 0xb, 0xf0, 0xe0, 0x1b, 0xc0, 0x3b, 0x2b, 0xd0, 0x80, 0x7b, 0x6b, 0x90, 0x4b, 0xb0, 0xa0, 0x5b, 0x16, 0xed, 0xfd, 0x6, 0xdd, 0x26, 0x36, 0xcd, 0x9d, 0x66, 0x76, 0x8d, 0x56, 0xad, 0xbd, 0x46, 0x1d, 0xe6, 0xf6, 0xd, 0xd6, 0x2d, 0x3d, 0xc6, 0x96, 0x6d, 0x7d, 0x86, 0x5d, 0xa6, 0xb6, 0x4d, 0x2c, 0xd7, 0xc7, 0x3c, 0xe7, 0x1c, 0xc, 0xf7, 0xa7, 0x5c, 0x4c, 0xb7, 0x6c, 0x97, 0x87, 0x7c, 0x27, 0xdc, 0xcc, 0x37, 0xec, 0x17, 0x7, 0xfc, 0xac, 0x57, 0x47, 0xbc, 0x67, 0x9c, 0x8c, 0x77, 0x3a, 0xc1, 0xd1, 0x2a, 0xf1, 0xa, 0x1a, 0xe1, 0xb1, 0x4a, 0x5a, 0xa1, 0x7a, 0x81, 0x91, 0x6a, 0x31, 0xca, 0xda, 0x21, 0xfa, 0x1, 0x11, 0xea, 0xba, 0x41, 0x51, 0xaa, 0x71, 0x8a, 0x9a, 0x61, 0x58, 0xa3, 0xb3, 0x48, 0x93, 0x68, 0x78, 0x83, 0xd3, 0x28, 0x38, 0xc3, 0x18, 0xe3, 0xf3, 0x8, 0x53, 0xa8, 0xb8, 0x43, 0x98, 0x63, 0x73, 0x88, 0xd8, 0x23, 0x33, 0xc8, 0x13, 0xe8, 0xf8, 0x3, 0x4e, 0xb5, 0xa5, 0x5e, 0x85, 0x7e, 0x6e, 0x95, 0xc5, 0x3e, 0x2e, 0xd5, 0xe, 0xf5, 0xe5, 0x1e, 0x45, 0xbe, 0xae, 0x55, 0x8e, 0x75, 0x65, 0x9e, 0xce, 0x35, 0x25, 0xde, 0x5, 0xfe, 0xee, 0x15, 0x74, 0x8f, 0x9f, 0x64, 0xbf, 0x44, 0x54, 0xaf, 0xff, 0x4, 0x14, 0xef, 0x34, 0xcf, 0xdf, 0x24, 0x7f, 0x84, 0x94, 0x6f, 0xb4, 0x4f, 0x5f, 0xa4, 0xf4, 0xf, 0x1f, 0xe4, 0x3f, 0xc4, 0xd4, 0x2f, 0x62, 0x99, 0x89, 0x72, 0xa9, 0x52, 0x42, 0xb9, 0xe9, 0x12, 0x2, 0xf9, 0x22, 0xd9, 0xc9, 0x32, 0x69, 0x92, 0x82, 0x79, 0xa2, 0x59, 0x49, 0xb2, 0xe2, 0x19, 0x9, 0xf2, 0x29, 0xd2, 0xc2, 0x39},
+ {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d, 0x7b, 0x87, 0x9e, 0x62, 0xac, 0x50, 0x49, 0xb5, 0xc8, 0x34, 0x2d, 0xd1, 0x1f, 0xe3, 0xfa, 0x6, 0xf6, 0xa, 0x13, 0xef, 0x21, 0xdd, 0xc4, 0x38, 0x45, 0xb9, 0xa0, 0x5c, 0x92, 0x6e, 0x77, 0x8b, 0x8d, 0x71, 0x68, 0x94, 0x5a, 0xa6, 0xbf, 0x43, 0x3e, 0xc2, 0xdb, 0x27, 0xe9, 0x15, 0xc, 0xf0, 0xf1, 0xd, 0x14, 0xe8, 0x26, 0xda, 0xc3, 0x3f, 0x42, 0xbe, 0xa7, 0x5b, 0x95, 0x69, 0x70, 0x8c, 0x8a, 0x76, 0x6f, 0x93, 0x5d, 0xa1, 0xb8, 0x44, 0x39, 0xc5, 0xdc, 0x20, 0xee, 0x12, 0xb, 0xf7, 0x7, 0xfb, 0xe2, 0x1e, 0xd0, 0x2c, 0x35, 0xc9, 0xb4, 0x48, 0x51, 0xad, 0x63, 0x9f, 0x86, 0x7a, 0x7c, 0x80, 0x99, 0x65, 0xab, 0x57, 0x4e, 0xb2, 0xcf, 0x33, 0x2a, 0xd6, 0x18, 0xe4, 0xfd, 0x1, 0xff, 0x3, 0x1a, 0xe6, 0x28, 0xd4, 0xcd, 0x31, 0x4c, 0xb0, 0xa9, 0x55, 0x9b, 0x67, 0x7e, 0x82, 0x84, 0x78, 0x61, 0x9d, 0x53, 0xaf, 0xb6, 0x4a, 0x37, 0xcb, 0xd2, 0x2e, 0xe0, 0x1c, 0x5, 0xf9, 0x9, 0xf5, 0xec, 0x10, 0xde, 0x22, 0x3b, 0xc7, 0xba, 0x46, 0x5f, 0xa3, 0x6d, 0x91, 0x88, 0x74, 0x72, 0x8e, 0x97, 0x6b, 0xa5, 0x59, 0x40, 0xbc, 0xc1, 0x3d, 0x24, 0xd8, 0x16, 0xea, 0xf3, 0xf, 0xe, 0xf2, 0xeb, 0x17, 0xd9, 0x25, 0x3c, 0xc0, 0xbd, 0x41, 0x58, 0xa4, 0x6a, 0x96, 0x8f, 0x73, 0x75, 0x89, 0x90, 0x6c, 0xa2, 0x5e, 0x47, 0xbb, 0xc6, 0x3a, 0x23, 0xdf, 0x11, 0xed, 0xf4, 0x8, 0xf8, 0x4, 0x1d, 0xe1, 0x2f, 0xd3, 0xca, 0x36, 0x4b, 0xb7, 0xae, 0x52, 0x9c, 0x60, 0x79, 0x85, 0x83, 0x7f, 0x66, 0x9a, 0x54, 0xa8, 0xb1, 0x4d, 0x30, 0xcc, 0xd5, 0x29, 0xe7, 0x1b, 0x2, 0xfe},
+ {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72, 0x6b, 0x96, 0x8c, 0x71, 0xb8, 0x45, 0x5f, 0xa2, 0xd0, 0x2d, 0x37, 0xca, 0x3, 0xfe, 0xe4, 0x19, 0xd6, 0x2b, 0x31, 0xcc, 0x5, 0xf8, 0xe2, 0x1f, 0x6d, 0x90, 0x8a, 0x77, 0xbe, 0x43, 0x59, 0xa4, 0xbd, 0x40, 0x5a, 0xa7, 0x6e, 0x93, 0x89, 0x74, 0x6, 0xfb, 0xe1, 0x1c, 0xd5, 0x28, 0x32, 0xcf, 0xb1, 0x4c, 0x56, 0xab, 0x62, 0x9f, 0x85, 0x78, 0xa, 0xf7, 0xed, 0x10, 0xd9, 0x24, 0x3e, 0xc3, 0xda, 0x27, 0x3d, 0xc0, 0x9, 0xf4, 0xee, 0x13, 0x61, 0x9c, 0x86, 0x7b, 0xb2, 0x4f, 0x55, 0xa8, 0x67, 0x9a, 0x80, 0x7d, 0xb4, 0x49, 0x53, 0xae, 0xdc, 0x21, 0x3b, 0xc6, 0xf, 0xf2, 0xe8, 0x15, 0xc, 0xf1, 0xeb, 0x16, 0xdf, 0x22, 0x38, 0xc5, 0xb7, 0x4a, 0x50, 0xad, 0x64, 0x99, 0x83, 0x7e, 0x7f, 0x82, 0x98, 0x65, 0xac, 0x51, 0x4b, 0xb6, 0xc4, 0x39, 0x23, 0xde, 0x17, 0xea, 0xf0, 0xd, 0x14, 0xe9, 0xf3, 0xe, 0xc7, 0x3a, 0x20, 0xdd, 0xaf, 0x52, 0x48, 0xb5, 0x7c, 0x81, 0x9b, 0x66, 0xa9, 0x54, 0x4e, 0xb3, 0x7a, 0x87, 0x9d, 0x60, 0x12, 0xef, 0xf5, 0x8, 0xc1, 0x3c, 0x26, 0xdb, 0xc2, 0x3f, 0x25, 0xd8, 0x11, 0xec, 0xf6, 0xb, 0x79, 0x84, 0x9e, 0x63, 0xaa, 0x57, 0x4d, 0xb0, 0xce, 0x33, 0x29, 0xd4, 0x1d, 0xe0, 0xfa, 0x7, 0x75, 0x88, 0x92, 0x6f, 0xa6, 0x5b, 0x41, 0xbc, 0xa5, 0x58, 0x42, 0xbf, 0x76, 0x8b, 0x91, 0x6c, 0x1e, 0xe3, 0xf9, 0x4, 0xcd, 0x30, 0x2a, 0xd7, 0x18, 0xe5, 0xff, 0x2, 0xcb, 0x36, 0x2c, 0xd1, 0xa3, 0x5e, 0x44, 0xb9, 0x70, 0x8d, 0x97, 0x6a, 0x73, 0x8e, 0x94, 0x69, 0xa0, 0x5d, 0x47, 0xba, 0xc8, 0x35, 0x2f, 0xd2, 0x1b, 0xe6, 0xfc, 0x1},
+ {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63, 0x5b, 0xa5, 0xba, 0x44, 0x84, 0x7a, 0x65, 0x9b, 0xf8, 0x6, 0x19, 0xe7, 0x27, 0xd9, 0xc6, 0x38, 0xb6, 0x48, 0x57, 0xa9, 0x69, 0x97, 0x88, 0x76, 0x15, 0xeb, 0xf4, 0xa, 0xca, 0x34, 0x2b, 0xd5, 0xed, 0x13, 0xc, 0xf2, 0x32, 0xcc, 0xd3, 0x2d, 0x4e, 0xb0, 0xaf, 0x51, 0x91, 0x6f, 0x70, 0x8e, 0x71, 0x8f, 0x90, 0x6e, 0xae, 0x50, 0x4f, 0xb1, 0xd2, 0x2c, 0x33, 0xcd, 0xd, 0xf3, 0xec, 0x12, 0x2a, 0xd4, 0xcb, 0x35, 0xf5, 0xb, 0x14, 0xea, 0x89, 0x77, 0x68, 0x96, 0x56, 0xa8, 0xb7, 0x49, 0xc7, 0x39, 0x26, 0xd8, 0x18, 0xe6, 0xf9, 0x7, 0x64, 0x9a, 0x85, 0x7b, 0xbb, 0x45, 0x5a, 0xa4, 0x9c, 0x62, 0x7d, 0x83, 0x43, 0xbd, 0xa2, 0x5c, 0x3f, 0xc1, 0xde, 0x20, 0xe0, 0x1e, 0x1, 0xff, 0xe2, 0x1c, 0x3, 0xfd, 0x3d, 0xc3, 0xdc, 0x22, 0x41, 0xbf, 0xa0, 0x5e, 0x9e, 0x60, 0x7f, 0x81, 0xb9, 0x47, 0x58, 0xa6, 0x66, 0x98, 0x87, 0x79, 0x1a, 0xe4, 0xfb, 0x5, 0xc5, 0x3b, 0x24, 0xda, 0x54, 0xaa, 0xb5, 0x4b, 0x8b, 0x75, 0x6a, 0x94, 0xf7, 0x9, 0x16, 0xe8, 0x28, 0xd6, 0xc9, 0x37, 0xf, 0xf1, 0xee, 0x10, 0xd0, 0x2e, 0x31, 0xcf, 0xac, 0x52, 0x4d, 0xb3, 0x73, 0x8d, 0x92, 0x6c, 0x93, 0x6d, 0x72, 0x8c, 0x4c, 0xb2, 0xad, 0x53, 0x30, 0xce, 0xd1, 0x2f, 0xef, 0x11, 0xe, 0xf0, 0xc8, 0x36, 0x29, 0xd7, 0x17, 0xe9, 0xf6, 0x8, 0x6b, 0x95, 0x8a, 0x74, 0xb4, 0x4a, 0x55, 0xab, 0x25, 0xdb, 0xc4, 0x3a, 0xfa, 0x4, 0x1b, 0xe5, 0x86, 0x78, 0x67, 0x99, 0x59, 0xa7, 0xb8, 0x46, 0x7e, 0x80, 0x9f, 0x61, 0xa1, 0x5f, 0x40, 0xbe, 0xdd, 0x23, 0x3c, 0xc2, 0x2, 0xfc, 0xe3, 0x1d},
+ {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c, 0x4b, 0xb4, 0xa8, 0x57, 0x90, 0x6f, 0x73, 0x8c, 0xe0, 0x1f, 0x3, 0xfc, 0x3b, 0xc4, 0xd8, 0x27, 0x96, 0x69, 0x75, 0x8a, 0x4d, 0xb2, 0xae, 0x51, 0x3d, 0xc2, 0xde, 0x21, 0xe6, 0x19, 0x5, 0xfa, 0xdd, 0x22, 0x3e, 0xc1, 0x6, 0xf9, 0xe5, 0x1a, 0x76, 0x89, 0x95, 0x6a, 0xad, 0x52, 0x4e, 0xb1, 0x31, 0xce, 0xd2, 0x2d, 0xea, 0x15, 0x9, 0xf6, 0x9a, 0x65, 0x79, 0x86, 0x41, 0xbe, 0xa2, 0x5d, 0x7a, 0x85, 0x99, 0x66, 0xa1, 0x5e, 0x42, 0xbd, 0xd1, 0x2e, 0x32, 0xcd, 0xa, 0xf5, 0xe9, 0x16, 0xa7, 0x58, 0x44, 0xbb, 0x7c, 0x83, 0x9f, 0x60, 0xc, 0xf3, 0xef, 0x10, 0xd7, 0x28, 0x34, 0xcb, 0xec, 0x13, 0xf, 0xf0, 0x37, 0xc8, 0xd4, 0x2b, 0x47, 0xb8, 0xa4, 0x5b, 0x9c, 0x63, 0x7f, 0x80, 0x62, 0x9d, 0x81, 0x7e, 0xb9, 0x46, 0x5a, 0xa5, 0xc9, 0x36, 0x2a, 0xd5, 0x12, 0xed, 0xf1, 0xe, 0x29, 0xd6, 0xca, 0x35, 0xf2, 0xd, 0x11, 0xee, 0x82, 0x7d, 0x61, 0x9e, 0x59, 0xa6, 0xba, 0x45, 0xf4, 0xb, 0x17, 0xe8, 0x2f, 0xd0, 0xcc, 0x33, 0x5f, 0xa0, 0xbc, 0x43, 0x84, 0x7b, 0x67, 0x98, 0xbf, 0x40, 0x5c, 0xa3, 0x64, 0x9b, 0x87, 0x78, 0x14, 0xeb, 0xf7, 0x8, 0xcf, 0x30, 0x2c, 0xd3, 0x53, 0xac, 0xb0, 0x4f, 0x88, 0x77, 0x6b, 0x94, 0xf8, 0x7, 0x1b, 0xe4, 0x23, 0xdc, 0xc0, 0x3f, 0x18, 0xe7, 0xfb, 0x4, 0xc3, 0x3c, 0x20, 0xdf, 0xb3, 0x4c, 0x50, 0xaf, 0x68, 0x97, 0x8b, 0x74, 0xc5, 0x3a, 0x26, 0xd9, 0x1e, 0xe1, 0xfd, 0x2, 0x6e, 0x91, 0x8d, 0x72, 0xb5, 0x4a, 0x56, 0xa9, 0x8e, 0x71, 0x6d, 0x92, 0x55, 0xaa, 0xb6, 0x49, 0x25, 0xda, 0xc6, 0x39, 0xfe, 0x1, 0x1d, 0xe2}}
+
+var mulTableLow = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+ {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
+ {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e},
+ {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11},
+ {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c},
+ {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33},
+ {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22},
+ {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d},
+ {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78},
+ {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77},
+ {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66},
+ {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69},
+ {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44},
+ {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b},
+ {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a},
+ {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55},
+ {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0},
+ {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
+ {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee},
+ {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1},
+ {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc},
+ {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3},
+ {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2},
+ {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd},
+ {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88},
+ {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87},
+ {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96},
+ {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99},
+ {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4},
+ {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb},
+ {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa},
+ {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5},
+ {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd},
+ {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2},
+ {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3},
+ {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec},
+ {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1},
+ {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce},
+ {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf},
+ {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0},
+ {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85},
+ {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a},
+ {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b},
+ {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94},
+ {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9},
+ {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6},
+ {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7},
+ {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8},
+ {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd},
+ {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2},
+ {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13},
+ {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c},
+ {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31},
+ {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e},
+ {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f},
+ {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20},
+ {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75},
+ {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a},
+ {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b},
+ {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64},
+ {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49},
+ {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46},
+ {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57},
+ {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58},
+ {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7},
+ {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8},
+ {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9},
+ {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6},
+ {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb},
+ {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4},
+ {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5},
+ {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca},
+ {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f},
+ {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90},
+ {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81},
+ {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e},
+ {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3},
+ {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac},
+ {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd},
+ {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2},
+ {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17},
+ {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18},
+ {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9},
+ {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6},
+ {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b},
+ {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24},
+ {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35},
+ {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a},
+ {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f},
+ {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60},
+ {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71},
+ {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e},
+ {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53},
+ {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c},
+ {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d},
+ {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42},
+ {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a},
+ {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15},
+ {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4},
+ {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb},
+ {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26},
+ {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29},
+ {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38},
+ {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37},
+ {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62},
+ {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d},
+ {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c},
+ {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73},
+ {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e},
+ {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51},
+ {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40},
+ {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f},
+ {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea},
+ {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5},
+ {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4},
+ {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb},
+ {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6},
+ {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9},
+ {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8},
+ {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7},
+ {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92},
+ {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d},
+ {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c},
+ {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83},
+ {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae},
+ {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1},
+ {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0},
+ {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf},
+ {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3},
+ {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc},
+ {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd},
+ {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2},
+ {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef},
+ {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0},
+ {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1},
+ {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe},
+ {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab},
+ {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4},
+ {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5},
+ {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba},
+ {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97},
+ {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98},
+ {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89},
+ {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86},
+ {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23},
+ {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c},
+ {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d},
+ {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32},
+ {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f},
+ {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10},
+ {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1},
+ {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe},
+ {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b},
+ {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54},
+ {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45},
+ {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a},
+ {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67},
+ {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68},
+ {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79},
+ {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76},
+ {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e},
+ {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21},
+ {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30},
+ {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f},
+ {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12},
+ {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d},
+ {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc},
+ {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3},
+ {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56},
+ {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59},
+ {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48},
+ {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47},
+ {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a},
+ {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65},
+ {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74},
+ {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b},
+ {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde},
+ {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1},
+ {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0},
+ {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf},
+ {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2},
+ {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed},
+ {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc},
+ {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3},
+ {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6},
+ {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9},
+ {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8},
+ {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7},
+ {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a},
+ {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95},
+ {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84},
+ {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b},
+ {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34},
+ {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b},
+ {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a},
+ {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25},
+ {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8},
+ {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7},
+ {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16},
+ {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19},
+ {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c},
+ {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43},
+ {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52},
+ {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d},
+ {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70},
+ {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f},
+ {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e},
+ {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61},
+ {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4},
+ {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb},
+ {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda},
+ {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5},
+ {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8},
+ {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7},
+ {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6},
+ {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9},
+ {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc},
+ {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3},
+ {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2},
+ {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad},
+ {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80},
+ {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f},
+ {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e},
+ {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91},
+ {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9},
+ {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6},
+ {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7},
+ {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8},
+ {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5},
+ {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa},
+ {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb},
+ {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4},
+ {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1},
+ {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe},
+ {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf},
+ {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0},
+ {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d},
+ {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82},
+ {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93},
+ {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c},
+ {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39},
+ {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36},
+ {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27},
+ {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28},
+ {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5},
+ {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa},
+ {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b},
+ {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14},
+ {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41},
+ {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e},
+ {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f},
+ {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50},
+ {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d},
+ {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72},
+ {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63},
+ {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c}}
+var mulTableHigh = [256][16]uint8{{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
+ {0x0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0},
+ {0x0, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0, 0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd},
+ {0x0, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90, 0x9d, 0xad, 0xfd, 0xcd, 0x5d, 0x6d, 0x3d, 0xd},
+ {0x0, 0x40, 0x80, 0xc0, 0x1d, 0x5d, 0x9d, 0xdd, 0x3a, 0x7a, 0xba, 0xfa, 0x27, 0x67, 0xa7, 0xe7},
+ {0x0, 0x50, 0xa0, 0xf0, 0x5d, 0xd, 0xfd, 0xad, 0xba, 0xea, 0x1a, 0x4a, 0xe7, 0xb7, 0x47, 0x17},
+ {0x0, 0x60, 0xc0, 0xa0, 0x9d, 0xfd, 0x5d, 0x3d, 0x27, 0x47, 0xe7, 0x87, 0xba, 0xda, 0x7a, 0x1a},
+ {0x0, 0x70, 0xe0, 0x90, 0xdd, 0xad, 0x3d, 0x4d, 0xa7, 0xd7, 0x47, 0x37, 0x7a, 0xa, 0x9a, 0xea},
+ {0x0, 0x80, 0x1d, 0x9d, 0x3a, 0xba, 0x27, 0xa7, 0x74, 0xf4, 0x69, 0xe9, 0x4e, 0xce, 0x53, 0xd3},
+ {0x0, 0x90, 0x3d, 0xad, 0x7a, 0xea, 0x47, 0xd7, 0xf4, 0x64, 0xc9, 0x59, 0x8e, 0x1e, 0xb3, 0x23},
+ {0x0, 0xa0, 0x5d, 0xfd, 0xba, 0x1a, 0xe7, 0x47, 0x69, 0xc9, 0x34, 0x94, 0xd3, 0x73, 0x8e, 0x2e},
+ {0x0, 0xb0, 0x7d, 0xcd, 0xfa, 0x4a, 0x87, 0x37, 0xe9, 0x59, 0x94, 0x24, 0x13, 0xa3, 0x6e, 0xde},
+ {0x0, 0xc0, 0x9d, 0x5d, 0x27, 0xe7, 0xba, 0x7a, 0x4e, 0x8e, 0xd3, 0x13, 0x69, 0xa9, 0xf4, 0x34},
+ {0x0, 0xd0, 0xbd, 0x6d, 0x67, 0xb7, 0xda, 0xa, 0xce, 0x1e, 0x73, 0xa3, 0xa9, 0x79, 0x14, 0xc4},
+ {0x0, 0xe0, 0xdd, 0x3d, 0xa7, 0x47, 0x7a, 0x9a, 0x53, 0xb3, 0x8e, 0x6e, 0xf4, 0x14, 0x29, 0xc9},
+ {0x0, 0xf0, 0xfd, 0xd, 0xe7, 0x17, 0x1a, 0xea, 0xd3, 0x23, 0x2e, 0xde, 0x34, 0xc4, 0xc9, 0x39},
+ {0x0, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53, 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb},
+ {0x0, 0xd, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b},
+ {0x0, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3, 0xf5, 0xc8, 0x8f, 0xb2, 0x1, 0x3c, 0x7b, 0x46},
+ {0x0, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3, 0x75, 0x58, 0x2f, 0x2, 0xc1, 0xec, 0x9b, 0xb6},
+ {0x0, 0x5d, 0xba, 0xe7, 0x69, 0x34, 0xd3, 0x8e, 0xd2, 0x8f, 0x68, 0x35, 0xbb, 0xe6, 0x1, 0x5c},
+ {0x0, 0x4d, 0x9a, 0xd7, 0x29, 0x64, 0xb3, 0xfe, 0x52, 0x1f, 0xc8, 0x85, 0x7b, 0x36, 0xe1, 0xac},
+ {0x0, 0x7d, 0xfa, 0x87, 0xe9, 0x94, 0x13, 0x6e, 0xcf, 0xb2, 0x35, 0x48, 0x26, 0x5b, 0xdc, 0xa1},
+ {0x0, 0x6d, 0xda, 0xb7, 0xa9, 0xc4, 0x73, 0x1e, 0x4f, 0x22, 0x95, 0xf8, 0xe6, 0x8b, 0x3c, 0x51},
+ {0x0, 0x9d, 0x27, 0xba, 0x4e, 0xd3, 0x69, 0xf4, 0x9c, 0x1, 0xbb, 0x26, 0xd2, 0x4f, 0xf5, 0x68},
+ {0x0, 0x8d, 0x7, 0x8a, 0xe, 0x83, 0x9, 0x84, 0x1c, 0x91, 0x1b, 0x96, 0x12, 0x9f, 0x15, 0x98},
+ {0x0, 0xbd, 0x67, 0xda, 0xce, 0x73, 0xa9, 0x14, 0x81, 0x3c, 0xe6, 0x5b, 0x4f, 0xf2, 0x28, 0x95},
+ {0x0, 0xad, 0x47, 0xea, 0x8e, 0x23, 0xc9, 0x64, 0x1, 0xac, 0x46, 0xeb, 0x8f, 0x22, 0xc8, 0x65},
+ {0x0, 0xdd, 0xa7, 0x7a, 0x53, 0x8e, 0xf4, 0x29, 0xa6, 0x7b, 0x1, 0xdc, 0xf5, 0x28, 0x52, 0x8f},
+ {0x0, 0xcd, 0x87, 0x4a, 0x13, 0xde, 0x94, 0x59, 0x26, 0xeb, 0xa1, 0x6c, 0x35, 0xf8, 0xb2, 0x7f},
+ {0x0, 0xfd, 0xe7, 0x1a, 0xd3, 0x2e, 0x34, 0xc9, 0xbb, 0x46, 0x5c, 0xa1, 0x68, 0x95, 0x8f, 0x72},
+ {0x0, 0xed, 0xc7, 0x2a, 0x93, 0x7e, 0x54, 0xb9, 0x3b, 0xd6, 0xfc, 0x11, 0xa8, 0x45, 0x6f, 0x82},
+ {0x0, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6, 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b},
+ {0x0, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6, 0x4d, 0x67, 0x19, 0x33, 0xe5, 0xcf, 0xb1, 0x9b},
+ {0x0, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46, 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96},
+ {0x0, 0xa, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36, 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66},
+ {0x0, 0x7a, 0xf4, 0x8e, 0xf5, 0x8f, 0x1, 0x7b, 0xf7, 0x8d, 0x3, 0x79, 0x2, 0x78, 0xf6, 0x8c},
+ {0x0, 0x6a, 0xd4, 0xbe, 0xb5, 0xdf, 0x61, 0xb, 0x77, 0x1d, 0xa3, 0xc9, 0xc2, 0xa8, 0x16, 0x7c},
+ {0x0, 0x5a, 0xb4, 0xee, 0x75, 0x2f, 0xc1, 0x9b, 0xea, 0xb0, 0x5e, 0x4, 0x9f, 0xc5, 0x2b, 0x71},
+ {0x0, 0x4a, 0x94, 0xde, 0x35, 0x7f, 0xa1, 0xeb, 0x6a, 0x20, 0xfe, 0xb4, 0x5f, 0x15, 0xcb, 0x81},
+ {0x0, 0xba, 0x69, 0xd3, 0xd2, 0x68, 0xbb, 0x1, 0xb9, 0x3, 0xd0, 0x6a, 0x6b, 0xd1, 0x2, 0xb8},
+ {0x0, 0xaa, 0x49, 0xe3, 0x92, 0x38, 0xdb, 0x71, 0x39, 0x93, 0x70, 0xda, 0xab, 0x1, 0xe2, 0x48},
+ {0x0, 0x9a, 0x29, 0xb3, 0x52, 0xc8, 0x7b, 0xe1, 0xa4, 0x3e, 0x8d, 0x17, 0xf6, 0x6c, 0xdf, 0x45},
+ {0x0, 0x8a, 0x9, 0x83, 0x12, 0x98, 0x1b, 0x91, 0x24, 0xae, 0x2d, 0xa7, 0x36, 0xbc, 0x3f, 0xb5},
+ {0x0, 0xfa, 0xe9, 0x13, 0xcf, 0x35, 0x26, 0xdc, 0x83, 0x79, 0x6a, 0x90, 0x4c, 0xb6, 0xa5, 0x5f},
+ {0x0, 0xea, 0xc9, 0x23, 0x8f, 0x65, 0x46, 0xac, 0x3, 0xe9, 0xca, 0x20, 0x8c, 0x66, 0x45, 0xaf},
+ {0x0, 0xda, 0xa9, 0x73, 0x4f, 0x95, 0xe6, 0x3c, 0x9e, 0x44, 0x37, 0xed, 0xd1, 0xb, 0x78, 0xa2},
+ {0x0, 0xca, 0x89, 0x43, 0xf, 0xc5, 0x86, 0x4c, 0x1e, 0xd4, 0x97, 0x5d, 0x11, 0xdb, 0x98, 0x52},
+ {0x0, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5, 0x25, 0x2, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0},
+ {0x0, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85, 0xa5, 0x92, 0xcb, 0xfc, 0x79, 0x4e, 0x17, 0x20},
+ {0x0, 0x7, 0xe, 0x9, 0x1c, 0x1b, 0x12, 0x15, 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d},
+ {0x0, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65, 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd},
+ {0x0, 0x67, 0xce, 0xa9, 0x81, 0xe6, 0x4f, 0x28, 0x1f, 0x78, 0xd1, 0xb6, 0x9e, 0xf9, 0x50, 0x37},
+ {0x0, 0x77, 0xee, 0x99, 0xc1, 0xb6, 0x2f, 0x58, 0x9f, 0xe8, 0x71, 0x6, 0x5e, 0x29, 0xb0, 0xc7},
+ {0x0, 0x47, 0x8e, 0xc9, 0x1, 0x46, 0x8f, 0xc8, 0x2, 0x45, 0x8c, 0xcb, 0x3, 0x44, 0x8d, 0xca},
+ {0x0, 0x57, 0xae, 0xf9, 0x41, 0x16, 0xef, 0xb8, 0x82, 0xd5, 0x2c, 0x7b, 0xc3, 0x94, 0x6d, 0x3a},
+ {0x0, 0xa7, 0x53, 0xf4, 0xa6, 0x1, 0xf5, 0x52, 0x51, 0xf6, 0x2, 0xa5, 0xf7, 0x50, 0xa4, 0x3},
+ {0x0, 0xb7, 0x73, 0xc4, 0xe6, 0x51, 0x95, 0x22, 0xd1, 0x66, 0xa2, 0x15, 0x37, 0x80, 0x44, 0xf3},
+ {0x0, 0x87, 0x13, 0x94, 0x26, 0xa1, 0x35, 0xb2, 0x4c, 0xcb, 0x5f, 0xd8, 0x6a, 0xed, 0x79, 0xfe},
+ {0x0, 0x97, 0x33, 0xa4, 0x66, 0xf1, 0x55, 0xc2, 0xcc, 0x5b, 0xff, 0x68, 0xaa, 0x3d, 0x99, 0xe},
+ {0x0, 0xe7, 0xd3, 0x34, 0xbb, 0x5c, 0x68, 0x8f, 0x6b, 0x8c, 0xb8, 0x5f, 0xd0, 0x37, 0x3, 0xe4},
+ {0x0, 0xf7, 0xf3, 0x4, 0xfb, 0xc, 0x8, 0xff, 0xeb, 0x1c, 0x18, 0xef, 0x10, 0xe7, 0xe3, 0x14},
+ {0x0, 0xc7, 0x93, 0x54, 0x3b, 0xfc, 0xa8, 0x6f, 0x76, 0xb1, 0xe5, 0x22, 0x4d, 0x8a, 0xde, 0x19},
+ {0x0, 0xd7, 0xb3, 0x64, 0x7b, 0xac, 0xc8, 0x1f, 0xf6, 0x21, 0x45, 0x92, 0x8d, 0x5a, 0x3e, 0xe9},
+ {0x0, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51, 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6},
+ {0x0, 0x64, 0xc8, 0xac, 0x8d, 0xe9, 0x45, 0x21, 0x7, 0x63, 0xcf, 0xab, 0x8a, 0xee, 0x42, 0x26},
+ {0x0, 0x54, 0xa8, 0xfc, 0x4d, 0x19, 0xe5, 0xb1, 0x9a, 0xce, 0x32, 0x66, 0xd7, 0x83, 0x7f, 0x2b},
+ {0x0, 0x44, 0x88, 0xcc, 0xd, 0x49, 0x85, 0xc1, 0x1a, 0x5e, 0x92, 0xd6, 0x17, 0x53, 0x9f, 0xdb},
+ {0x0, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c, 0xbd, 0x89, 0xd5, 0xe1, 0x6d, 0x59, 0x5, 0x31},
+ {0x0, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc, 0x3d, 0x19, 0x75, 0x51, 0xad, 0x89, 0xe5, 0xc1},
+ {0x0, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c, 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc},
+ {0x0, 0x4, 0x8, 0xc, 0x10, 0x14, 0x18, 0x1c, 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c},
+ {0x0, 0xf4, 0xf5, 0x1, 0xf7, 0x3, 0x2, 0xf6, 0xf3, 0x7, 0x6, 0xf2, 0x4, 0xf0, 0xf1, 0x5},
+ {0x0, 0xe4, 0xd5, 0x31, 0xb7, 0x53, 0x62, 0x86, 0x73, 0x97, 0xa6, 0x42, 0xc4, 0x20, 0x11, 0xf5},
+ {0x0, 0xd4, 0xb5, 0x61, 0x77, 0xa3, 0xc2, 0x16, 0xee, 0x3a, 0x5b, 0x8f, 0x99, 0x4d, 0x2c, 0xf8},
+ {0x0, 0xc4, 0x95, 0x51, 0x37, 0xf3, 0xa2, 0x66, 0x6e, 0xaa, 0xfb, 0x3f, 0x59, 0x9d, 0xcc, 0x8},
+ {0x0, 0xb4, 0x75, 0xc1, 0xea, 0x5e, 0x9f, 0x2b, 0xc9, 0x7d, 0xbc, 0x8, 0x23, 0x97, 0x56, 0xe2},
+ {0x0, 0xa4, 0x55, 0xf1, 0xaa, 0xe, 0xff, 0x5b, 0x49, 0xed, 0x1c, 0xb8, 0xe3, 0x47, 0xb6, 0x12},
+ {0x0, 0x94, 0x35, 0xa1, 0x6a, 0xfe, 0x5f, 0xcb, 0xd4, 0x40, 0xe1, 0x75, 0xbe, 0x2a, 0x8b, 0x1f},
+ {0x0, 0x84, 0x15, 0x91, 0x2a, 0xae, 0x3f, 0xbb, 0x54, 0xd0, 0x41, 0xc5, 0x7e, 0xfa, 0x6b, 0xef},
+ {0x0, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x2, 0x6f, 0x6, 0xbd, 0xd4, 0xd6, 0xbf, 0x4, 0x6d},
+ {0x0, 0x79, 0xf2, 0x8b, 0xf9, 0x80, 0xb, 0x72, 0xef, 0x96, 0x1d, 0x64, 0x16, 0x6f, 0xe4, 0x9d},
+ {0x0, 0x49, 0x92, 0xdb, 0x39, 0x70, 0xab, 0xe2, 0x72, 0x3b, 0xe0, 0xa9, 0x4b, 0x2, 0xd9, 0x90},
+ {0x0, 0x59, 0xb2, 0xeb, 0x79, 0x20, 0xcb, 0x92, 0xf2, 0xab, 0x40, 0x19, 0x8b, 0xd2, 0x39, 0x60},
+ {0x0, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf, 0x55, 0x7c, 0x7, 0x2e, 0xf1, 0xd8, 0xa3, 0x8a},
+ {0x0, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf, 0xd5, 0xec, 0xa7, 0x9e, 0x31, 0x8, 0x43, 0x7a},
+ {0x0, 0x9, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77},
+ {0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f, 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87},
+ {0x0, 0xe9, 0xcf, 0x26, 0x83, 0x6a, 0x4c, 0xa5, 0x1b, 0xf2, 0xd4, 0x3d, 0x98, 0x71, 0x57, 0xbe},
+ {0x0, 0xf9, 0xef, 0x16, 0xc3, 0x3a, 0x2c, 0xd5, 0x9b, 0x62, 0x74, 0x8d, 0x58, 0xa1, 0xb7, 0x4e},
+ {0x0, 0xc9, 0x8f, 0x46, 0x3, 0xca, 0x8c, 0x45, 0x6, 0xcf, 0x89, 0x40, 0x5, 0xcc, 0x8a, 0x43},
+ {0x0, 0xd9, 0xaf, 0x76, 0x43, 0x9a, 0xec, 0x35, 0x86, 0x5f, 0x29, 0xf0, 0xc5, 0x1c, 0x6a, 0xb3},
+ {0x0, 0xa9, 0x4f, 0xe6, 0x9e, 0x37, 0xd1, 0x78, 0x21, 0x88, 0x6e, 0xc7, 0xbf, 0x16, 0xf0, 0x59},
+ {0x0, 0xb9, 0x6f, 0xd6, 0xde, 0x67, 0xb1, 0x8, 0xa1, 0x18, 0xce, 0x77, 0x7f, 0xc6, 0x10, 0xa9},
+ {0x0, 0x89, 0xf, 0x86, 0x1e, 0x97, 0x11, 0x98, 0x3c, 0xb5, 0x33, 0xba, 0x22, 0xab, 0x2d, 0xa4},
+ {0x0, 0x99, 0x2f, 0xb6, 0x5e, 0xc7, 0x71, 0xe8, 0xbc, 0x25, 0x93, 0xa, 0xe2, 0x7b, 0xcd, 0x54},
+ {0x0, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7, 0x4a, 0x4, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd},
+ {0x0, 0x5e, 0xbc, 0xe2, 0x65, 0x3b, 0xd9, 0x87, 0xca, 0x94, 0x76, 0x28, 0xaf, 0xf1, 0x13, 0x4d},
+ {0x0, 0x6e, 0xdc, 0xb2, 0xa5, 0xcb, 0x79, 0x17, 0x57, 0x39, 0x8b, 0xe5, 0xf2, 0x9c, 0x2e, 0x40},
+ {0x0, 0x7e, 0xfc, 0x82, 0xe5, 0x9b, 0x19, 0x67, 0xd7, 0xa9, 0x2b, 0x55, 0x32, 0x4c, 0xce, 0xb0},
+ {0x0, 0xe, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a},
+ {0x0, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a, 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa},
+ {0x0, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca, 0x6d, 0x43, 0x31, 0x1f, 0xd5, 0xfb, 0x89, 0xa7},
+ {0x0, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba, 0xed, 0xd3, 0x91, 0xaf, 0x15, 0x2b, 0x69, 0x57},
+ {0x0, 0xce, 0x81, 0x4f, 0x1f, 0xd1, 0x9e, 0x50, 0x3e, 0xf0, 0xbf, 0x71, 0x21, 0xef, 0xa0, 0x6e},
+ {0x0, 0xde, 0xa1, 0x7f, 0x5f, 0x81, 0xfe, 0x20, 0xbe, 0x60, 0x1f, 0xc1, 0xe1, 0x3f, 0x40, 0x9e},
+ {0x0, 0xee, 0xc1, 0x2f, 0x9f, 0x71, 0x5e, 0xb0, 0x23, 0xcd, 0xe2, 0xc, 0xbc, 0x52, 0x7d, 0x93},
+ {0x0, 0xfe, 0xe1, 0x1f, 0xdf, 0x21, 0x3e, 0xc0, 0xa3, 0x5d, 0x42, 0xbc, 0x7c, 0x82, 0x9d, 0x63},
+ {0x0, 0x8e, 0x1, 0x8f, 0x2, 0x8c, 0x3, 0x8d, 0x4, 0x8a, 0x5, 0x8b, 0x6, 0x88, 0x7, 0x89},
+ {0x0, 0x9e, 0x21, 0xbf, 0x42, 0xdc, 0x63, 0xfd, 0x84, 0x1a, 0xa5, 0x3b, 0xc6, 0x58, 0xe7, 0x79},
+ {0x0, 0xae, 0x41, 0xef, 0x82, 0x2c, 0xc3, 0x6d, 0x19, 0xb7, 0x58, 0xf6, 0x9b, 0x35, 0xda, 0x74},
+ {0x0, 0xbe, 0x61, 0xdf, 0xc2, 0x7c, 0xa3, 0x1d, 0x99, 0x27, 0xf8, 0x46, 0x5b, 0xe5, 0x3a, 0x84},
+ {0x0, 0x53, 0xa6, 0xf5, 0x51, 0x2, 0xf7, 0xa4, 0xa2, 0xf1, 0x4, 0x57, 0xf3, 0xa0, 0x55, 0x6},
+ {0x0, 0x43, 0x86, 0xc5, 0x11, 0x52, 0x97, 0xd4, 0x22, 0x61, 0xa4, 0xe7, 0x33, 0x70, 0xb5, 0xf6},
+ {0x0, 0x73, 0xe6, 0x95, 0xd1, 0xa2, 0x37, 0x44, 0xbf, 0xcc, 0x59, 0x2a, 0x6e, 0x1d, 0x88, 0xfb},
+ {0x0, 0x63, 0xc6, 0xa5, 0x91, 0xf2, 0x57, 0x34, 0x3f, 0x5c, 0xf9, 0x9a, 0xae, 0xcd, 0x68, 0xb},
+ {0x0, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79, 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1},
+ {0x0, 0x3, 0x6, 0x5, 0xc, 0xf, 0xa, 0x9, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11},
+ {0x0, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99, 0x85, 0xb6, 0xe3, 0xd0, 0x49, 0x7a, 0x2f, 0x1c},
+ {0x0, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9, 0x5, 0x26, 0x43, 0x60, 0x89, 0xaa, 0xcf, 0xec},
+ {0x0, 0xd3, 0xbb, 0x68, 0x6b, 0xb8, 0xd0, 0x3, 0xd6, 0x5, 0x6d, 0xbe, 0xbd, 0x6e, 0x6, 0xd5},
+ {0x0, 0xc3, 0x9b, 0x58, 0x2b, 0xe8, 0xb0, 0x73, 0x56, 0x95, 0xcd, 0xe, 0x7d, 0xbe, 0xe6, 0x25},
+ {0x0, 0xf3, 0xfb, 0x8, 0xeb, 0x18, 0x10, 0xe3, 0xcb, 0x38, 0x30, 0xc3, 0x20, 0xd3, 0xdb, 0x28},
+ {0x0, 0xe3, 0xdb, 0x38, 0xab, 0x48, 0x70, 0x93, 0x4b, 0xa8, 0x90, 0x73, 0xe0, 0x3, 0x3b, 0xd8},
+ {0x0, 0x93, 0x3b, 0xa8, 0x76, 0xe5, 0x4d, 0xde, 0xec, 0x7f, 0xd7, 0x44, 0x9a, 0x9, 0xa1, 0x32},
+ {0x0, 0x83, 0x1b, 0x98, 0x36, 0xb5, 0x2d, 0xae, 0x6c, 0xef, 0x77, 0xf4, 0x5a, 0xd9, 0x41, 0xc2},
+ {0x0, 0xb3, 0x7b, 0xc8, 0xf6, 0x45, 0x8d, 0x3e, 0xf1, 0x42, 0x8a, 0x39, 0x7, 0xb4, 0x7c, 0xcf},
+ {0x0, 0xa3, 0x5b, 0xf8, 0xb6, 0x15, 0xed, 0x4e, 0x71, 0xd2, 0x2a, 0x89, 0xc7, 0x64, 0x9c, 0x3f},
+ {0x0, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2, 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1},
+ {0x0, 0xf8, 0xed, 0x15, 0xc7, 0x3f, 0x2a, 0xd2, 0x93, 0x6b, 0x7e, 0x86, 0x54, 0xac, 0xb9, 0x41},
+ {0x0, 0xc8, 0x8d, 0x45, 0x7, 0xcf, 0x8a, 0x42, 0xe, 0xc6, 0x83, 0x4b, 0x9, 0xc1, 0x84, 0x4c},
+ {0x0, 0xd8, 0xad, 0x75, 0x47, 0x9f, 0xea, 0x32, 0x8e, 0x56, 0x23, 0xfb, 0xc9, 0x11, 0x64, 0xbc},
+ {0x0, 0xa8, 0x4d, 0xe5, 0x9a, 0x32, 0xd7, 0x7f, 0x29, 0x81, 0x64, 0xcc, 0xb3, 0x1b, 0xfe, 0x56},
+ {0x0, 0xb8, 0x6d, 0xd5, 0xda, 0x62, 0xb7, 0xf, 0xa9, 0x11, 0xc4, 0x7c, 0x73, 0xcb, 0x1e, 0xa6},
+ {0x0, 0x88, 0xd, 0x85, 0x1a, 0x92, 0x17, 0x9f, 0x34, 0xbc, 0x39, 0xb1, 0x2e, 0xa6, 0x23, 0xab},
+ {0x0, 0x98, 0x2d, 0xb5, 0x5a, 0xc2, 0x77, 0xef, 0xb4, 0x2c, 0x99, 0x1, 0xee, 0x76, 0xc3, 0x5b},
+ {0x0, 0x68, 0xd0, 0xb8, 0xbd, 0xd5, 0x6d, 0x5, 0x67, 0xf, 0xb7, 0xdf, 0xda, 0xb2, 0xa, 0x62},
+ {0x0, 0x78, 0xf0, 0x88, 0xfd, 0x85, 0xd, 0x75, 0xe7, 0x9f, 0x17, 0x6f, 0x1a, 0x62, 0xea, 0x92},
+ {0x0, 0x48, 0x90, 0xd8, 0x3d, 0x75, 0xad, 0xe5, 0x7a, 0x32, 0xea, 0xa2, 0x47, 0xf, 0xd7, 0x9f},
+ {0x0, 0x58, 0xb0, 0xe8, 0x7d, 0x25, 0xcd, 0x95, 0xfa, 0xa2, 0x4a, 0x12, 0x87, 0xdf, 0x37, 0x6f},
+ {0x0, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8, 0x5d, 0x75, 0xd, 0x25, 0xfd, 0xd5, 0xad, 0x85},
+ {0x0, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8, 0xdd, 0xe5, 0xad, 0x95, 0x3d, 0x5, 0x4d, 0x75},
+ {0x0, 0x8, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38, 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78},
+ {0x0, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48, 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88},
+ {0x0, 0xf5, 0xf7, 0x2, 0xf3, 0x6, 0x4, 0xf1, 0xfb, 0xe, 0xc, 0xf9, 0x8, 0xfd, 0xff, 0xa},
+ {0x0, 0xe5, 0xd7, 0x32, 0xb3, 0x56, 0x64, 0x81, 0x7b, 0x9e, 0xac, 0x49, 0xc8, 0x2d, 0x1f, 0xfa},
+ {0x0, 0xd5, 0xb7, 0x62, 0x73, 0xa6, 0xc4, 0x11, 0xe6, 0x33, 0x51, 0x84, 0x95, 0x40, 0x22, 0xf7},
+ {0x0, 0xc5, 0x97, 0x52, 0x33, 0xf6, 0xa4, 0x61, 0x66, 0xa3, 0xf1, 0x34, 0x55, 0x90, 0xc2, 0x7},
+ {0x0, 0xb5, 0x77, 0xc2, 0xee, 0x5b, 0x99, 0x2c, 0xc1, 0x74, 0xb6, 0x3, 0x2f, 0x9a, 0x58, 0xed},
+ {0x0, 0xa5, 0x57, 0xf2, 0xae, 0xb, 0xf9, 0x5c, 0x41, 0xe4, 0x16, 0xb3, 0xef, 0x4a, 0xb8, 0x1d},
+ {0x0, 0x95, 0x37, 0xa2, 0x6e, 0xfb, 0x59, 0xcc, 0xdc, 0x49, 0xeb, 0x7e, 0xb2, 0x27, 0x85, 0x10},
+ {0x0, 0x85, 0x17, 0x92, 0x2e, 0xab, 0x39, 0xbc, 0x5c, 0xd9, 0x4b, 0xce, 0x72, 0xf7, 0x65, 0xe0},
+ {0x0, 0x75, 0xea, 0x9f, 0xc9, 0xbc, 0x23, 0x56, 0x8f, 0xfa, 0x65, 0x10, 0x46, 0x33, 0xac, 0xd9},
+ {0x0, 0x65, 0xca, 0xaf, 0x89, 0xec, 0x43, 0x26, 0xf, 0x6a, 0xc5, 0xa0, 0x86, 0xe3, 0x4c, 0x29},
+ {0x0, 0x55, 0xaa, 0xff, 0x49, 0x1c, 0xe3, 0xb6, 0x92, 0xc7, 0x38, 0x6d, 0xdb, 0x8e, 0x71, 0x24},
+ {0x0, 0x45, 0x8a, 0xcf, 0x9, 0x4c, 0x83, 0xc6, 0x12, 0x57, 0x98, 0xdd, 0x1b, 0x5e, 0x91, 0xd4},
+ {0x0, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b, 0xb5, 0x80, 0xdf, 0xea, 0x61, 0x54, 0xb, 0x3e},
+ {0x0, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb, 0x35, 0x10, 0x7f, 0x5a, 0xa1, 0x84, 0xeb, 0xce},
+ {0x0, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b, 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3},
+ {0x0, 0x5, 0xa, 0xf, 0x14, 0x11, 0x1e, 0x1b, 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33},
+ {0x0, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x4, 0xde, 0xc, 0x67, 0xb5, 0xb1, 0x63, 0x8, 0xda},
+ {0x0, 0xc2, 0x99, 0x5b, 0x2f, 0xed, 0xb6, 0x74, 0x5e, 0x9c, 0xc7, 0x5, 0x71, 0xb3, 0xe8, 0x2a},
+ {0x0, 0xf2, 0xf9, 0xb, 0xef, 0x1d, 0x16, 0xe4, 0xc3, 0x31, 0x3a, 0xc8, 0x2c, 0xde, 0xd5, 0x27},
+ {0x0, 0xe2, 0xd9, 0x3b, 0xaf, 0x4d, 0x76, 0x94, 0x43, 0xa1, 0x9a, 0x78, 0xec, 0xe, 0x35, 0xd7},
+ {0x0, 0x92, 0x39, 0xab, 0x72, 0xe0, 0x4b, 0xd9, 0xe4, 0x76, 0xdd, 0x4f, 0x96, 0x4, 0xaf, 0x3d},
+ {0x0, 0x82, 0x19, 0x9b, 0x32, 0xb0, 0x2b, 0xa9, 0x64, 0xe6, 0x7d, 0xff, 0x56, 0xd4, 0x4f, 0xcd},
+ {0x0, 0xb2, 0x79, 0xcb, 0xf2, 0x40, 0x8b, 0x39, 0xf9, 0x4b, 0x80, 0x32, 0xb, 0xb9, 0x72, 0xc0},
+ {0x0, 0xa2, 0x59, 0xfb, 0xb2, 0x10, 0xeb, 0x49, 0x79, 0xdb, 0x20, 0x82, 0xcb, 0x69, 0x92, 0x30},
+ {0x0, 0x52, 0xa4, 0xf6, 0x55, 0x7, 0xf1, 0xa3, 0xaa, 0xf8, 0xe, 0x5c, 0xff, 0xad, 0x5b, 0x9},
+ {0x0, 0x42, 0x84, 0xc6, 0x15, 0x57, 0x91, 0xd3, 0x2a, 0x68, 0xae, 0xec, 0x3f, 0x7d, 0xbb, 0xf9},
+ {0x0, 0x72, 0xe4, 0x96, 0xd5, 0xa7, 0x31, 0x43, 0xb7, 0xc5, 0x53, 0x21, 0x62, 0x10, 0x86, 0xf4},
+ {0x0, 0x62, 0xc4, 0xa6, 0x95, 0xf7, 0x51, 0x33, 0x37, 0x55, 0xf3, 0x91, 0xa2, 0xc0, 0x66, 0x4},
+ {0x0, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e, 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee},
+ {0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e},
+ {0x0, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e, 0x8d, 0xbf, 0xe9, 0xdb, 0x45, 0x77, 0x21, 0x13},
+ {0x0, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee, 0xd, 0x2f, 0x49, 0x6b, 0x85, 0xa7, 0xc1, 0xe3},
+ {0x0, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57, 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61},
+ {0x0, 0xdf, 0xa3, 0x7c, 0x5b, 0x84, 0xf8, 0x27, 0xb6, 0x69, 0x15, 0xca, 0xed, 0x32, 0x4e, 0x91},
+ {0x0, 0xef, 0xc3, 0x2c, 0x9b, 0x74, 0x58, 0xb7, 0x2b, 0xc4, 0xe8, 0x7, 0xb0, 0x5f, 0x73, 0x9c},
+ {0x0, 0xff, 0xe3, 0x1c, 0xdb, 0x24, 0x38, 0xc7, 0xab, 0x54, 0x48, 0xb7, 0x70, 0x8f, 0x93, 0x6c},
+ {0x0, 0x8f, 0x3, 0x8c, 0x6, 0x89, 0x5, 0x8a, 0xc, 0x83, 0xf, 0x80, 0xa, 0x85, 0x9, 0x86},
+ {0x0, 0x9f, 0x23, 0xbc, 0x46, 0xd9, 0x65, 0xfa, 0x8c, 0x13, 0xaf, 0x30, 0xca, 0x55, 0xe9, 0x76},
+ {0x0, 0xaf, 0x43, 0xec, 0x86, 0x29, 0xc5, 0x6a, 0x11, 0xbe, 0x52, 0xfd, 0x97, 0x38, 0xd4, 0x7b},
+ {0x0, 0xbf, 0x63, 0xdc, 0xc6, 0x79, 0xa5, 0x1a, 0x91, 0x2e, 0xf2, 0x4d, 0x57, 0xe8, 0x34, 0x8b},
+ {0x0, 0x4f, 0x9e, 0xd1, 0x21, 0x6e, 0xbf, 0xf0, 0x42, 0xd, 0xdc, 0x93, 0x63, 0x2c, 0xfd, 0xb2},
+ {0x0, 0x5f, 0xbe, 0xe1, 0x61, 0x3e, 0xdf, 0x80, 0xc2, 0x9d, 0x7c, 0x23, 0xa3, 0xfc, 0x1d, 0x42},
+ {0x0, 0x6f, 0xde, 0xb1, 0xa1, 0xce, 0x7f, 0x10, 0x5f, 0x30, 0x81, 0xee, 0xfe, 0x91, 0x20, 0x4f},
+ {0x0, 0x7f, 0xfe, 0x81, 0xe1, 0x9e, 0x1f, 0x60, 0xdf, 0xa0, 0x21, 0x5e, 0x3e, 0x41, 0xc0, 0xbf},
+ {0x0, 0xf, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d, 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55},
+ {0x0, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d, 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5},
+ {0x0, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd, 0x65, 0x4a, 0x3b, 0x14, 0xd9, 0xf6, 0x87, 0xa8},
+ {0x0, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd, 0xe5, 0xda, 0x9b, 0xa4, 0x19, 0x26, 0x67, 0x58},
+ {0x0, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3, 0x94, 0x8, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67},
+ {0x0, 0x8c, 0x5, 0x89, 0xa, 0x86, 0xf, 0x83, 0x14, 0x98, 0x11, 0x9d, 0x1e, 0x92, 0x1b, 0x97},
+ {0x0, 0xbc, 0x65, 0xd9, 0xca, 0x76, 0xaf, 0x13, 0x89, 0x35, 0xec, 0x50, 0x43, 0xff, 0x26, 0x9a},
+ {0x0, 0xac, 0x45, 0xe9, 0x8a, 0x26, 0xcf, 0x63, 0x9, 0xa5, 0x4c, 0xe0, 0x83, 0x2f, 0xc6, 0x6a},
+ {0x0, 0xdc, 0xa5, 0x79, 0x57, 0x8b, 0xf2, 0x2e, 0xae, 0x72, 0xb, 0xd7, 0xf9, 0x25, 0x5c, 0x80},
+ {0x0, 0xcc, 0x85, 0x49, 0x17, 0xdb, 0x92, 0x5e, 0x2e, 0xe2, 0xab, 0x67, 0x39, 0xf5, 0xbc, 0x70},
+ {0x0, 0xfc, 0xe5, 0x19, 0xd7, 0x2b, 0x32, 0xce, 0xb3, 0x4f, 0x56, 0xaa, 0x64, 0x98, 0x81, 0x7d},
+ {0x0, 0xec, 0xc5, 0x29, 0x97, 0x7b, 0x52, 0xbe, 0x33, 0xdf, 0xf6, 0x1a, 0xa4, 0x48, 0x61, 0x8d},
+ {0x0, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54, 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4},
+ {0x0, 0xc, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24, 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44},
+ {0x0, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4, 0xfd, 0xc1, 0x85, 0xb9, 0xd, 0x31, 0x75, 0x49},
+ {0x0, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4, 0x7d, 0x51, 0x25, 0x9, 0xcd, 0xe1, 0x95, 0xb9},
+ {0x0, 0x5c, 0xb8, 0xe4, 0x6d, 0x31, 0xd5, 0x89, 0xda, 0x86, 0x62, 0x3e, 0xb7, 0xeb, 0xf, 0x53},
+ {0x0, 0x4c, 0x98, 0xd4, 0x2d, 0x61, 0xb5, 0xf9, 0x5a, 0x16, 0xc2, 0x8e, 0x77, 0x3b, 0xef, 0xa3},
+ {0x0, 0x7c, 0xf8, 0x84, 0xed, 0x91, 0x15, 0x69, 0xc7, 0xbb, 0x3f, 0x43, 0x2a, 0x56, 0xd2, 0xae},
+ {0x0, 0x6c, 0xd8, 0xb4, 0xad, 0xc1, 0x75, 0x19, 0x47, 0x2b, 0x9f, 0xf3, 0xea, 0x86, 0x32, 0x5e},
+ {0x0, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0, 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc},
+ {0x0, 0x91, 0x3f, 0xae, 0x7e, 0xef, 0x41, 0xd0, 0xfc, 0x6d, 0xc3, 0x52, 0x82, 0x13, 0xbd, 0x2c},
+ {0x0, 0xa1, 0x5f, 0xfe, 0xbe, 0x1f, 0xe1, 0x40, 0x61, 0xc0, 0x3e, 0x9f, 0xdf, 0x7e, 0x80, 0x21},
+ {0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x30, 0xe1, 0x50, 0x9e, 0x2f, 0x1f, 0xae, 0x60, 0xd1},
+ {0x0, 0xc1, 0x9f, 0x5e, 0x23, 0xe2, 0xbc, 0x7d, 0x46, 0x87, 0xd9, 0x18, 0x65, 0xa4, 0xfa, 0x3b},
+ {0x0, 0xd1, 0xbf, 0x6e, 0x63, 0xb2, 0xdc, 0xd, 0xc6, 0x17, 0x79, 0xa8, 0xa5, 0x74, 0x1a, 0xcb},
+ {0x0, 0xe1, 0xdf, 0x3e, 0xa3, 0x42, 0x7c, 0x9d, 0x5b, 0xba, 0x84, 0x65, 0xf8, 0x19, 0x27, 0xc6},
+ {0x0, 0xf1, 0xff, 0xe, 0xe3, 0x12, 0x1c, 0xed, 0xdb, 0x2a, 0x24, 0xd5, 0x38, 0xc9, 0xc7, 0x36},
+ {0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf},
+ {0x0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
+ {0x0, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7, 0x15, 0x34, 0x57, 0x76, 0x91, 0xb0, 0xd3, 0xf2},
+ {0x0, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97, 0x95, 0xa4, 0xf7, 0xc6, 0x51, 0x60, 0x33, 0x2},
+ {0x0, 0x41, 0x82, 0xc3, 0x19, 0x58, 0x9b, 0xda, 0x32, 0x73, 0xb0, 0xf1, 0x2b, 0x6a, 0xa9, 0xe8},
+ {0x0, 0x51, 0xa2, 0xf3, 0x59, 0x8, 0xfb, 0xaa, 0xb2, 0xe3, 0x10, 0x41, 0xeb, 0xba, 0x49, 0x18},
+ {0x0, 0x61, 0xc2, 0xa3, 0x99, 0xf8, 0x5b, 0x3a, 0x2f, 0x4e, 0xed, 0x8c, 0xb6, 0xd7, 0x74, 0x15},
+ {0x0, 0x71, 0xe2, 0x93, 0xd9, 0xa8, 0x3b, 0x4a, 0xaf, 0xde, 0x4d, 0x3c, 0x76, 0x7, 0x94, 0xe5},
+ {0x0, 0xa6, 0x51, 0xf7, 0xa2, 0x4, 0xf3, 0x55, 0x59, 0xff, 0x8, 0xae, 0xfb, 0x5d, 0xaa, 0xc},
+ {0x0, 0xb6, 0x71, 0xc7, 0xe2, 0x54, 0x93, 0x25, 0xd9, 0x6f, 0xa8, 0x1e, 0x3b, 0x8d, 0x4a, 0xfc},
+ {0x0, 0x86, 0x11, 0x97, 0x22, 0xa4, 0x33, 0xb5, 0x44, 0xc2, 0x55, 0xd3, 0x66, 0xe0, 0x77, 0xf1},
+ {0x0, 0x96, 0x31, 0xa7, 0x62, 0xf4, 0x53, 0xc5, 0xc4, 0x52, 0xf5, 0x63, 0xa6, 0x30, 0x97, 0x1},
+ {0x0, 0xe6, 0xd1, 0x37, 0xbf, 0x59, 0x6e, 0x88, 0x63, 0x85, 0xb2, 0x54, 0xdc, 0x3a, 0xd, 0xeb},
+ {0x0, 0xf6, 0xf1, 0x7, 0xff, 0x9, 0xe, 0xf8, 0xe3, 0x15, 0x12, 0xe4, 0x1c, 0xea, 0xed, 0x1b},
+ {0x0, 0xc6, 0x91, 0x57, 0x3f, 0xf9, 0xae, 0x68, 0x7e, 0xb8, 0xef, 0x29, 0x41, 0x87, 0xd0, 0x16},
+ {0x0, 0xd6, 0xb1, 0x67, 0x7f, 0xa9, 0xce, 0x18, 0xfe, 0x28, 0x4f, 0x99, 0x81, 0x57, 0x30, 0xe6},
+ {0x0, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2, 0x2d, 0xb, 0x61, 0x47, 0xb5, 0x93, 0xf9, 0xdf},
+ {0x0, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82, 0xad, 0x9b, 0xc1, 0xf7, 0x75, 0x43, 0x19, 0x2f},
+ {0x0, 0x6, 0xc, 0xa, 0x18, 0x1e, 0x14, 0x12, 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22},
+ {0x0, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62, 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2},
+ {0x0, 0x66, 0xcc, 0xaa, 0x85, 0xe3, 0x49, 0x2f, 0x17, 0x71, 0xdb, 0xbd, 0x92, 0xf4, 0x5e, 0x38},
+ {0x0, 0x76, 0xec, 0x9a, 0xc5, 0xb3, 0x29, 0x5f, 0x97, 0xe1, 0x7b, 0xd, 0x52, 0x24, 0xbe, 0xc8},
+ {0x0, 0x46, 0x8c, 0xca, 0x5, 0x43, 0x89, 0xcf, 0xa, 0x4c, 0x86, 0xc0, 0xf, 0x49, 0x83, 0xc5},
+ {0x0, 0x56, 0xac, 0xfa, 0x45, 0x13, 0xe9, 0xbf, 0x8a, 0xdc, 0x26, 0x70, 0xcf, 0x99, 0x63, 0x35},
+ {0x0, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x6, 0xb1, 0xa, 0xda, 0x61, 0x67, 0xdc, 0xc, 0xb7},
+ {0x0, 0xab, 0x4b, 0xe0, 0x96, 0x3d, 0xdd, 0x76, 0x31, 0x9a, 0x7a, 0xd1, 0xa7, 0xc, 0xec, 0x47},
+ {0x0, 0x9b, 0x2b, 0xb0, 0x56, 0xcd, 0x7d, 0xe6, 0xac, 0x37, 0x87, 0x1c, 0xfa, 0x61, 0xd1, 0x4a},
+ {0x0, 0x8b, 0xb, 0x80, 0x16, 0x9d, 0x1d, 0x96, 0x2c, 0xa7, 0x27, 0xac, 0x3a, 0xb1, 0x31, 0xba},
+ {0x0, 0xfb, 0xeb, 0x10, 0xcb, 0x30, 0x20, 0xdb, 0x8b, 0x70, 0x60, 0x9b, 0x40, 0xbb, 0xab, 0x50},
+ {0x0, 0xeb, 0xcb, 0x20, 0x8b, 0x60, 0x40, 0xab, 0xb, 0xe0, 0xc0, 0x2b, 0x80, 0x6b, 0x4b, 0xa0},
+ {0x0, 0xdb, 0xab, 0x70, 0x4b, 0x90, 0xe0, 0x3b, 0x96, 0x4d, 0x3d, 0xe6, 0xdd, 0x6, 0x76, 0xad},
+ {0x0, 0xcb, 0x8b, 0x40, 0xb, 0xc0, 0x80, 0x4b, 0x16, 0xdd, 0x9d, 0x56, 0x1d, 0xd6, 0x96, 0x5d},
+ {0x0, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1, 0xc5, 0xfe, 0xb3, 0x88, 0x29, 0x12, 0x5f, 0x64},
+ {0x0, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1, 0x45, 0x6e, 0x13, 0x38, 0xe9, 0xc2, 0xbf, 0x94},
+ {0x0, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99},
+ {0x0, 0xb, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69},
+ {0x0, 0x7b, 0xf6, 0x8d, 0xf1, 0x8a, 0x7, 0x7c, 0xff, 0x84, 0x9, 0x72, 0xe, 0x75, 0xf8, 0x83},
+ {0x0, 0x6b, 0xd6, 0xbd, 0xb1, 0xda, 0x67, 0xc, 0x7f, 0x14, 0xa9, 0xc2, 0xce, 0xa5, 0x18, 0x73},
+ {0x0, 0x5b, 0xb6, 0xed, 0x71, 0x2a, 0xc7, 0x9c, 0xe2, 0xb9, 0x54, 0xf, 0x93, 0xc8, 0x25, 0x7e},
+ {0x0, 0x4b, 0x96, 0xdd, 0x31, 0x7a, 0xa7, 0xec, 0x62, 0x29, 0xf4, 0xbf, 0x53, 0x18, 0xc5, 0x8e}}
+
+// galMultiply multiplies two elements of the field.
+// Uses the full 256x256 lookup table (mulTable); ~40% faster than the
+// log/exp formulation kept for reference below.
+func galMultiply(a, b byte) byte {
+	return mulTable[a][b]
+}
+
+// Original function:
+/*
+// galMultiply multiplies two elements of the field.
+func galMultiply(a, b byte) byte {
+ if a == 0 || b == 0 {
+ return 0
+ }
+ logA := int(logTable[a])
+ logB := int(logTable[b])
+ return expTable[logA+logB]
+}
+*/
+
+// galDivide is inverse of galMultiply.
+// Division is subtraction in the log domain: a/b == exp(log a - log b),
+// with the exponent wrapped back into [0, 255). A zero dividend always
+// yields zero (checked first, so 0/0 == 0); a zero divisor panics.
+func galDivide(a, b byte) byte {
+	switch {
+	case a == 0:
+		return 0
+	case b == 0:
+		panic("Argument 'divisor' is 0")
+	}
+	diff := int(logTable[a]) - int(logTable[b])
+	if diff < 0 {
+		// Wrap into the multiplicative group order.
+		diff += 255
+	}
+	return expTable[diff]
+}
+
+// Computes a**n.
+//
+// The result will be the same as multiplying a times itself n times.
+// a**0 is defined as 1 for every a (including 0); 0**n is 0 for n > 0.
+func galExp(a byte, n int) byte {
+	if n == 0 {
+		return 1
+	}
+	if a == 0 {
+		return 0
+	}
+
+	logA := logTable[a]
+	// Exponentiation is multiplication in the log domain; reduce the
+	// exponent modulo 255 (the order of the multiplicative group) in a
+	// single step instead of the original repeated-subtraction loop,
+	// which took O(n) iterations for large exponents.
+	logResult := int(logA) * n % 255
+	return expTable[logResult]
+}
+
+// genAvx2Matrix packs the coding matrix into the layout consumed by the
+// generated AVX2 kernels: every (input, output) coefficient contributes
+// 64 bytes — its 16-byte low-nibble lookup table written twice, then its
+// 16-byte high-nibble table written twice. dst is reused when its
+// capacity suffices; otherwise a fresh slice is allocated.
+func genAvx2Matrix(matrixRows [][]byte, inputs, outputs int, dst []byte) []byte {
+	if !avx2CodeGen {
+		panic("codegen not enabled")
+	}
+
+	// 64 bytes per coefficient: duplicated low table + duplicated high table.
+	need := inputs * outputs * 32 * 2
+	if cap(dst) >= need {
+		dst = dst[:need]
+	} else {
+		dst = make([]byte, need)
+	}
+	for out, row := range matrixRows[:outputs] {
+		for in, coeff := range row[:inputs] {
+			// Entries are ordered input-major: all outputs for input 0,
+			// then all outputs for input 1, and so on.
+			pos := (in*outputs + out) * 64
+			lo, hi := mulTableLow[coeff][:], mulTableHigh[coeff][:]
+			copy(dst[pos:], lo)
+			copy(dst[pos+16:], lo)
+			copy(dst[pos+32:], hi)
+			copy(dst[pos+48:], hi)
+		}
+	}
+	return dst
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
new file mode 100644
index 0000000..720196f
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.go
@@ -0,0 +1,338 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2019, Minio, Inc.
+
+package reedsolomon
+
+import (
+ "sync"
+)
+
+//go:noescape
+func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
+
+//go:noescape
+func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
+
+//go:noescape
+func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
+
+const (
+ dimIn = 8 // Number of input rows processed simultaneously
+ dimOut81 = 1 // Number of output rows processed simultaneously for x1 routine
+ dimOut82 = 2 // Number of output rows processed simultaneously for x2 routine
+ dimOut84 = 4 // Number of output rows processed simultaneously for x4 routine
+ matrixSize81 = (16 + 16) * dimIn * dimOut81 // Dimension of slice of matrix coefficient passed into x1 routine
+ matrixSize82 = (16 + 16) * dimIn * dimOut82 // Dimension of slice of matrix coefficient passed into x2 routine
+ matrixSize84 = (16 + 16) * dimIn * dimOut84 // Dimension of slice of matrix coefficient passed into x4 routine
+)
+
+// Construct block of matrix coefficients for single output row in parallel
+func setupMatrix81(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize81]byte) {
+ offset := 0
+ for c := inputOffset; c < inputOffset+dimIn; c++ {
+ for iRow := outputOffset; iRow < outputOffset+dimOut81; iRow++ {
+ if c < len(matrixRows[iRow]) {
+ coeff := matrixRows[iRow][c]
+ copy(matrix[offset*32:], mulTableLow[coeff][:])
+ copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+ } else {
+ // coefficients not used for this input shard (so null out)
+ v := matrix[offset*32 : offset*32+32]
+ for i := range v {
+ v[i] = 0
+ }
+ }
+ offset += dimIn
+ if offset >= dimIn*dimOut81 {
+ offset -= dimIn*dimOut81 - 1
+ }
+ }
+ }
+}
+
+// Construct block of matrix coefficients for 2 output rows in parallel
+func setupMatrix82(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize82]byte) {
+ offset := 0
+ for c := inputOffset; c < inputOffset+dimIn; c++ {
+ for iRow := outputOffset; iRow < outputOffset+dimOut82; iRow++ {
+ if c < len(matrixRows[iRow]) {
+ coeff := matrixRows[iRow][c]
+ copy(matrix[offset*32:], mulTableLow[coeff][:])
+ copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+ } else {
+ // coefficients not used for this input shard (so null out)
+ v := matrix[offset*32 : offset*32+32]
+ for i := range v {
+ v[i] = 0
+ }
+ }
+ offset += dimIn
+ if offset >= dimIn*dimOut82 {
+ offset -= dimIn*dimOut82 - 1
+ }
+ }
+ }
+}
+
+// Construct block of matrix coefficients for 4 output rows in parallel
+func setupMatrix84(matrixRows [][]byte, inputOffset, outputOffset int, matrix *[matrixSize84]byte) {
+ offset := 0
+ for c := inputOffset; c < inputOffset+dimIn; c++ {
+ for iRow := outputOffset; iRow < outputOffset+dimOut84; iRow++ {
+ if c < len(matrixRows[iRow]) {
+ coeff := matrixRows[iRow][c]
+ copy(matrix[offset*32:], mulTableLow[coeff][:])
+ copy(matrix[offset*32+16:], mulTableHigh[coeff][:])
+ } else {
+ // coefficients not used for this input shard (so null out)
+ v := matrix[offset*32 : offset*32+32]
+ for i := range v {
+ v[i] = 0
+ }
+ }
+ offset += dimIn
+ if offset >= dimIn*dimOut84 {
+ offset -= dimIn*dimOut84 - 1
+ }
+ }
+ }
+}
+
+// Invoke AVX512 routine for single output row in parallel
+func galMulAVX512Parallel81(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix81 *[matrixSize81]byte) {
+ done := stop - start
+ if done <= 0 {
+ return
+ }
+
+ inputEnd := inputOffset + dimIn
+ if inputEnd > len(in) {
+ inputEnd = len(in)
+ }
+ outputEnd := outputOffset + dimOut81
+ if outputEnd > len(out) {
+ outputEnd = len(out)
+ }
+
+ // We know the max size, alloc temp array.
+ var inTmp [dimIn][]byte
+ for i, v := range in[inputOffset:inputEnd] {
+ inTmp[i] = v[start:stop]
+ }
+ var outTmp [dimOut81][]byte
+ for i, v := range out[outputOffset:outputEnd] {
+ outTmp[i] = v[start:stop]
+ }
+
+ addTo := inputOffset != 0 // Except for the first input column, add to previous results
+ _galMulAVX512Parallel81(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix81, addTo)
+
+ done = start + ((done >> 6) << 6)
+ if done < stop {
+ galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
+ }
+}
+
+// Invoke AVX512 routine for 2 output rows in parallel
+func galMulAVX512Parallel82(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix82 *[matrixSize82]byte) {
+ done := stop - start
+ if done <= 0 {
+ return
+ }
+
+ inputEnd := inputOffset + dimIn
+ if inputEnd > len(in) {
+ inputEnd = len(in)
+ }
+ outputEnd := outputOffset + dimOut82
+ if outputEnd > len(out) {
+ outputEnd = len(out)
+ }
+
+ // We know the max size, alloc temp array.
+ var inTmp [dimIn][]byte
+ for i, v := range in[inputOffset:inputEnd] {
+ inTmp[i] = v[start:stop]
+ }
+ var outTmp [dimOut82][]byte
+ for i, v := range out[outputOffset:outputEnd] {
+ outTmp[i] = v[start:stop]
+ }
+
+ addTo := inputOffset != 0 // Except for the first input column, add to previous results
+ _galMulAVX512Parallel82(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix82, addTo)
+
+ done = start + ((done >> 6) << 6)
+ if done < stop {
+ galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
+ }
+}
+
+// Invoke AVX512 routine for 4 output rows in parallel
+func galMulAVX512Parallel84(in, out [][]byte, matrixRows [][]byte, inputOffset, outputOffset, start, stop int, matrix84 *[matrixSize84]byte) {
+ done := stop - start
+ if done <= 0 {
+ return
+ }
+
+ inputEnd := inputOffset + dimIn
+ if inputEnd > len(in) {
+ inputEnd = len(in)
+ }
+ outputEnd := outputOffset + dimOut84
+ if outputEnd > len(out) {
+ outputEnd = len(out)
+ }
+
+ // We know the max size, alloc temp array.
+ var inTmp [dimIn][]byte
+ for i, v := range in[inputOffset:inputEnd] {
+ inTmp[i] = v[start:stop]
+ }
+ var outTmp [dimOut84][]byte
+ for i, v := range out[outputOffset:outputEnd] {
+ outTmp[i] = v[start:stop]
+ }
+
+ addTo := inputOffset != 0 // Except for the first input column, add to previous results
+ _galMulAVX512Parallel84(inTmp[:inputEnd-inputOffset], outTmp[:outputEnd-outputOffset], matrix84, addTo)
+
+ done = start + ((done >> 6) << 6)
+ if done < stop {
+ galMulAVX512LastInput(inputOffset, inputEnd, outputOffset, outputEnd, matrixRows, done, stop, out, in)
+ }
+}
+
+func galMulAVX512LastInput(inputOffset int, inputEnd int, outputOffset int, outputEnd int, matrixRows [][]byte, done int, stop int, out [][]byte, in [][]byte) {
+ for c := inputOffset; c < inputEnd; c++ {
+ for iRow := outputOffset; iRow < outputEnd; iRow++ {
+ if c < len(matrixRows[iRow]) {
+ mt := mulTable[matrixRows[iRow][c]][:256]
+ for i := done; i < stop; i++ {
+ if c == 0 { // only set value for first input column
+ out[iRow][i] = mt[in[c][i]]
+ } else { // and add for all others
+ out[iRow][i] ^= mt[in[c][i]]
+ }
+ }
+ }
+ }
+ }
+}
+
+// Perform the same as codeSomeShards, but taking advantage of
+// AVX512 parallelism for up to 4x faster execution as compared to AVX2
+func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+ // Process using no goroutines
+ start, end := 0, r.o.perRound
+ if end > byteCount {
+ end = byteCount
+ }
+ for start < byteCount {
+ matrix84 := [matrixSize84]byte{}
+ matrix82 := [matrixSize82]byte{}
+ matrix81 := [matrixSize81]byte{}
+
+ outputRow := 0
+ // First process (multiple) batches of 4 output rows in parallel
+ if outputRow+dimOut84 <= outputCount {
+ for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
+ for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+ setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
+ galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix84)
+ }
+ }
+ }
+ // Then process a (single) batch of 2 output rows in parallel
+ if outputRow+dimOut82 <= outputCount {
+ for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+ setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
+ galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix82)
+ }
+ outputRow += dimOut82
+ }
+ // Lastly, we may have a single output row left (for uneven parity)
+ if outputRow < outputCount {
+ for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+ setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
+ galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, end, &matrix81)
+ }
+ }
+
+ start = end
+ end += r.o.perRound
+ if end > byteCount {
+ end = byteCount
+ }
+ }
+}
+
+// Perform the same as codeSomeShards, but taking advantage of
+// AVX512 parallelism for up to 4x faster execution as compared to AVX2
+func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+ var wg sync.WaitGroup
+ do := byteCount / r.o.maxGoroutines
+ if do < r.o.minSplitSize {
+ do = r.o.minSplitSize
+ }
+ // Make sizes divisible by 64
+ do = (do + 63) & (^63)
+ start := 0
+ for start < byteCount {
+ if start+do > byteCount {
+ do = byteCount - start
+ }
+ wg.Add(1)
+ go func(grStart, grStop int) {
+ start, stop := grStart, grStart+r.o.perRound
+ if stop > grStop {
+ stop = grStop
+ }
+ // Loop for each round.
+ matrix84 := [matrixSize84]byte{}
+ matrix82 := [matrixSize82]byte{}
+ matrix81 := [matrixSize81]byte{}
+ for start < grStop {
+ outputRow := 0
+ // First process (multiple) batches of 4 output rows in parallel
+ if outputRow+dimOut84 <= outputCount {
+ // 1K matrix buffer
+ for ; outputRow+dimOut84 <= outputCount; outputRow += dimOut84 {
+ for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+ setupMatrix84(matrixRows, inputRow, outputRow, &matrix84)
+ galMulAVX512Parallel84(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix84)
+ }
+ }
+ }
+ // Then process a (single) batch of 2 output rows in parallel
+ if outputRow+dimOut82 <= outputCount {
+ // 512B matrix buffer
+ for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+ setupMatrix82(matrixRows, inputRow, outputRow, &matrix82)
+ galMulAVX512Parallel82(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix82)
+ }
+ outputRow += dimOut82
+ }
+ // Lastly, we may have a single output row left (for uneven parity)
+ if outputRow < outputCount {
+ for inputRow := 0; inputRow < len(inputs); inputRow += dimIn {
+ setupMatrix81(matrixRows, inputRow, outputRow, &matrix81)
+ galMulAVX512Parallel81(inputs, outputs, matrixRows, inputRow, outputRow, start, stop, &matrix81)
+ }
+ }
+ start = stop
+ stop += r.o.perRound
+ if stop > grStop {
+ stop = grStop
+ }
+ }
+ wg.Done()
+ }(start, start+do)
+ start += do
+ }
+ wg.Wait()
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
new file mode 100644
index 0000000..97ad420
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galoisAvx512_amd64.s
@@ -0,0 +1,400 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2019, Minio, Inc.
+
+#define LOAD(OFFSET) \
+ MOVQ OFFSET(SI), BX \
+ VMOVDQU64 (BX)(R11*1), Z0 \
+ VPSRLQ $4, Z0, Z1 \ // high input
+ VPANDQ Z2, Z0, Z0 \ // low input
+ VPANDQ Z2, Z1, Z1 // high input
+
+#define GALOIS_MUL(MUL_LO, MUL_HI, LO, HI, OUT) \
+ VPSHUFB Z0, MUL_LO, LO \ // mul low part
+ VPSHUFB Z1, MUL_HI, HI \ // mul high part
+ VPTERNLOGD $0x96, LO, HI, OUT
+
+#define GALOIS(C1, C2, IN, LO, HI, OUT) \
+ VSHUFI64X2 $C1, IN, IN, LO \
+ VSHUFI64X2 $C2, IN, IN, HI \
+ GALOIS_MUL(LO, HI, LO, HI, OUT)
+
+//
+// Process single output row from a total of 8 input rows
+//
+// func _galMulAVX512Parallel81(in, out [][]byte, matrix *[matrixSize81]byte, addTo bool)
+TEXT ·_galMulAVX512Parallel81(SB), 7, $0
+ MOVQ in+0(FP), SI
+ MOVQ 8(SI), R9 // R9: len(in)
+ SHRQ $6, R9 // len(in) / 64
+ TESTQ R9, R9
+ JZ done_avx512_parallel81
+
+ MOVQ matrix+48(FP), SI
+ VMOVDQU64 0x000(SI), Z16
+ VMOVDQU64 0x040(SI), Z17
+ VMOVDQU64 0x080(SI), Z18
+ VMOVDQU64 0x0c0(SI), Z19
+
+ // Initialize multiplication constants
+ VSHUFI64X2 $0x55, Z16, Z16, Z20
+ VSHUFI64X2 $0xaa, Z16, Z16, Z24
+ VSHUFI64X2 $0xff, Z16, Z16, Z28
+ VSHUFI64X2 $0x00, Z16, Z16, Z16
+
+ VSHUFI64X2 $0x55, Z17, Z17, Z21
+ VSHUFI64X2 $0xaa, Z17, Z17, Z25
+ VSHUFI64X2 $0xff, Z17, Z17, Z29
+ VSHUFI64X2 $0x00, Z17, Z17, Z17
+
+ VSHUFI64X2 $0x55, Z18, Z18, Z22
+ VSHUFI64X2 $0xaa, Z18, Z18, Z26
+ VSHUFI64X2 $0xff, Z18, Z18, Z30
+ VSHUFI64X2 $0x00, Z18, Z18, Z18
+
+ VSHUFI64X2 $0x55, Z19, Z19, Z23
+ VSHUFI64X2 $0xaa, Z19, Z19, Z27
+ VSHUFI64X2 $0xff, Z19, Z19, Z31
+ VSHUFI64X2 $0x00, Z19, Z19, Z19
+
+ MOVQ $15, BX
+ VPBROADCASTB BX, Z2
+
+ MOVB addTo+56(FP), AX
+ IMULQ $-0x1, AX
+ KMOVQ AX, K1
+ MOVQ in+0(FP), SI // SI: &in
+ MOVQ in_len+8(FP), AX // number of inputs
+ XORQ R11, R11
+ MOVQ out+24(FP), DX
+ MOVQ (DX), DX // DX: &out[0][0]
+
+loopback_avx512_parallel81:
+ VMOVDQU64.Z (DX), K1, Z4
+
+ LOAD(0x00) // &in[0][0]
+ GALOIS_MUL(Z16, Z20, Z14, Z15, Z4)
+
+ CMPQ AX, $1
+ JE skip_avx512_parallel81
+
+ LOAD(0x18) // &in[1][0]
+ GALOIS_MUL(Z24, Z28, Z14, Z15, Z4)
+
+ CMPQ AX, $2
+ JE skip_avx512_parallel81
+
+ LOAD(0x30) // &in[2][0]
+ GALOIS_MUL(Z17, Z21, Z14, Z15, Z4)
+
+ CMPQ AX, $3
+ JE skip_avx512_parallel81
+
+ LOAD(0x48) // &in[3][0]
+ GALOIS_MUL(Z25, Z29, Z14, Z15, Z4)
+
+ CMPQ AX, $4
+ JE skip_avx512_parallel81
+
+ LOAD(0x60) // &in[4][0]
+ GALOIS_MUL(Z18, Z22, Z14, Z15, Z4)
+
+ CMPQ AX, $5
+ JE skip_avx512_parallel81
+
+ LOAD(0x78) // &in[5][0]
+ GALOIS_MUL(Z26, Z30, Z14, Z15, Z4)
+
+ CMPQ AX, $6
+ JE skip_avx512_parallel81
+
+ LOAD(0x90) // &in[6][0]
+ GALOIS_MUL(Z19, Z23, Z14, Z15, Z4)
+
+ CMPQ AX, $7
+ JE skip_avx512_parallel81
+
+ LOAD(0xa8) // &in[7][0]
+ GALOIS_MUL(Z27, Z31, Z14, Z15, Z4)
+
+skip_avx512_parallel81:
+ VMOVDQU64 Z4, (DX)
+
+ ADDQ $64, R11 // in4+=64
+
+ ADDQ $64, DX // out+=64
+
+ SUBQ $1, R9
+ JNZ loopback_avx512_parallel81
+
+done_avx512_parallel81:
+ VZEROUPPER
+ RET
+
+//
+// Process 2 output rows in parallel from a total of 8 input rows
+//
+// func _galMulAVX512Parallel82(in, out [][]byte, matrix *[matrixSize82]byte, addTo bool)
+TEXT ·_galMulAVX512Parallel82(SB), 7, $0
+ MOVQ in+0(FP), SI
+ MOVQ 8(SI), R9 // R9: len(in)
+ SHRQ $6, R9 // len(in) / 64
+ TESTQ R9, R9
+ JZ done_avx512_parallel82
+
+ MOVQ matrix+48(FP), SI
+ VMOVDQU64 0x000(SI), Z16
+ VMOVDQU64 0x040(SI), Z17
+ VMOVDQU64 0x080(SI), Z18
+ VMOVDQU64 0x0c0(SI), Z19
+ VMOVDQU64 0x100(SI), Z20
+ VMOVDQU64 0x140(SI), Z21
+ VMOVDQU64 0x180(SI), Z22
+ VMOVDQU64 0x1c0(SI), Z23
+
+ // Initialize multiplication constants
+ VSHUFI64X2 $0x55, Z16, Z16, Z24
+ VSHUFI64X2 $0xaa, Z16, Z16, Z25
+ VSHUFI64X2 $0xff, Z16, Z16, Z26
+ VSHUFI64X2 $0x00, Z16, Z16, Z16
+
+ VSHUFI64X2 $0x55, Z20, Z20, Z27
+ VSHUFI64X2 $0xaa, Z20, Z20, Z28
+ VSHUFI64X2 $0xff, Z20, Z20, Z29
+ VSHUFI64X2 $0x00, Z20, Z20, Z20
+
+ VSHUFI64X2 $0x55, Z17, Z17, Z30
+ VSHUFI64X2 $0xaa, Z17, Z17, Z31
+ VSHUFI64X2 $0xff, Z17, Z17, Z11
+ VSHUFI64X2 $0x00, Z17, Z17, Z17
+
+ VSHUFI64X2 $0x55, Z21, Z21, Z8
+ VSHUFI64X2 $0xaa, Z21, Z21, Z9
+ VSHUFI64X2 $0xff, Z21, Z21, Z10
+ VSHUFI64X2 $0x00, Z21, Z21, Z21
+
+ MOVQ $15, BX
+ VPBROADCASTB BX, Z2
+
+ MOVB addTo+56(FP), AX
+ IMULQ $-0x1, AX
+ KMOVQ AX, K1
+ MOVQ in+0(FP), SI // SI: &in
+ MOVQ in_len+8(FP), AX // number of inputs
+ XORQ R11, R11
+ MOVQ out+24(FP), DX
+ MOVQ 24(DX), CX // CX: &out[1][0]
+ MOVQ (DX), DX // DX: &out[0][0]
+
+loopback_avx512_parallel82:
+ VMOVDQU64.Z (DX), K1, Z4
+ VMOVDQU64.Z (CX), K1, Z5
+
+ LOAD(0x00) // &in[0][0]
+ GALOIS_MUL(Z16, Z24, Z14, Z15, Z4)
+ GALOIS_MUL(Z20, Z27, Z12, Z13, Z5)
+
+ CMPQ AX, $1
+ JE skip_avx512_parallel82
+
+ LOAD(0x18) // &in[1][0]
+ GALOIS_MUL(Z25, Z26, Z14, Z15, Z4)
+ GALOIS_MUL(Z28, Z29, Z12, Z13, Z5)
+
+ CMPQ AX, $2
+ JE skip_avx512_parallel82
+
+ LOAD(0x30) // &in[2][0]
+ GALOIS_MUL(Z17, Z30, Z14, Z15, Z4)
+ GALOIS_MUL(Z21, Z8, Z12, Z13, Z5)
+
+ CMPQ AX, $3
+ JE skip_avx512_parallel82
+
+ LOAD(0x48) // &in[3][0]
+ GALOIS_MUL(Z31, Z11, Z14, Z15, Z4)
+ GALOIS_MUL(Z9, Z10, Z12, Z13, Z5)
+
+ CMPQ AX, $4
+ JE skip_avx512_parallel82
+
+ LOAD(0x60) // &in[4][0]
+ GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
+ GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
+
+ CMPQ AX, $5
+ JE skip_avx512_parallel82
+
+ LOAD(0x78) // &in[5][0]
+ GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
+ GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
+
+ CMPQ AX, $6
+ JE skip_avx512_parallel82
+
+ LOAD(0x90) // &in[6][0]
+ GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
+ GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
+
+ CMPQ AX, $7
+ JE skip_avx512_parallel82
+
+ LOAD(0xa8) // &in[7][0]
+ GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
+ GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
+
+skip_avx512_parallel82:
+ VMOVDQU64 Z4, (DX)
+ VMOVDQU64 Z5, (CX)
+
+ ADDQ $64, R11 // in4+=64
+
+ ADDQ $64, DX // out+=64
+ ADDQ $64, CX // out2+=64
+
+ SUBQ $1, R9
+ JNZ loopback_avx512_parallel82
+
+done_avx512_parallel82:
+ VZEROUPPER
+ RET
+
+//
+// Process 4 output rows in parallel from a total of 8 input rows
+//
+// func _galMulAVX512Parallel84(in, out [][]byte, matrix *[matrixSize84]byte, addTo bool)
+TEXT ·_galMulAVX512Parallel84(SB), 7, $0
+ MOVQ in+0(FP), SI
+ MOVQ 8(SI), R9 // R9: len(in)
+ SHRQ $6, R9 // len(in) / 64
+ TESTQ R9, R9
+ JZ done_avx512_parallel84
+
+ MOVQ matrix+48(FP), SI
+ VMOVDQU64 0x000(SI), Z16
+ VMOVDQU64 0x040(SI), Z17
+ VMOVDQU64 0x080(SI), Z18
+ VMOVDQU64 0x0c0(SI), Z19
+ VMOVDQU64 0x100(SI), Z20
+ VMOVDQU64 0x140(SI), Z21
+ VMOVDQU64 0x180(SI), Z22
+ VMOVDQU64 0x1c0(SI), Z23
+ VMOVDQU64 0x200(SI), Z24
+ VMOVDQU64 0x240(SI), Z25
+ VMOVDQU64 0x280(SI), Z26
+ VMOVDQU64 0x2c0(SI), Z27
+ VMOVDQU64 0x300(SI), Z28
+ VMOVDQU64 0x340(SI), Z29
+ VMOVDQU64 0x380(SI), Z30
+ VMOVDQU64 0x3c0(SI), Z31
+
+ MOVQ $15, BX
+ VPBROADCASTB BX, Z2
+
+ MOVB addTo+56(FP), AX
+ IMULQ $-0x1, AX
+ KMOVQ AX, K1
+ MOVQ in+0(FP), SI // SI: &in
+ MOVQ in_len+8(FP), AX // number of inputs
+ XORQ R11, R11
+ MOVQ out+24(FP), DX
+ MOVQ 24(DX), CX // CX: &out[1][0]
+ MOVQ 48(DX), R10 // R10: &out[2][0]
+ MOVQ 72(DX), R12 // R12: &out[3][0]
+ MOVQ (DX), DX // DX: &out[0][0]
+
+loopback_avx512_parallel84:
+ VMOVDQU64.Z (DX), K1, Z4
+ VMOVDQU64.Z (CX), K1, Z5
+ VMOVDQU64.Z (R10), K1, Z6
+ VMOVDQU64.Z (R12), K1, Z7
+
+ LOAD(0x00) // &in[0][0]
+ GALOIS(0x00, 0x55, Z16, Z14, Z15, Z4)
+ GALOIS(0x00, 0x55, Z20, Z12, Z13, Z5)
+ GALOIS(0x00, 0x55, Z24, Z10, Z11, Z6)
+ GALOIS(0x00, 0x55, Z28, Z8, Z9, Z7)
+
+ CMPQ AX, $1
+ JE skip_avx512_parallel84
+
+ LOAD(0x18) // &in[1][0]
+ GALOIS(0xaa, 0xff, Z16, Z14, Z15, Z4)
+ GALOIS(0xaa, 0xff, Z20, Z12, Z13, Z5)
+ GALOIS(0xaa, 0xff, Z24, Z10, Z11, Z6)
+ GALOIS(0xaa, 0xff, Z28, Z8, Z9, Z7)
+
+ CMPQ AX, $2
+ JE skip_avx512_parallel84
+
+ LOAD(0x30) // &in[2][0]
+ GALOIS(0x00, 0x55, Z17, Z14, Z15, Z4)
+ GALOIS(0x00, 0x55, Z21, Z12, Z13, Z5)
+ GALOIS(0x00, 0x55, Z25, Z10, Z11, Z6)
+ GALOIS(0x00, 0x55, Z29, Z8, Z9, Z7)
+
+ CMPQ AX, $3
+ JE skip_avx512_parallel84
+
+ LOAD(0x48) // &in[3][0]
+ GALOIS(0xaa, 0xff, Z17, Z14, Z15, Z4)
+ GALOIS(0xaa, 0xff, Z21, Z12, Z13, Z5)
+ GALOIS(0xaa, 0xff, Z25, Z10, Z11, Z6)
+ GALOIS(0xaa, 0xff, Z29, Z8, Z9, Z7)
+
+ CMPQ AX, $4
+ JE skip_avx512_parallel84
+
+ LOAD(0x60) // &in[4][0]
+ GALOIS(0x00, 0x55, Z18, Z14, Z15, Z4)
+ GALOIS(0x00, 0x55, Z22, Z12, Z13, Z5)
+ GALOIS(0x00, 0x55, Z26, Z10, Z11, Z6)
+ GALOIS(0x00, 0x55, Z30, Z8, Z9, Z7)
+
+ CMPQ AX, $5
+ JE skip_avx512_parallel84
+
+ LOAD(0x78) // &in[5][0]
+ GALOIS(0xaa, 0xff, Z18, Z14, Z15, Z4)
+ GALOIS(0xaa, 0xff, Z22, Z12, Z13, Z5)
+ GALOIS(0xaa, 0xff, Z26, Z10, Z11, Z6)
+ GALOIS(0xaa, 0xff, Z30, Z8, Z9, Z7)
+
+ CMPQ AX, $6
+ JE skip_avx512_parallel84
+
+ LOAD(0x90) // &in[6][0]
+ GALOIS(0x00, 0x55, Z19, Z14, Z15, Z4)
+ GALOIS(0x00, 0x55, Z23, Z12, Z13, Z5)
+ GALOIS(0x00, 0x55, Z27, Z10, Z11, Z6)
+ GALOIS(0x00, 0x55, Z31, Z8, Z9, Z7)
+
+ CMPQ AX, $7
+ JE skip_avx512_parallel84
+
+ LOAD(0xa8) // &in[7][0]
+ GALOIS(0xaa, 0xff, Z19, Z14, Z15, Z4)
+ GALOIS(0xaa, 0xff, Z23, Z12, Z13, Z5)
+ GALOIS(0xaa, 0xff, Z27, Z10, Z11, Z6)
+ GALOIS(0xaa, 0xff, Z31, Z8, Z9, Z7)
+
+skip_avx512_parallel84:
+ VMOVDQU64 Z4, (DX)
+ VMOVDQU64 Z5, (CX)
+ VMOVDQU64 Z6, (R10)
+ VMOVDQU64 Z7, (R12)
+
+ ADDQ $64, R11 // in4+=64
+
+ ADDQ $64, DX // out+=64
+ ADDQ $64, CX // out2+=64
+ ADDQ $64, R10 // out3+=64
+ ADDQ $64, R12 // out4+=64
+
+ SUBQ $1, R9
+ JNZ loopback_avx512_parallel84
+
+done_avx512_parallel84:
+ VZEROUPPER
+ RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
new file mode 100644
index 0000000..f757f9d
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.go
@@ -0,0 +1,138 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+//go:noescape
+func galMulSSSE3(low, high, in, out []byte)
+
+//go:noescape
+func galMulSSSE3Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2(low, high, in, out []byte)
+
+//go:noescape
+func sSE2XorSlice(in, out []byte)
+
+//go:noescape
+func galMulAVX2Xor_64(low, high, in, out []byte)
+
+//go:noescape
+func galMulAVX2_64(low, high, in, out []byte)
+
+//go:noescape
+func sSE2XorSlice_64(in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulSSSE3(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] = low[l] ^ high[h]
+ }
+}
+
+func galMulSSSE3Xor(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] ^= low[l] ^ high[h]
+ }
+}
+*/
+
+// bigSwitchover is the size where 64 bytes are processed per loop.
+const bigSwitchover = 128
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ copy(out, in)
+ return
+ }
+ if o.useAVX2 {
+ if len(in) >= bigSwitchover {
+ galMulAVX2_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done := (len(in) >> 6) << 6
+ in = in[done:]
+ out = out[done:]
+ }
+ if len(in) > 32 {
+ galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done := (len(in) >> 5) << 5
+ in = in[done:]
+ out = out[done:]
+ }
+ } else if o.useSSSE3 {
+ galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done := (len(in) >> 4) << 4
+ in = in[done:]
+ out = out[done:]
+ }
+ out = out[:len(in)]
+ mt := mulTable[c][:256]
+ for i := range in {
+ out[i] = mt[in[i]]
+ }
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ sliceXor(in, out, o)
+ return
+ }
+
+ if o.useAVX2 {
+ if len(in) >= bigSwitchover {
+ galMulAVX2Xor_64(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done := (len(in) >> 6) << 6
+ in = in[done:]
+ out = out[done:]
+ }
+ if len(in) >= 32 {
+ galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done := (len(in) >> 5) << 5
+ in = in[done:]
+ out = out[done:]
+ }
+ } else if o.useSSSE3 {
+ galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done := (len(in) >> 4) << 4
+ in = in[done:]
+ out = out[done:]
+ }
+ out = out[:len(in)]
+ mt := mulTable[c][:256]
+ for i := range in {
+ out[i] ^= mt[in[i]]
+ }
+}
+
+// slice galois add
+func sliceXor(in, out []byte, o *options) {
+ if o.useSSE2 {
+ if len(in) >= bigSwitchover {
+ sSE2XorSlice_64(in, out)
+ done := (len(in) >> 6) << 6
+ in = in[done:]
+ out = out[done:]
+ }
+ if len(in) >= 16 {
+ sSE2XorSlice(in, out)
+ done := (len(in) >> 4) << 4
+ in = in[done:]
+ out = out[done:]
+ }
+ }
+ out = out[:len(in)]
+ for i := range in {
+ out[i] ^= in[i]
+ }
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
new file mode 100644
index 0000000..3501110
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_amd64.s
@@ -0,0 +1,368 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
+// and http://jerasure.org/jerasure/gf-complete/tree/master
+
+// func galMulSSSE3Xor(low, high, in, out []byte)
+TEXT ·galMulSSSE3Xor(SB), 7, $0
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVOU (SI), X6 // X6 low
+ MOVOU (DX), X7 // X7: high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X8
+ PXOR X5, X5
+ MOVQ in+48(FP), SI // SI: &in
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+ MOVQ out+72(FP), DX // DX: &out
+ PSHUFB X5, X8 // X8: lomask (unpacked)
+ SHRQ $4, R9 // len(in) / 16
+ MOVQ SI, AX
+ MOVQ DX, BX
+ ANDQ $15, AX
+ ANDQ $15, BX
+ CMPQ R9, $0
+ JEQ done_xor
+ ORQ AX, BX
+ CMPQ BX, $0
+ JNZ loopback_xor
+
+loopback_xor_aligned:
+ MOVOA (SI), X0 // in[x]
+ MOVOA (DX), X4 // out[x]
+ MOVOA X0, X1 // in[x]
+ MOVOA X6, X2 // low copy
+ MOVOA X7, X3 // high copy
+ PSRLQ $4, X1 // X1: high input
+ PAND X8, X0 // X0: low input
+ PAND X8, X1 // X1: high input
+ PSHUFB X0, X2 // X2: mul low part
+ PSHUFB X1, X3 // X3: mul high part
+ PXOR X2, X3 // X3: Result
+ PXOR X4, X3 // X3: Result xor existing out
+ MOVOA X3, (DX) // Store
+ ADDQ $16, SI // in+=16
+ ADDQ $16, DX // out+=16
+ SUBQ $1, R9
+ JNZ loopback_xor_aligned
+ JMP done_xor
+
+loopback_xor:
+ MOVOU (SI), X0 // in[x]
+ MOVOU (DX), X4 // out[x]
+ MOVOU X0, X1 // in[x]
+ MOVOU X6, X2 // low copy
+ MOVOU X7, X3 // high copy
+ PSRLQ $4, X1 // X1: high input
+ PAND X8, X0 // X0: low input
+ PAND X8, X1 // X1: high input
+ PSHUFB X0, X2 // X2: mul low part
+ PSHUFB X1, X3 // X3: mul high part
+ PXOR X2, X3 // X3: Result
+ PXOR X4, X3 // X3: Result xor existing out
+ MOVOU X3, (DX) // Store
+ ADDQ $16, SI // in+=16
+ ADDQ $16, DX // out+=16
+ SUBQ $1, R9
+ JNZ loopback_xor
+
+done_xor:
+ RET
+
+// func galMulSSSE3(low, high, in, out []byte)
+TEXT ·galMulSSSE3(SB), 7, $0
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVOU (SI), X6 // X6 low
+ MOVOU (DX), X7 // X7: high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X8
+ PXOR X5, X5
+ MOVQ in+48(FP), SI // SI: &in
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+ MOVQ out+72(FP), DX // DX: &out
+ PSHUFB X5, X8 // X8: lomask (unpacked)
+ MOVQ SI, AX
+ MOVQ DX, BX
+ SHRQ $4, R9 // len(in) / 16
+ ANDQ $15, AX
+ ANDQ $15, BX
+ CMPQ R9, $0
+ JEQ done
+ ORQ AX, BX
+ CMPQ BX, $0
+ JNZ loopback
+
+loopback_aligned:
+ MOVOA (SI), X0 // in[x]
+ MOVOA X0, X1 // in[x]
+ MOVOA X6, X2 // low copy
+ MOVOA X7, X3 // high copy
+ PSRLQ $4, X1 // X1: high input
+ PAND X8, X0 // X0: low input
+ PAND X8, X1 // X1: high input
+ PSHUFB X0, X2 // X2: mul low part
+ PSHUFB X1, X3 // X3: mul high part
+ PXOR X2, X3 // X3: Result
+ MOVOA X3, (DX) // Store
+ ADDQ $16, SI // in+=16
+ ADDQ $16, DX // out+=16
+ SUBQ $1, R9
+ JNZ loopback_aligned
+ JMP done
+
+loopback:
+ MOVOU (SI), X0 // in[x]
+ MOVOU X0, X1 // in[x]
+ MOVOA X6, X2 // low copy
+ MOVOA X7, X3 // high copy
+ PSRLQ $4, X1 // X1: high input
+ PAND X8, X0 // X0: low input
+ PAND X8, X1 // X1: high input
+ PSHUFB X0, X2 // X2: mul low part
+ PSHUFB X1, X3 // X3: mul high part
+ PXOR X2, X3 // X3: Result
+ MOVOU X3, (DX) // Store
+ ADDQ $16, SI // in+=16
+ ADDQ $16, DX // out+=16
+ SUBQ $1, R9
+ JNZ loopback
+
+done:
+ RET
+
+// func galMulAVX2Xor(low, high, in, out []byte)
+TEXT ·galMulAVX2Xor(SB), 7, $0
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X5
+ MOVOU (SI), X6 // X6: low
+ MOVOU (DX), X7 // X7: high
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+
+ VINSERTI128 $1, X6, Y6, Y6 // low
+ VINSERTI128 $1, X7, Y7, Y7 // high
+ VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
+
+ SHRQ $5, R9 // len(in) / 32
+ MOVQ out+72(FP), DX // DX: &out
+ MOVQ in+48(FP), SI // SI: &in
+ TESTQ R9, R9
+ JZ done_xor_avx2
+
+loopback_xor_avx2:
+ VMOVDQU (SI), Y0
+ VMOVDQU (DX), Y4
+ VPSRLQ $4, Y0, Y1 // Y1: high input
+ VPAND Y8, Y0, Y0 // Y0: low input
+ VPAND Y8, Y1, Y1 // Y1: high input
+ VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+ VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+ VPXOR Y3, Y2, Y3 // Y3: Result
+ VPXOR Y4, Y3, Y4 // Y4: Result
+ VMOVDQU Y4, (DX)
+
+ ADDQ $32, SI // in+=32
+ ADDQ $32, DX // out+=32
+ SUBQ $1, R9
+ JNZ loopback_xor_avx2
+
+done_xor_avx2:
+ VZEROUPPER
+ RET
+
+// func galMulAVX2(low, high, in, out []byte)
+TEXT ·galMulAVX2(SB), 7, $0
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X5
+ MOVOU (SI), X6 // X6: low
+ MOVOU (DX), X7 // X7: high
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+
+ VINSERTI128 $1, X6, Y6, Y6 // low
+ VINSERTI128 $1, X7, Y7, Y7 // high
+ VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
+
+ SHRQ $5, R9 // len(in) / 32
+ MOVQ out+72(FP), DX // DX: &out
+ MOVQ in+48(FP), SI // SI: &in
+ TESTQ R9, R9
+ JZ done_avx2
+
+loopback_avx2:
+ VMOVDQU (SI), Y0
+ VPSRLQ $4, Y0, Y1 // Y1: high input
+ VPAND Y8, Y0, Y0 // Y0: low input
+ VPAND Y8, Y1, Y1 // Y1: high input
+ VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+ VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+ VPXOR Y3, Y2, Y4 // Y4: Result
+ VMOVDQU Y4, (DX)
+
+ ADDQ $32, SI // in+=32
+ ADDQ $32, DX // out+=32
+ SUBQ $1, R9
+ JNZ loopback_avx2
+
+done_avx2:
+ VZEROUPPER
+ RET
+
+// func sSE2XorSlice(in, out []byte)
+TEXT ·sSE2XorSlice(SB), 7, $0
+ MOVQ in+0(FP), SI // SI: &in
+ MOVQ in_len+8(FP), R9 // R9: len(in)
+ MOVQ out+24(FP), DX // DX: &out
+ SHRQ $4, R9 // len(in) / 16
+ CMPQ R9, $0
+ JEQ done_xor_sse2
+
+loopback_xor_sse2:
+ MOVOU (SI), X0 // in[x]
+ MOVOU (DX), X1 // out[x]
+ PXOR X0, X1
+ MOVOU X1, (DX)
+ ADDQ $16, SI // in+=16
+ ADDQ $16, DX // out+=16
+ SUBQ $1, R9
+ JNZ loopback_xor_sse2
+
+done_xor_sse2:
+ RET
+
+// func galMulAVX2Xor_64(low, high, in, out []byte)
+TEXT ·galMulAVX2Xor_64(SB), 7, $0
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X5
+ MOVOU (SI), X6 // X6: low
+ MOVOU (DX), X7 // X7: high
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+
+ VINSERTI128 $1, X6, Y6, Y6 // low
+ VINSERTI128 $1, X7, Y7, Y7 // high
+ VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
+
+ SHRQ $6, R9 // len(in) / 64
+ MOVQ out+72(FP), DX // DX: &out
+ MOVQ in+48(FP), SI // SI: &in
+ TESTQ R9, R9
+ JZ done_xor_avx2_64
+
+loopback_xor_avx2_64:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y10
+ VMOVDQU (DX), Y4
+ VMOVDQU 32(DX), Y14
+ VPSRLQ $4, Y0, Y1 // Y1: high input
+ VPSRLQ $4, Y10, Y11 // Y11: high input 2
+ VPAND Y8, Y0, Y0 // Y0: low input
+ VPAND Y8, Y10, Y10 // Y10: low input 2
+ VPAND Y8, Y1, Y1 // Y1: high input
+ VPAND Y8, Y11, Y11 // Y11: high input 2
+ VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+ VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
+ VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+ VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
+ VPXOR Y3, Y2, Y3 // Y3: Result
+ VPXOR Y13, Y12, Y13 // Y13: Result 2
+ VPXOR Y4, Y3, Y4 // Y4: Result
+ VPXOR Y14, Y13, Y14 // Y14: Result 2
+ VMOVDQU Y4, (DX)
+ VMOVDQU Y14, 32(DX)
+
+ ADDQ $64, SI // in+=64
+ ADDQ $64, DX // out+=64
+ SUBQ $1, R9
+ JNZ loopback_xor_avx2_64
+
+done_xor_avx2_64:
+ VZEROUPPER
+ RET
+
+// func galMulAVX2_64(low, high, in, out []byte)
+TEXT ·galMulAVX2_64(SB), 7, $0
+ MOVQ low+0(FP), SI // SI: &low
+ MOVQ high+24(FP), DX // DX: &high
+ MOVQ $15, BX // BX: low mask
+ MOVQ BX, X5
+ MOVOU (SI), X6 // X6: low
+ MOVOU (DX), X7 // X7: high
+ MOVQ in_len+56(FP), R9 // R9: len(in)
+
+ VINSERTI128 $1, X6, Y6, Y6 // low
+ VINSERTI128 $1, X7, Y7, Y7 // high
+ VPBROADCASTB X5, Y8 // Y8: lomask (unpacked)
+
+ SHRQ $6, R9 // len(in) / 64
+ MOVQ out+72(FP), DX // DX: &out
+ MOVQ in+48(FP), SI // SI: &in
+ TESTQ R9, R9
+ JZ done_avx2_64
+
+loopback_avx2_64:
+ VMOVDQU (SI), Y0
+ VMOVDQU 32(SI), Y10
+ VPSRLQ $4, Y0, Y1 // Y1: high input
+ VPSRLQ $4, Y10, Y11 // Y11: high input 2
+ VPAND Y8, Y0, Y0 // Y0: low input
+ VPAND Y8, Y10, Y10 // Y10: low input
+ VPAND Y8, Y1, Y1 // Y1: high input
+ VPAND Y8, Y11, Y11 // Y11: high input 2
+ VPSHUFB Y0, Y6, Y2 // Y2: mul low part
+ VPSHUFB Y10, Y6, Y12 // Y12: mul low part 2
+ VPSHUFB Y1, Y7, Y3 // Y3: mul high part
+ VPSHUFB Y11, Y7, Y13 // Y13: mul high part 2
+ VPXOR Y3, Y2, Y4 // Y4: Result
+ VPXOR Y13, Y12, Y14 // Y14: Result 2
+ VMOVDQU Y4, (DX)
+ VMOVDQU Y14, 32(DX)
+
+ ADDQ $64, SI // in+=64
+ ADDQ $64, DX // out+=64
+ SUBQ $1, R9
+ JNZ loopback_avx2_64
+
+done_avx2_64:
+ VZEROUPPER
+ RET
+
+// func sSE2XorSlice_64(in, out []byte)
+TEXT ·sSE2XorSlice_64(SB), 7, $0
+ MOVQ in+0(FP), SI // SI: &in
+ MOVQ in_len+8(FP), R9 // R9: len(in)
+ MOVQ out+24(FP), DX // DX: &out
+ SHRQ $6, R9 // len(in) / 64
+ CMPQ R9, $0
+ JEQ done_xor_sse2_64
+
+loopback_xor_sse2_64:
+ MOVOU (SI), X0 // in[x]
+ MOVOU 16(SI), X2 // in[x]
+ MOVOU 32(SI), X4 // in[x]
+ MOVOU 48(SI), X6 // in[x]
+ MOVOU (DX), X1 // out[x]
+ MOVOU 16(DX), X3 // out[x]
+ MOVOU 32(DX), X5 // out[x]
+ MOVOU 48(DX), X7 // out[x]
+ PXOR X0, X1
+ PXOR X2, X3
+ PXOR X4, X5
+ PXOR X6, X7
+ MOVOU X1, (DX)
+ MOVOU X3, 16(DX)
+ MOVOU X5, 32(DX)
+ MOVOU X7, 48(DX)
+ ADDQ $64, SI // in+=64
+ ADDQ $64, DX // out+=64
+ SUBQ $1, R9
+ JNZ loopback_xor_sse2_64
+
+done_xor_sse2_64:
+ RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.go b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
new file mode 100644
index 0000000..23a1dd2
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.go
@@ -0,0 +1,67 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func galMulNEON(low, high, in, out []byte)
+
+//go:noescape
+func galMulXorNEON(low, high, in, out []byte)
+
+//go:noescape
+func galXorNEON(in, out []byte)
+
+func galMulSlice(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ copy(out, in)
+ return
+ }
+ var done int
+ galMulNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done = (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mulTable[c][:256]
+ for i := done; i < len(in); i++ {
+ out[i] = mt[in[i]]
+ }
+ }
+}
+
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+ if c == 1 {
+ sliceXor(in, out, o)
+ return
+ }
+ var done int
+ galMulXorNEON(mulTableLow[c][:], mulTableHigh[c][:], in, out)
+ done = (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ mt := mulTable[c][:256]
+ for i := done; i < len(in); i++ {
+ out[i] ^= mt[in[i]]
+ }
+ }
+}
+
+// slice galois add
+func sliceXor(in, out []byte, o *options) {
+
+ galXorNEON(in, out)
+ done := (len(in) >> 5) << 5
+
+ remain := len(in) - done
+ if remain > 0 {
+ for i := done; i < len(in); i++ {
+ out[i] ^= in[i]
+ }
+ }
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_arm64.s b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
new file mode 100644
index 0000000..d2cac2c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_arm64.s
@@ -0,0 +1,125 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2017, Minio, Inc.
+
+#define LOAD(LO1, LO2, HI1, HI2) \
+ VLD1.P 32(R1), [LO1.B16, LO2.B16] \
+ \
+ \ // Get low input and high input
+ VUSHR $4, LO1.B16, HI1.B16 \
+ VUSHR $4, LO2.B16, HI2.B16 \
+ VAND V8.B16, LO1.B16, LO1.B16 \
+ VAND V8.B16, LO2.B16, LO2.B16
+
+#define GALOIS_MUL(MUL_LO, MUL_HI, OUT1, OUT2, TMP1, TMP2) \
+ \ // Mul low part and mul high part
+ VTBL V0.B16, [MUL_LO.B16], OUT1.B16 \
+ VTBL V10.B16, [MUL_HI.B16], OUT2.B16 \
+ VTBL V1.B16, [MUL_LO.B16], TMP1.B16 \
+ VTBL V11.B16, [MUL_HI.B16], TMP2.B16 \
+ \
+ \ // Combine results
+ VEOR OUT2.B16, OUT1.B16, OUT1.B16 \
+ VEOR TMP2.B16, TMP1.B16, OUT2.B16
+
+// func galMulNEON(low, high, in, out []byte)
+TEXT ·galMulNEON(SB), 7, $0
+ MOVD in_base+48(FP), R1
+ MOVD in_len+56(FP), R2 // length of message
+ MOVD out_base+72(FP), R5
+ SUBS $32, R2
+ BMI complete
+
+ MOVD low+0(FP), R10 // R10: &low
+ MOVD high+24(FP), R11 // R11: &high
+ VLD1 (R10), [V6.B16]
+ VLD1 (R11), [V7.B16]
+
+ //
+ // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
+ // WORD $0x4e010c68 // dup v8.16b, w3
+ //
+ MOVD $0x0f, R3
+ VMOV R3, V8.B[0]
+ VDUP V8.B[0], V8.B16
+
+loop:
+ // Main loop
+ LOAD(V0, V1, V10, V11)
+ GALOIS_MUL(V6, V7, V4, V5, V14, V15)
+
+ // Store result
+ VST1.P [V4.D2, V5.D2], 32(R5)
+
+ SUBS $32, R2
+ BPL loop
+
+complete:
+ RET
+
+// func galMulXorNEON(low, high, in, out []byte)
+TEXT ·galMulXorNEON(SB), 7, $0
+ MOVD in_base+48(FP), R1
+ MOVD in_len+56(FP), R2 // length of message
+ MOVD out_base+72(FP), R5
+ SUBS $32, R2
+ BMI completeXor
+
+ MOVD low+0(FP), R10 // R10: &low
+ MOVD high+24(FP), R11 // R11: &high
+ VLD1 (R10), [V6.B16]
+ VLD1 (R11), [V7.B16]
+
+ //
+ // Use an extra instruction below since `VDUP R3, V8.B16` generates assembler error
+ // WORD $0x4e010c68 // dup v8.16b, w3
+ //
+ MOVD $0x0f, R3
+ VMOV R3, V8.B[0]
+ VDUP V8.B[0], V8.B16
+
+loopXor:
+ // Main loop
+ VLD1 (R5), [V20.B16, V21.B16]
+
+ LOAD(V0, V1, V10, V11)
+ GALOIS_MUL(V6, V7, V4, V5, V14, V15)
+
+ VEOR V20.B16, V4.B16, V4.B16
+ VEOR V21.B16, V5.B16, V5.B16
+
+ // Store result
+ VST1.P [V4.D2, V5.D2], 32(R5)
+
+ SUBS $32, R2
+ BPL loopXor
+
+completeXor:
+ RET
+
+// func galXorNEON(in, out []byte)
+TEXT ·galXorNEON(SB), 7, $0
+ MOVD in_base+0(FP), R1
+ MOVD in_len+8(FP), R2 // length of message
+ MOVD out_base+24(FP), R5
+ SUBS $32, R2
+ BMI completeXor
+
+loopXor:
+ // Main loop
+ VLD1.P 32(R1), [V0.B16, V1.B16]
+ VLD1 (R5), [V20.B16, V21.B16]
+
+ VEOR V20.B16, V0.B16, V4.B16
+ VEOR V21.B16, V1.B16, V5.B16
+
+ // Store result
+ VST1.P [V4.D2, V5.D2], 32(R5)
+
+ SUBS $32, R2
+ BPL loopXor
+
+completeXor:
+ RET
+
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
new file mode 100644
index 0000000..edd6376
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.go
@@ -0,0 +1,408 @@
+// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build !nogen
+// +build gc
+
+package reedsolomon
+
+// mulAvxTwo_1x1 takes 1 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x2 takes 1 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x3 takes 1 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x4 takes 1 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x5 takes 1 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x6 takes 1 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x7 takes 1 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_1x8 takes 1 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x1 takes 2 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x2 takes 2 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x3 takes 2 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x4 takes 2 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x5 takes 2 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x6 takes 2 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x7 takes 2 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_2x8 takes 2 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x1 takes 3 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x2 takes 3 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x3 takes 3 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x4 takes 3 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x5 takes 3 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x6 takes 3 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x7 takes 3 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_3x8 takes 3 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x1 takes 4 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x2 takes 4 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x3 takes 4 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x4 takes 4 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x5 takes 4 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x6 takes 4 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x7 takes 4 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_4x8 takes 4 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x1 takes 5 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x2 takes 5 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x3 takes 5 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x4 takes 5 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x5 takes 5 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x6 takes 5 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x7 takes 5 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_5x8 takes 5 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x1 takes 6 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x2 takes 6 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x3 takes 6 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x4 takes 6 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x5 takes 6 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x6 takes 6 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x7 takes 6 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_6x8 takes 6 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x1 takes 7 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x2 takes 7 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x3 takes 7 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x4 takes 7 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x5 takes 7 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x6 takes 7 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x7 takes 7 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_7x8 takes 7 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x1 takes 8 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x2 takes 8 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x3 takes 8 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x4 takes 8 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x5 takes 8 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x6 takes 8 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x7 takes 8 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_8x8 takes 8 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x1 takes 9 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x2 takes 9 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x3 takes 9 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x4 takes 9 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x5 takes 9 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x6 takes 9 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x7 takes 9 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_9x8 takes 9 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x1 takes 10 inputs and produces 1 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x2 takes 10 inputs and produces 2 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x3 takes 10 inputs and produces 3 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x4 takes 10 inputs and produces 4 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x5 takes 10 inputs and produces 5 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x6 takes 10 inputs and produces 6 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x7 takes 10 inputs and produces 7 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+
+// mulAvxTwo_10x8 takes 10 inputs and produces 8 outputs.
+// The output is initialized to 0.
+//go:noescape
+func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
new file mode 100644
index 0000000..c76db3c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_amd64.s
@@ -0,0 +1,18526 @@
+// Code generated by command: go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build !nogen
+// +build gc
+
+// func mulAvxTwo_1x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 6 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ $0x0000000f, BX
+ MOVQ BX, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), BX
+
+mulAvxTwo_1x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (CX)(BX*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y3, Y4, Y4
+ VPAND Y3, Y5, Y5
+ VPSHUFB Y4, Y1, Y4
+ VPSHUFB Y5, Y2, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(BX*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, BX
+ DECQ AX
+ JNZ mulAvxTwo_1x1_loop
+ VZEROUPPER
+
+mulAvxTwo_1x1_end:
+ RET
+
+// func mulAvxTwo_1x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x2(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 11 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), BP
+
+mulAvxTwo_1x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (CX)(BP*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VPSHUFB Y9, Y2, Y7
+ VPSHUFB Y10, Y3, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VPSHUFB Y9, Y4, Y7
+ VPSHUFB Y10, Y5, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(BP*1)
+ VMOVDQU Y1, (DX)(BP*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, BP
+ DECQ AX
+ JNZ mulAvxTwo_1x2_loop
+ VZEROUPPER
+
+mulAvxTwo_1x2_end:
+ RET
+
+// func mulAvxTwo_1x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x3(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X9
+ VPBROADCASTB X9, Y9
+ MOVQ start+72(FP), SI
+
+mulAvxTwo_1x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (CX)(SI*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y9, Y12, Y12
+ VPAND Y9, Y13, Y13
+ VPSHUFB Y12, Y3, Y10
+ VPSHUFB Y13, Y4, Y11
+ VPXOR Y10, Y11, Y10
+ VPXOR Y10, Y0, Y0
+ VPSHUFB Y12, Y5, Y10
+ VPSHUFB Y13, Y6, Y11
+ VPXOR Y10, Y11, Y10
+ VPXOR Y10, Y1, Y1
+ VPSHUFB Y12, Y7, Y10
+ VPSHUFB Y13, Y8, Y11
+ VPXOR Y10, Y11, Y10
+ VPXOR Y10, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(SI*1)
+ VMOVDQU Y1, (BP)(SI*1)
+ VMOVDQU Y2, (DX)(SI*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, SI
+ DECQ AX
+ JNZ mulAvxTwo_1x3_loop
+ VZEROUPPER
+
+mulAvxTwo_1x3_end:
+ RET
+
+// func mulAvxTwo_1x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 17 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), DI
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_1x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (DI)(R8*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R8*1)
+ VMOVDQU Y1, (BP)(R8*1)
+ VMOVDQU Y2, (SI)(R8*1)
+ VMOVDQU Y3, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_1x4_loop
+ VZEROUPPER
+
+mulAvxTwo_1x4_end:
+ RET
+
+// func mulAvxTwo_1x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R8
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_1x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R8)(R9*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R9*1)
+ VMOVDQU Y1, (BP)(R9*1)
+ VMOVDQU Y2, (SI)(R9*1)
+ VMOVDQU Y3, (DI)(R9*1)
+ VMOVDQU Y4, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_1x5_loop
+ VZEROUPPER
+
+mulAvxTwo_1x5_end:
+ RET
+
+// func mulAvxTwo_1x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 23 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), DX
+ MOVQ in_base+24(FP), R9
+ MOVQ (R9), R9
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_1x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (R9)(R10*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (BP)(R10*1)
+ VMOVDQU Y2, (SI)(R10*1)
+ VMOVDQU Y3, (DI)(R10*1)
+ VMOVDQU Y4, (R8)(R10*1)
+ VMOVDQU Y5, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_1x6_loop
+ VZEROUPPER
+
+mulAvxTwo_1x6_end:
+ RET
+
+// func mulAvxTwo_1x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R10
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_1x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R10)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (BP)(R11*1)
+ VMOVDQU Y2, (SI)(R11*1)
+ VMOVDQU Y3, (DI)(R11*1)
+ VMOVDQU Y4, (R8)(R11*1)
+ VMOVDQU Y5, (R9)(R11*1)
+ VMOVDQU Y6, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_1x7_loop
+ VZEROUPPER
+
+mulAvxTwo_1x7_end:
+ RET
+
+// func mulAvxTwo_1x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_1x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 29 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_1x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), R10
+ MOVQ 168(DX), DX
+ MOVQ in_base+24(FP), R11
+ MOVQ (R11), R11
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_1x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (R11)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (SI)(R12*1)
+ VMOVDQU Y3, (DI)(R12*1)
+ VMOVDQU Y4, (R8)(R12*1)
+ VMOVDQU Y5, (R9)(R12*1)
+ VMOVDQU Y6, (R10)(R12*1)
+ VMOVDQU Y7, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_1x8_loop
+ VZEROUPPER
+
+mulAvxTwo_1x8_end:
+ RET
+
+// func mulAvxTwo_2x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 8 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), CX
+ MOVQ $0x0000000f, BP
+ MOVQ BP, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), BP
+
+mulAvxTwo_2x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(BP*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y1, Y6
+ VPSHUFB Y7, Y2, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (CX)(BP*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y5, Y6, Y6
+ VPAND Y5, Y7, Y7
+ VPSHUFB Y6, Y3, Y6
+ VPSHUFB Y7, Y4, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(BP*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, BP
+ DECQ AX
+ JNZ mulAvxTwo_2x1_loop
+ VZEROUPPER
+
+mulAvxTwo_2x1_end:
+ RET
+
+// func mulAvxTwo_2x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x2(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 15 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BP
+ MOVQ 24(CX), CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X10
+ VPBROADCASTB X10, Y10
+ MOVQ start+72(FP), SI
+
+mulAvxTwo_2x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (BP)(SI*1), Y13
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y2, Y11
+ VPSHUFB Y14, Y3, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y0, Y0
+ VPSHUFB Y13, Y4, Y11
+ VPSHUFB Y14, Y5, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (CX)(SI*1), Y13
+ VPSRLQ $0x04, Y13, Y14
+ VPAND Y10, Y13, Y13
+ VPAND Y10, Y14, Y14
+ VPSHUFB Y13, Y6, Y11
+ VPSHUFB Y14, Y7, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y0, Y0
+ VPSHUFB Y13, Y8, Y11
+ VPSHUFB Y14, Y9, Y12
+ VPXOR Y11, Y12, Y11
+ VPXOR Y11, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(SI*1)
+ VMOVDQU Y1, (DX)(SI*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, SI
+ DECQ AX
+ JNZ mulAvxTwo_2x2_loop
+ VZEROUPPER
+
+mulAvxTwo_2x2_end:
+ RET
+
+// func mulAvxTwo_2x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 20 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), SI
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_2x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R8*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI)(R8*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R8*1)
+ VMOVDQU Y1, (BP)(R8*1)
+ VMOVDQU Y2, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_2x3_loop
+ VZEROUPPER
+
+mulAvxTwo_2x3_end:
+ RET
+
+// func mulAvxTwo_2x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 25 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), DI
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_2x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R9*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (DI)(R9*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R9*1)
+ VMOVDQU Y1, (BP)(R9*1)
+ VMOVDQU Y2, (SI)(R9*1)
+ VMOVDQU Y3, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_2x4_loop
+ VZEROUPPER
+
+mulAvxTwo_2x4_end:
+ RET
+
+// func mulAvxTwo_2x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 30 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R8
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_2x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R9)(R10*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (R8)(R10*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (BP)(R10*1)
+ VMOVDQU Y2, (SI)(R10*1)
+ VMOVDQU Y3, (DI)(R10*1)
+ VMOVDQU Y4, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_2x5_loop
+ VZEROUPPER
+
+mulAvxTwo_2x5_end:
+ RET
+
+// func mulAvxTwo_2x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), DX
+ MOVQ in_base+24(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R9
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_2x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (R10)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (R9)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (BP)(R11*1)
+ VMOVDQU Y2, (SI)(R11*1)
+ VMOVDQU Y3, (DI)(R11*1)
+ VMOVDQU Y4, (R8)(R11*1)
+ VMOVDQU Y5, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_2x6_loop
+ VZEROUPPER
+
+mulAvxTwo_2x6_end:
+ RET
+
+// func mulAvxTwo_2x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 40 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R10
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_2x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R11)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (R10)(R12*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (SI)(R12*1)
+ VMOVDQU Y3, (DI)(R12*1)
+ VMOVDQU Y4, (R8)(R12*1)
+ VMOVDQU Y5, (R9)(R12*1)
+ VMOVDQU Y6, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_2x7_loop
+ VZEROUPPER
+
+mulAvxTwo_2x7_end:
+ RET
+
+// func mulAvxTwo_2x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_2x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 45 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_2x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), R10
+ MOVQ 168(DX), DX
+ MOVQ in_base+24(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R11
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_2x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (R12)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (R11)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ VMOVDQU Y0, (BX)(R13*1)
+ VMOVDQU Y1, (BP)(R13*1)
+ VMOVDQU Y2, (SI)(R13*1)
+ VMOVDQU Y3, (DI)(R13*1)
+ VMOVDQU Y4, (R8)(R13*1)
+ VMOVDQU Y5, (R9)(R13*1)
+ VMOVDQU Y6, (R10)(R13*1)
+ VMOVDQU Y7, (DX)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_2x8_loop
+ VZEROUPPER
+
+mulAvxTwo_2x8_end:
+ RET
+
+// func mulAvxTwo_3x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 10 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), BP
+ MOVQ 48(CX), CX
+ MOVQ $0x0000000f, SI
+ MOVQ SI, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), SI
+
+mulAvxTwo_3x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(SI*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y1, Y8
+ VPSHUFB Y9, Y2, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BP)(SI*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y3, Y8
+ VPSHUFB Y9, Y4, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (CX)(SI*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y7, Y8, Y8
+ VPAND Y7, Y9, Y9
+ VPSHUFB Y8, Y5, Y8
+ VPSHUFB Y9, Y6, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(SI*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, SI
+ DECQ AX
+ JNZ mulAvxTwo_3x1_loop
+ VZEROUPPER
+
+mulAvxTwo_3x1_end:
+ RET
+
+// func mulAvxTwo_3x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 19 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), BP
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X2
+ VPBROADCASTB X2, Y2
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_3x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R8*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R8*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (BP)(R8*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R8*1)
+ VMOVDQU Y1, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_3x2_loop
+ VZEROUPPER
+
+mulAvxTwo_3x2_end:
+ RET
+
+// func mulAvxTwo_3x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 26 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), SI
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_3x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R9*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (R8)(R9*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (SI)(R9*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R9*1)
+ VMOVDQU Y1, (BP)(R9*1)
+ VMOVDQU Y2, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_3x3_loop
+ VZEROUPPER
+
+mulAvxTwo_3x3_end:
+ RET
+
+// func mulAvxTwo_3x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_3x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 33 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_3x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), DI
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_3x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R10*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (R9)(R10*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI)(R10*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (BP)(R10*1)
+ VMOVDQU Y2, (SI)(R10*1)
+ VMOVDQU Y3, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_3x4_loop
+ VZEROUPPER
+
+mulAvxTwo_3x4_end:
+ RET
+
+// func mulAvxTwo_3x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_3x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 40 YMM used
+	// GF(2^8) product of a 3-input x 5-output coefficient matrix, 32 bytes
+	// per iteration: each input byte is split into 4-bit nibbles, each
+	// nibble indexes a 16-entry VPSHUFB table, and the two lookups are
+	// XORed together and into the per-output accumulator.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                  // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x5_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), DX
+	MOVQ  in_base+24(FP), R8
+	MOVQ  (R8), R9
+	MOVQ  24(R8), R10
+	MOVQ  48(R8), R8
+	MOVQ  $0x0000000f, R11
+	MOVQ  R11, X5
+	VPBROADCASTB X5, Y5              // Y5 = low-nibble mask 0x0f in every byte
+	MOVQ  start+72(FP), R11          // R11 = byte offset into all slices
+
+mulAvxTwo_3x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R11*1), Y8
+	VPSRLQ  $0x04, Y8, Y9            // Y9 = high nibbles, Y8 = low nibbles
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R11*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R8)(R11*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R11*1)
+	VMOVDQU Y1, (BP)(R11*1)
+	VMOVDQU Y2, (SI)(R11*1)
+	VMOVDQU Y3, (DI)(R11*1)
+	VMOVDQU Y4, (DX)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ  mulAvxTwo_3x5_loop
+	VZEROUPPER
+
+mulAvxTwo_3x5_end:
+	RET
+
+// func mulAvxTwo_3x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_3x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 47 YMM used
+	// 3 inputs -> 6 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                  // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x6_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), DX
+	MOVQ  in_base+24(FP), R9
+	MOVQ  (R9), R10
+	MOVQ  24(R9), R11
+	MOVQ  48(R9), R9
+	MOVQ  $0x0000000f, R12
+	MOVQ  R12, X6
+	VPBROADCASTB X6, Y6              // Y6 = low-nibble mask 0x0f in every byte
+	MOVQ  start+72(FP), R12          // R12 = byte offset into all slices
+
+mulAvxTwo_3x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (R10)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10           // Y10 = high nibbles, Y9 = low nibbles
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (R11)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (R9)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Store 6 outputs
+	VMOVDQU Y0, (BX)(R12*1)
+	VMOVDQU Y1, (BP)(R12*1)
+	VMOVDQU Y2, (SI)(R12*1)
+	VMOVDQU Y3, (DI)(R12*1)
+	VMOVDQU Y4, (R8)(R12*1)
+	VMOVDQU Y5, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_3x6_loop
+	VZEROUPPER
+
+mulAvxTwo_3x6_end:
+	RET
+
+// func mulAvxTwo_3x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_3x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 54 YMM used
+	// 3 inputs -> 7 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                  // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x7_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), R9
+	MOVQ  144(DX), DX
+	MOVQ  in_base+24(FP), R10
+	MOVQ  (R10), R11
+	MOVQ  24(R10), R12
+	MOVQ  48(R10), R10
+	MOVQ  $0x0000000f, R13
+	MOVQ  R13, X7
+	VPBROADCASTB X7, Y7              // Y7 = low-nibble mask 0x0f in every byte
+	MOVQ  start+72(FP), R13          // R13 = byte offset into all slices
+
+mulAvxTwo_3x7_loop:
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (R11)(R13*1), Y10
+	VPSRLQ  $0x04, Y10, Y11          // Y11 = high nibbles, Y10 = low nibbles
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (R12)(R13*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (R10)(R13*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Store 7 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (BP)(R13*1)
+	VMOVDQU Y2, (SI)(R13*1)
+	VMOVDQU Y3, (DI)(R13*1)
+	VMOVDQU Y4, (R8)(R13*1)
+	VMOVDQU Y5, (R9)(R13*1)
+	VMOVDQU Y6, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ  mulAvxTwo_3x7_loop
+	VZEROUPPER
+
+mulAvxTwo_3x7_end:
+	RET
+
+// func mulAvxTwo_3x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_3x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 61 YMM used
+	// 3 inputs -> 8 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                  // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ    mulAvxTwo_3x8_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DI
+	MOVQ  96(DX), R8
+	MOVQ  120(DX), R9
+	MOVQ  144(DX), R10
+	MOVQ  168(DX), DX
+	MOVQ  in_base+24(FP), R11
+	MOVQ  (R11), R12
+	MOVQ  24(R11), R13
+	MOVQ  48(R11), R11
+	MOVQ  $0x0000000f, R14
+	MOVQ  R14, X8
+	VPBROADCASTB X8, Y8              // Y8 = low-nibble mask 0x0f in every byte
+	MOVQ  start+72(FP), R14          // R14 = byte offset into all slices
+
+mulAvxTwo_3x8_loop:
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (R12)(R14*1), Y11
+	VPSRLQ  $0x04, Y11, Y12          // Y12 = high nibbles, Y11 = low nibbles
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (R13)(R14*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (R11)(R14*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Store 8 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (SI)(R14*1)
+	VMOVDQU Y3, (DI)(R14*1)
+	VMOVDQU Y4, (R8)(R14*1)
+	VMOVDQU Y5, (R9)(R14*1)
+	VMOVDQU Y6, (R10)(R14*1)
+	VMOVDQU Y7, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ  mulAvxTwo_3x8_loop
+	VZEROUPPER
+
+mulAvxTwo_3x8_end:
+	RET
+
+// func mulAvxTwo_4x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_4x1(SB), $0-88
+	// Loading all tables to registers
+	// Full registers estimated 12 YMM used
+	// 4 inputs -> 1 output. The single output needs only 8 lookup tables
+	// (low/high nibble per input), so all are preloaded into Y1..Y8 once
+	// instead of being re-read from the matrix on every iteration.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX                   // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ mulAvxTwo_4x1_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), DX
+	VMOVDQU (CX), Y1
+	VMOVDQU 32(CX), Y2
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	MOVQ in_base+24(FP), CX
+	MOVQ (CX), BX
+	MOVQ 24(CX), BP
+	MOVQ 48(CX), SI
+	MOVQ 72(CX), CX
+	MOVQ $0x0000000f, DI
+	MOVQ DI, X9
+	VPBROADCASTB X9, Y9              // Y9 = low-nibble mask 0x0f in every byte
+	MOVQ start+72(FP), DI            // DI = byte offset into all slices
+
+mulAvxTwo_4x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BX)(DI*1), Y10
+	VPSRLQ $0x04, Y10, Y11           // Y11 = high nibbles, Y10 = low nibbles
+	VPAND Y9, Y10, Y10
+	VPAND Y9, Y11, Y11
+	VPSHUFB Y10, Y1, Y10
+	VPSHUFB Y11, Y2, Y11
+	VPXOR Y10, Y11, Y10
+	VPXOR Y10, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (BP)(DI*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y9, Y10, Y10
+	VPAND Y9, Y11, Y11
+	VPSHUFB Y10, Y3, Y10
+	VPSHUFB Y11, Y4, Y11
+	VPXOR Y10, Y11, Y10
+	VPXOR Y10, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (SI)(DI*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y9, Y10, Y10
+	VPAND Y9, Y11, Y11
+	VPSHUFB Y10, Y5, Y10
+	VPSHUFB Y11, Y6, Y11
+	VPXOR Y10, Y11, Y10
+	VPXOR Y10, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (CX)(DI*1), Y10
+	VPSRLQ $0x04, Y10, Y11
+	VPAND Y9, Y10, Y10
+	VPAND Y9, Y11, Y11
+	VPSHUFB Y10, Y7, Y10
+	VPSHUFB Y11, Y8, Y11
+	VPXOR Y10, Y11, Y10
+	VPXOR Y10, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(DI*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, DI
+	DECQ AX
+	JNZ mulAvxTwo_4x1_loop
+	VZEROUPPER
+
+mulAvxTwo_4x1_end:
+	RET
+
+// func mulAvxTwo_4x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_4x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 23 YMM used
+	// 4 inputs -> 2 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX                   // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ mulAvxTwo_4x2_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), DX
+	MOVQ in_base+24(FP), BP
+	MOVQ (BP), SI
+	MOVQ 24(BP), DI
+	MOVQ 48(BP), R8
+	MOVQ 72(BP), BP
+	MOVQ $0x0000000f, R9
+	MOVQ R9, X2
+	VPBROADCASTB X2, Y2              // Y2 = low-nibble mask 0x0f in every byte
+	MOVQ start+72(FP), R9            // R9 = byte offset into all slices
+
+mulAvxTwo_4x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R9*1), Y5
+	VPSRLQ $0x04, Y5, Y6             // Y6 = high nibbles, Y5 = low nibbles
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R9*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R9*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (BP)(R9*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R9*1)
+	VMOVDQU Y1, (DX)(R9*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R9
+	DECQ AX
+	JNZ mulAvxTwo_4x2_loop
+	VZEROUPPER
+
+mulAvxTwo_4x2_end:
+	RET
+
+// func mulAvxTwo_4x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_4x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 32 YMM used
+	// 4 inputs -> 3 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX                   // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ mulAvxTwo_4x3_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), DX
+	MOVQ in_base+24(FP), SI
+	MOVQ (SI), DI
+	MOVQ 24(SI), R8
+	MOVQ 48(SI), R9
+	MOVQ 72(SI), SI
+	MOVQ $0x0000000f, R10
+	MOVQ R10, X3
+	VPBROADCASTB X3, Y3              // Y3 = low-nibble mask 0x0f in every byte
+	MOVQ start+72(FP), R10           // R10 = byte offset into all slices
+
+mulAvxTwo_4x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R10*1), Y6
+	VPSRLQ $0x04, Y6, Y7             // Y7 = high nibbles, Y6 = low nibbles
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R10*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R10*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (SI)(R10*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R10*1)
+	VMOVDQU Y1, (BP)(R10*1)
+	VMOVDQU Y2, (DX)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ mulAvxTwo_4x3_loop
+	VZEROUPPER
+
+mulAvxTwo_4x3_end:
+	RET
+
+// func mulAvxTwo_4x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_4x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 41 YMM used
+	// 4 inputs -> 4 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX                   // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ mulAvxTwo_4x4_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DX
+	MOVQ in_base+24(FP), DI
+	MOVQ (DI), R8
+	MOVQ 24(DI), R9
+	MOVQ 48(DI), R10
+	MOVQ 72(DI), DI
+	MOVQ $0x0000000f, R11
+	MOVQ R11, X4
+	VPBROADCASTB X4, Y4              // Y4 = low-nibble mask 0x0f in every byte
+	MOVQ start+72(FP), R11           // R11 = byte offset into all slices
+
+mulAvxTwo_4x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (R8)(R11*1), Y7
+	VPSRLQ $0x04, Y7, Y8             // Y8 = high nibbles, Y7 = low nibbles
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (R9)(R11*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (R10)(R11*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (DI)(R11*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Store 4 outputs
+	VMOVDQU Y0, (BX)(R11*1)
+	VMOVDQU Y1, (BP)(R11*1)
+	VMOVDQU Y2, (SI)(R11*1)
+	VMOVDQU Y3, (DX)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ mulAvxTwo_4x4_loop
+	VZEROUPPER
+
+mulAvxTwo_4x4_end:
+	RET
+
+// func mulAvxTwo_4x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_4x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 50 YMM used
+	// 4 inputs -> 5 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX                   // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ mulAvxTwo_4x5_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DI
+	MOVQ 96(DX), DX
+	MOVQ in_base+24(FP), R8
+	MOVQ (R8), R9
+	MOVQ 24(R8), R10
+	MOVQ 48(R8), R11
+	MOVQ 72(R8), R8
+	MOVQ $0x0000000f, R12
+	MOVQ R12, X5
+	VPBROADCASTB X5, Y5              // Y5 = low-nibble mask 0x0f in every byte
+	MOVQ start+72(FP), R12           // R12 = byte offset into all slices
+
+mulAvxTwo_4x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R12*1), Y8
+	VPSRLQ $0x04, Y8, Y9             // Y9 = high nibbles, Y8 = low nibbles
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R12*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R11)(R12*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R8)(R12*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R12*1)
+	VMOVDQU Y1, (BP)(R12*1)
+	VMOVDQU Y2, (SI)(R12*1)
+	VMOVDQU Y3, (DI)(R12*1)
+	VMOVDQU Y4, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ mulAvxTwo_4x5_loop
+	VZEROUPPER
+
+mulAvxTwo_4x5_end:
+	RET
+
+// func mulAvxTwo_4x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
TEXT ·mulAvxTwo_4x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 59 YMM used
+	// 4 inputs -> 6 outputs; same nibble-split + VPSHUFB lookup + XOR
+	// accumulation scheme as the other mulAvxTwo kernels, 32 bytes/iter.
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX                   // loop count = n / 32 bytes
+	TESTQ AX, AX
+	JZ mulAvxTwo_4x6_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), SI
+	MOVQ 72(DX), DI
+	MOVQ 96(DX), R8
+	MOVQ 120(DX), DX
+	MOVQ in_base+24(FP), R9
+	MOVQ (R9), R10
+	MOVQ 24(R9), R11
+	MOVQ 48(R9), R12
+	MOVQ 72(R9), R9
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X6
+	VPBROADCASTB X6, Y6              // Y6 = low-nibble mask 0x0f in every byte
+	MOVQ start+72(FP), R13           // R13 = byte offset into all slices
+
+mulAvxTwo_4x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (R10)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10            // Y10 = high nibbles, Y9 = low nibbles
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (R11)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (R12)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R9)(R13*1), Y9
+	VPSRLQ $0x04, Y9, Y10
+	VPAND Y6, Y9, Y9
+	VPAND Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR Y7, Y8, Y7
+	VPXOR Y7, Y5, Y5
+
+	// Store 6 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (BP)(R13*1)
+	VMOVDQU Y2, (SI)(R13*1)
+	VMOVDQU Y3, (DI)(R13*1)
+	VMOVDQU Y4, (R8)(R13*1)
+	VMOVDQU Y5, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_4x6_loop
+	VZEROUPPER
+
+mulAvxTwo_4x6_end:
+	RET
+
+// func mulAvxTwo_4x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R10
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_4x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R11)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (R12)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (R13)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R10)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R14*1)
+ VMOVDQU Y1, (BP)(R14*1)
+ VMOVDQU Y2, (SI)(R14*1)
+ VMOVDQU Y3, (DI)(R14*1)
+ VMOVDQU Y4, (R8)(R14*1)
+ VMOVDQU Y5, (R9)(R14*1)
+ VMOVDQU Y6, (DX)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_4x7_loop
+ VZEROUPPER
+
+mulAvxTwo_4x7_end:
+ RET
+
+// func mulAvxTwo_4x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_4x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 77 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_4x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), R10
+ MOVQ 168(DX), DX
+ MOVQ in_base+24(FP), R11
+ MOVQ (R11), R12
+ MOVQ 24(R11), R13
+ MOVQ 48(R11), R14
+ MOVQ 72(R11), R11
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R15
+
+mulAvxTwo_4x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (R12)(R15*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (R13)(R15*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (R14)(R15*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R11)(R15*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ VMOVDQU Y0, (BX)(R15*1)
+ VMOVDQU Y1, (BP)(R15*1)
+ VMOVDQU Y2, (SI)(R15*1)
+ VMOVDQU Y3, (DI)(R15*1)
+ VMOVDQU Y4, (R8)(R15*1)
+ VMOVDQU Y5, (R9)(R15*1)
+ VMOVDQU Y6, (R10)(R15*1)
+ VMOVDQU Y7, (DX)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_4x8_loop
+ VZEROUPPER
+
+mulAvxTwo_4x8_end:
+ RET
+
+// func mulAvxTwo_5x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_5x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 14 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), BP
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), CX
+ MOVQ $0x0000000f, R8
+ MOVQ R8, X11
+ VPBROADCASTB X11, Y11
+ MOVQ start+72(FP), R8
+
+mulAvxTwo_5x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(R8*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y1, Y12
+ VPSHUFB Y13, Y2, Y13
+ VPXOR Y12, Y13, Y12
+ VPXOR Y12, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BP)(R8*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y3, Y12
+ VPSHUFB Y13, Y4, Y13
+ VPXOR Y12, Y13, Y12
+ VPXOR Y12, Y0, Y0
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI)(R8*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y5, Y12
+ VPSHUFB Y13, Y6, Y13
+ VPXOR Y12, Y13, Y12
+ VPXOR Y12, Y0, Y0
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI)(R8*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y7, Y12
+ VPSHUFB Y13, Y8, Y13
+ VPXOR Y12, Y13, Y12
+ VPXOR Y12, Y0, Y0
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (CX)(R8*1), Y12
+ VPSRLQ $0x04, Y12, Y13
+ VPAND Y11, Y12, Y12
+ VPAND Y11, Y13, Y13
+ VPSHUFB Y12, Y9, Y12
+ VPSHUFB Y13, Y10, Y13
+ VPXOR Y12, Y13, Y12
+ VPXOR Y12, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(R8*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R8
+ DECQ AX
+ JNZ mulAvxTwo_5x1_loop
+ VZEROUPPER
+
+mulAvxTwo_5x1_end:
+ RET
+
+// func mulAvxTwo_5x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_5x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 27 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), R8
+ MOVQ 72(BP), R9
+ MOVQ 96(BP), BP
+ MOVQ $0x0000000f, R10
+ MOVQ R10, X2
+ VPBROADCASTB X2, Y2
+ MOVQ start+72(FP), R10
+
+mulAvxTwo_5x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R10*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R10*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (R8)(R10*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R9)(R10*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (BP)(R10*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R10*1)
+ VMOVDQU Y1, (DX)(R10*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R10
+ DECQ AX
+ JNZ mulAvxTwo_5x2_loop
+ VZEROUPPER
+
+mulAvxTwo_5x2_end:
+ RET
+
+// func mulAvxTwo_5x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_5x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 38 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), SI
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_5x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R11*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (R8)(R11*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (R9)(R11*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R10)(R11*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (SI)(R11*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (BP)(R11*1)
+ VMOVDQU Y2, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_5x3_loop
+ VZEROUPPER
+
+mulAvxTwo_5x3_end:
+ RET
+
+// func mulAvxTwo_5x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_5x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 49 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), DI
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_5x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R12*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (R9)(R12*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (R10)(R12*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R11)(R12*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (DI)(R12*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (SI)(R12*1)
+ VMOVDQU Y3, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_5x4_loop
+ VZEROUPPER
+
+mulAvxTwo_5x4_end:
+ RET
+
+// func mulAvxTwo_5x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_5x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 60 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R8
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_5x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R9)(R13*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (R10)(R13*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (R11)(R13*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R12)(R13*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R8)(R13*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R13*1)
+ VMOVDQU Y1, (BP)(R13*1)
+ VMOVDQU Y2, (SI)(R13*1)
+ VMOVDQU Y3, (DI)(R13*1)
+ VMOVDQU Y4, (DX)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_5x5_loop
+ VZEROUPPER
+
+mulAvxTwo_5x5_end:
+ RET
+
+// func mulAvxTwo_5x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_5x6 multiplies 5 input slices through 5*6 pairs of 32-byte
+// lookup tables held in matrix, accumulating into 6 output slices,
+// 32 bytes per loop iteration. Each input block is split into low/high
+// nibbles (VPAND with a broadcast 0x0f mask, VPSRLQ by 4); each nibble
+// selects table entries via VPSHUFB, and the two halves are XORed
+// together and into the running output registers Y0-Y5 — the split-table
+// GF(2^8) multiply-add technique.
+// NOTE(review): machine-generated assembly (see gen.go in this package);
+// only comments added here — do not hand-edit the instruction stream.
+TEXT ·mulAvxTwo_5x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 71 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), DX
+ MOVQ in_base+24(FP), R9
+ MOVQ (R9), R10
+ MOVQ 24(R9), R11
+ MOVQ 48(R9), R12
+ MOVQ 72(R9), R13
+ MOVQ 96(R9), R9
+ // Broadcast the nibble mask 0x0f into every byte of Y6.
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+ // R14 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_5x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (R10)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (R11)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (R12)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R13)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ VMOVDQU Y0, (BX)(R14*1)
+ VMOVDQU Y1, (BP)(R14*1)
+ VMOVDQU Y2, (SI)(R14*1)
+ VMOVDQU Y3, (DI)(R14*1)
+ VMOVDQU Y4, (R8)(R14*1)
+ VMOVDQU Y5, (DX)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_5x6_loop
+ VZEROUPPER
+
+mulAvxTwo_5x6_end:
+ RET
+
+// func mulAvxTwo_5x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_5x7 multiplies 5 input slices through 5*7 pairs of 32-byte
+// lookup tables in matrix, accumulating into 7 output slices (Y0-Y6),
+// 32 bytes per iteration, using the same low/high-nibble VPSHUFB
+// split-table GF(2^8) multiply-add scheme as the sibling routines.
+// NOTE(review): machine-generated assembly (see gen.go); comments only.
+TEXT ·mulAvxTwo_5x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 82 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), R8
+ MOVQ 120(DX), R9
+ MOVQ 144(DX), DX
+ MOVQ in_base+24(FP), R10
+ MOVQ (R10), R11
+ MOVQ 24(R10), R12
+ MOVQ 48(R10), R13
+ MOVQ 72(R10), R14
+ MOVQ 96(R10), R10
+ // Broadcast the nibble mask 0x0f into every byte of Y7.
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X7
+ VPBROADCASTB X7, Y7
+ // R15 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R15
+
+mulAvxTwo_5x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (R11)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (R12)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (R13)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R14)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R10)(R15*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ VMOVDQU Y0, (BX)(R15*1)
+ VMOVDQU Y1, (BP)(R15*1)
+ VMOVDQU Y2, (SI)(R15*1)
+ VMOVDQU Y3, (DI)(R15*1)
+ VMOVDQU Y4, (R8)(R15*1)
+ VMOVDQU Y5, (R9)(R15*1)
+ VMOVDQU Y6, (DX)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_5x7_loop
+ VZEROUPPER
+
+mulAvxTwo_5x7_end:
+ RET
+
+// func mulAvxTwo_5x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_5x8 multiplies 5 input slices through 5*8 pairs of 32-byte
+// lookup tables in matrix, accumulating into 8 output slices (Y0-Y7),
+// 32 bytes per iteration, using the low/high-nibble VPSHUFB split-table
+// GF(2^8) multiply-add scheme. Unlike the smaller variants, there are not
+// enough free general registers to pin all 8 output pointers, so they are
+// re-read from the out slice header (DX) in the store section each
+// iteration.
+// NOTE(review): machine-generated assembly (see gen.go); comments only.
+TEXT ·mulAvxTwo_5x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 93 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_5x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), BX
+ // Broadcast the nibble mask 0x0f into every byte of Y8.
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X8
+ VPBROADCASTB X8, Y8
+ // R9 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_5x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (BX)(R9*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ // Output pointers are reloaded from the out slice header each time:
+ // no spare general register remains to cache all eight of them.
+ MOVQ (DX), R10
+ VMOVDQU Y0, (R10)(R9*1)
+ MOVQ 24(DX), R10
+ VMOVDQU Y1, (R10)(R9*1)
+ MOVQ 48(DX), R10
+ VMOVDQU Y2, (R10)(R9*1)
+ MOVQ 72(DX), R10
+ VMOVDQU Y3, (R10)(R9*1)
+ MOVQ 96(DX), R10
+ VMOVDQU Y4, (R10)(R9*1)
+ MOVQ 120(DX), R10
+ VMOVDQU Y5, (R10)(R9*1)
+ MOVQ 144(DX), R10
+ VMOVDQU Y6, (R10)(R9*1)
+ MOVQ 168(DX), R10
+ VMOVDQU Y7, (R10)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_5x8_loop
+ VZEROUPPER
+
+mulAvxTwo_5x8_end:
+ RET
+
+// func mulAvxTwo_6x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_6x1 multiplies 6 input slices into a single output slice,
+// 32 bytes per iteration. With only one output, all 6*2 nibble lookup
+// tables fit in registers, so they are preloaded into Y1-Y12 before the
+// loop and matrix is not touched again. Each input block is split into
+// low/high nibbles (VPAND with the broadcast 0x0f mask in Y13, VPSRLQ by
+// 4), looked up via VPSHUFB, and XORed into the accumulator Y0 — the
+// split-table GF(2^8) multiply-add technique.
+// NOTE(review): machine-generated assembly (see gen.go); comments only.
+TEXT ·mulAvxTwo_6x1(SB), $0-88
+ // Loading all tables to registers
+ // Full registers estimated 16 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ VMOVDQU (CX), Y1
+ VMOVDQU 32(CX), Y2
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VMOVDQU 320(CX), Y11
+ VMOVDQU 352(CX), Y12
+ MOVQ in_base+24(FP), CX
+ MOVQ (CX), BX
+ MOVQ 24(CX), BP
+ MOVQ 48(CX), SI
+ MOVQ 72(CX), DI
+ MOVQ 96(CX), R8
+ MOVQ 120(CX), CX
+ // Broadcast the nibble mask 0x0f into every byte of Y13.
+ MOVQ $0x0000000f, R9
+ MOVQ R9, X13
+ VPBROADCASTB X13, Y13
+ // R9 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R9
+
+mulAvxTwo_6x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BX)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y1, Y14
+ VPSHUFB Y15, Y2, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (BP)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y3, Y14
+ VPSHUFB Y15, Y4, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (SI)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y5, Y14
+ VPSHUFB Y15, Y6, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (DI)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y7, Y14
+ VPSHUFB Y15, Y8, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R8)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y9, Y14
+ VPSHUFB Y15, Y10, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (CX)(R9*1), Y14
+ VPSRLQ $0x04, Y14, Y15
+ VPAND Y13, Y14, Y14
+ VPAND Y13, Y15, Y15
+ VPSHUFB Y14, Y11, Y14
+ VPSHUFB Y15, Y12, Y15
+ VPXOR Y14, Y15, Y14
+ VPXOR Y14, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(R9*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R9
+ DECQ AX
+ JNZ mulAvxTwo_6x1_loop
+ VZEROUPPER
+
+mulAvxTwo_6x1_end:
+ RET
+
+// func mulAvxTwo_6x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_6x2 multiplies 6 input slices through 6*2 pairs of 32-byte
+// lookup tables streamed from matrix, accumulating into 2 output slices
+// (Y0, Y1), 32 bytes per iteration, using the low/high-nibble VPSHUFB
+// split-table GF(2^8) multiply-add scheme.
+// NOTE(review): machine-generated assembly (see gen.go); comments only.
+TEXT ·mulAvxTwo_6x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 31 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), R8
+ MOVQ 72(BP), R9
+ MOVQ 96(BP), R10
+ MOVQ 120(BP), BP
+ // Broadcast the nibble mask 0x0f into every byte of Y2.
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X2
+ VPBROADCASTB X2, Y2
+ // R11 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_6x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (R8)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R9)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R10)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (BP)(R11*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R11*1)
+ VMOVDQU Y1, (DX)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_6x2_loop
+ VZEROUPPER
+
+mulAvxTwo_6x2_end:
+ RET
+
+// func mulAvxTwo_6x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_6x3 multiplies 6 input slices through 6*3 pairs of 32-byte
+// lookup tables streamed from matrix, accumulating into 3 output slices
+// (Y0-Y2), 32 bytes per iteration, using the low/high-nibble VPSHUFB
+// split-table GF(2^8) multiply-add scheme.
+// NOTE(review): machine-generated assembly (see gen.go); comments only.
+TEXT ·mulAvxTwo_6x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 44 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), SI
+ // Broadcast the nibble mask 0x0f into every byte of Y3.
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X3
+ VPBROADCASTB X3, Y3
+ // R12 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_6x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (R8)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (R9)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R10)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R11)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (SI)(R12*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (BP)(R12*1)
+ VMOVDQU Y2, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_6x3_loop
+ VZEROUPPER
+
+mulAvxTwo_6x3_end:
+ RET
+
+// func mulAvxTwo_6x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_6x4 multiplies 6 input slices through 6*4 pairs of 32-byte
+// lookup tables streamed from matrix, accumulating into 4 output slices
+// (Y0-Y3), 32 bytes per iteration, using the low/high-nibble VPSHUFB
+// split-table GF(2^8) multiply-add scheme.
+// NOTE(review): machine-generated assembly (see gen.go); comments only.
+TEXT ·mulAvxTwo_6x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 57 YMM used
+ // AX = n / 32: number of whole 32-byte blocks; exit early if zero.
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_6x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), DI
+ // Broadcast the nibble mask 0x0f into every byte of Y4.
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X4
+ VPBROADCASTB X4, Y4
+ // R13 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_6x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (R9)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (R10)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R11)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R12)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (DI)(R13*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R13*1)
+ VMOVDQU Y1, (BP)(R13*1)
+ VMOVDQU Y2, (SI)(R13*1)
+ VMOVDQU Y3, (DX)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_6x4_loop
+ VZEROUPPER
+
+mulAvxTwo_6x4_end:
+ RET
+
+// func mulAvxTwo_6x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): this file appears machine-generated (the package ships a
+// gen.go) — prefer regenerating over hand-editing.
+//
+// Multiplies 6 input shards by a 6x5 coefficient matrix over GF(2^8),
+// producing 5 output shards, 32 bytes per shard per loop iteration.
+// Technique: each source byte is split into low/high nibbles (VPAND with a
+// broadcast 0x0f mask / VPSRLQ by 4); each nibble indexes a 32-byte VPSHUFB
+// lookup table loaded from `matrix` (two tables per coefficient, 64 bytes),
+// and the two lookups are XORed together and folded into the output
+// accumulators Y0-Y4.
+//
+// NOTE(review): BP is repurposed as an output pointer inside the loop, so
+// the frame pointer is clobbered — presumably acceptable for generated leaf
+// asm, but confirm against the toolchain's ABI/frame-pointer expectations.
+TEXT ·mulAvxTwo_6x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 70 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                     // AX = n / 32 = number of 32-byte blocks
+	TESTQ AX, AX
+	JZ    mulAvxTwo_6x5_end
+	// Output shard pointers.
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX                      // out[0]
+	MOVQ  24(DX), BP                    // out[1]
+	MOVQ  48(DX), SI                    // out[2]
+	MOVQ  72(DX), DI                    // out[3]
+	MOVQ  96(DX), DX                    // out[4]
+	// Input shard pointers.
+	MOVQ  in_base+24(FP), R8
+	MOVQ  (R8), R9                      // in[0]
+	MOVQ  24(R8), R10                   // in[1]
+	MOVQ  48(R8), R11                   // in[2]
+	MOVQ  72(R8), R12                   // in[3]
+	MOVQ  96(R8), R13                   // in[4]
+	MOVQ  120(R8), R8                   // in[5]
+	// Broadcast the low-nibble mask 0x0f into all 32 lanes of Y5.
+	MOVQ  $0x0000000f, R14
+	MOVQ  R14, X5
+	VPBROADCASTB X5, Y5
+	MOVQ  start+72(FP), R14             // R14 = running byte offset into every shard
+
+mulAvxTwo_6x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (R9)(R14*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (R10)(R14*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (R11)(R14*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R12)(R14*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R13)(R14*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R8)(R14*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Store 5 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (SI)(R14*1)
+	VMOVDQU Y3, (DI)(R14*1)
+	VMOVDQU Y4, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ  mulAvxTwo_6x5_loop
+	VZEROUPPER
+
+mulAvxTwo_6x5_end:
+	RET
+
+
+// func mulAvxTwo_6x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated kernel (the package ships a gen.go) —
+// regenerate rather than hand-edit.  Multiplies 6 input shards by a 6x6
+// coefficient matrix over GF(2^8), producing 6 output shards, 32 bytes per
+// shard per iteration.  Each source byte's low/high nibbles (VPAND with the
+// broadcast 0x0f mask in Y6 / VPSRLQ by 4) index 32-byte VPSHUFB lookup
+// tables from `matrix`; the two lookups are XORed and accumulated into
+// Y0-Y5.
+//
+// NOTE(review): this variant consumes every GP register including BP
+// (frame pointer clobbered) and R15 — R15 is reserved by the Go assembler
+// when building with -dynlink/shared; confirm this file is excluded in
+// those build modes.
+TEXT ·mulAvxTwo_6x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 83 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                     // AX = number of 32-byte blocks
+	TESTQ AX, AX
+	JZ    mulAvxTwo_6x6_end
+	// Output shard pointers.
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX                      // out[0]
+	MOVQ  24(DX), BP                    // out[1]
+	MOVQ  48(DX), SI                    // out[2]
+	MOVQ  72(DX), DI                    // out[3]
+	MOVQ  96(DX), R8                    // out[4]
+	MOVQ  120(DX), DX                   // out[5]
+	// Input shard pointers.
+	MOVQ  in_base+24(FP), R9
+	MOVQ  (R9), R10                     // in[0]
+	MOVQ  24(R9), R11                   // in[1]
+	MOVQ  48(R9), R12                   // in[2]
+	MOVQ  72(R9), R13                   // in[3]
+	MOVQ  96(R9), R14                   // in[4]
+	MOVQ  120(R9), R9                   // in[5]
+	// Broadcast the low-nibble mask 0x0f into Y6.
+	MOVQ  $0x0000000f, R15
+	MOVQ  R15, X6
+	VPBROADCASTB X6, Y6
+	MOVQ  start+72(FP), R15             // R15 = running byte offset
+
+mulAvxTwo_6x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (R10)(R15*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (R11)(R15*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (R12)(R15*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R13)(R15*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 4 to 6 outputs
+	VMOVDQU (R14)(R15*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1536(CX), Y7
+	VMOVDQU 1568(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1600(CX), Y7
+	VMOVDQU 1632(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 1664(CX), Y7
+	VMOVDQU 1696(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 1728(CX), Y7
+	VMOVDQU 1760(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1792(CX), Y7
+	VMOVDQU 1824(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1856(CX), Y7
+	VMOVDQU 1888(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 5 to 6 outputs
+	VMOVDQU (R9)(R15*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1920(CX), Y7
+	VMOVDQU 1952(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1984(CX), Y7
+	VMOVDQU 2016(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 2048(CX), Y7
+	VMOVDQU 2080(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 2112(CX), Y7
+	VMOVDQU 2144(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 2176(CX), Y7
+	VMOVDQU 2208(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 2240(CX), Y7
+	VMOVDQU 2272(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Store 6 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (SI)(R15*1)
+	VMOVDQU Y3, (DI)(R15*1)
+	VMOVDQU Y4, (R8)(R15*1)
+	VMOVDQU Y5, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ  mulAvxTwo_6x6_loop
+	VZEROUPPER
+
+mulAvxTwo_6x6_end:
+	RET
+
+
+// func mulAvxTwo_6x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated kernel (the package ships a gen.go) —
+// regenerate rather than hand-edit.  Multiplies 6 input shards by a 6x7
+// coefficient matrix over GF(2^8), producing 7 output shards, 32 bytes per
+// shard per iteration, via the usual low/high-nibble VPSHUFB table lookups
+// XOR-accumulated into Y0-Y6.
+//
+// Unlike the narrower variants, there are not enough GP registers to keep
+// all 7 output pointers resident: `out_base` stays in DX and each output
+// pointer is reloaded from the slice header in the store section of every
+// iteration.  BP holds in[0], so the frame pointer is clobbered — verify
+// against the toolchain's ABI expectations.
+TEXT ·mulAvxTwo_6x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 96 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                     // AX = number of 32-byte blocks
+	TESTQ AX, AX
+	JZ    mulAvxTwo_6x7_end
+	MOVQ  out_base+48(FP), DX           // out slice header kept; pointers reloaded per store
+	// Input shard pointers.
+	MOVQ  in_base+24(FP), BX
+	MOVQ  (BX), BP                      // in[0]
+	MOVQ  24(BX), SI                    // in[1]
+	MOVQ  48(BX), DI                    // in[2]
+	MOVQ  72(BX), R8                    // in[3]
+	MOVQ  96(BX), R9                    // in[4]
+	MOVQ  120(BX), BX                   // in[5]
+	// Broadcast the low-nibble mask 0x0f into Y7.
+	MOVQ  $0x0000000f, R10
+	MOVQ  R10, X7
+	VPBROADCASTB X7, Y7
+	MOVQ  start+72(FP), R10             // R10 = running byte offset
+
+mulAvxTwo_6x7_loop:
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (BP)(R10*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (SI)(R10*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (DI)(R10*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 3 to 7 outputs
+	VMOVDQU (R8)(R10*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 1344(CX), Y8
+	VMOVDQU 1376(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 1408(CX), Y8
+	VMOVDQU 1440(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1472(CX), Y8
+	VMOVDQU 1504(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1536(CX), Y8
+	VMOVDQU 1568(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1600(CX), Y8
+	VMOVDQU 1632(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1664(CX), Y8
+	VMOVDQU 1696(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1728(CX), Y8
+	VMOVDQU 1760(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 4 to 7 outputs
+	VMOVDQU (R9)(R10*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 1792(CX), Y8
+	VMOVDQU 1824(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 1856(CX), Y8
+	VMOVDQU 1888(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1920(CX), Y8
+	VMOVDQU 1952(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1984(CX), Y8
+	VMOVDQU 2016(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 2048(CX), Y8
+	VMOVDQU 2080(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 2112(CX), Y8
+	VMOVDQU 2144(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 2176(CX), Y8
+	VMOVDQU 2208(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 5 to 7 outputs
+	VMOVDQU (BX)(R10*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 2240(CX), Y8
+	VMOVDQU 2272(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 2304(CX), Y8
+	VMOVDQU 2336(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 2368(CX), Y8
+	VMOVDQU 2400(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 2432(CX), Y8
+	VMOVDQU 2464(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 2496(CX), Y8
+	VMOVDQU 2528(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 2560(CX), Y8
+	VMOVDQU 2592(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 2624(CX), Y8
+	VMOVDQU 2656(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Store 7 outputs (pointers reloaded from out_base each iteration; see
+	// header note on register pressure)
+	MOVQ    (DX), R11
+	VMOVDQU Y0, (R11)(R10*1)
+	MOVQ    24(DX), R11
+	VMOVDQU Y1, (R11)(R10*1)
+	MOVQ    48(DX), R11
+	VMOVDQU Y2, (R11)(R10*1)
+	MOVQ    72(DX), R11
+	VMOVDQU Y3, (R11)(R10*1)
+	MOVQ    96(DX), R11
+	VMOVDQU Y4, (R11)(R10*1)
+	MOVQ    120(DX), R11
+	VMOVDQU Y5, (R11)(R10*1)
+	MOVQ    144(DX), R11
+	VMOVDQU Y6, (R11)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ  mulAvxTwo_6x7_loop
+	VZEROUPPER
+
+mulAvxTwo_6x7_end:
+	RET
+
+
+// func mulAvxTwo_6x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated kernel (the package ships a gen.go) —
+// regenerate rather than hand-edit.  Multiplies 6 input shards by a 6x8
+// coefficient matrix over GF(2^8), producing 8 output shards, 32 bytes per
+// shard per iteration, via low/high-nibble VPSHUFB table lookups
+// XOR-accumulated into Y0-Y7.
+//
+// As with the 6x7 variant, output pointers do not fit in registers:
+// `out_base` stays in DX and each pointer is reloaded in the store section
+// of every iteration.  BP holds in[0], so the frame pointer is clobbered —
+// verify against the toolchain's ABI expectations.
+TEXT ·mulAvxTwo_6x8(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 109 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                     // AX = number of 32-byte blocks
+	TESTQ AX, AX
+	JZ    mulAvxTwo_6x8_end
+	MOVQ  out_base+48(FP), DX           // out slice header kept; pointers reloaded per store
+	// Input shard pointers.
+	MOVQ  in_base+24(FP), BX
+	MOVQ  (BX), BP                      // in[0]
+	MOVQ  24(BX), SI                    // in[1]
+	MOVQ  48(BX), DI                    // in[2]
+	MOVQ  72(BX), R8                    // in[3]
+	MOVQ  96(BX), R9                    // in[4]
+	MOVQ  120(BX), BX                   // in[5]
+	// Broadcast the low-nibble mask 0x0f into Y8.
+	MOVQ  $0x0000000f, R10
+	MOVQ  R10, X8
+	VPBROADCASTB X8, Y8
+	MOVQ  start+72(FP), R10             // R10 = running byte offset
+
+mulAvxTwo_6x8_loop:
+	// Clear 8 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+	VPXOR Y7, Y7, Y7
+
+	// Load and process 32 bytes from input 0 to 8 outputs
+	VMOVDQU (BP)(R10*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU (CX), Y9
+	VMOVDQU 32(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 64(CX), Y9
+	VMOVDQU 96(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 128(CX), Y9
+	VMOVDQU 160(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 192(CX), Y9
+	VMOVDQU 224(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 256(CX), Y9
+	VMOVDQU 288(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 320(CX), Y9
+	VMOVDQU 352(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 384(CX), Y9
+	VMOVDQU 416(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 448(CX), Y9
+	VMOVDQU 480(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 1 to 8 outputs
+	VMOVDQU (SI)(R10*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 512(CX), Y9
+	VMOVDQU 544(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 576(CX), Y9
+	VMOVDQU 608(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 640(CX), Y9
+	VMOVDQU 672(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 704(CX), Y9
+	VMOVDQU 736(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 768(CX), Y9
+	VMOVDQU 800(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 832(CX), Y9
+	VMOVDQU 864(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 896(CX), Y9
+	VMOVDQU 928(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 960(CX), Y9
+	VMOVDQU 992(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 2 to 8 outputs
+	VMOVDQU (DI)(R10*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 1024(CX), Y9
+	VMOVDQU 1056(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 1088(CX), Y9
+	VMOVDQU 1120(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 1152(CX), Y9
+	VMOVDQU 1184(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 1216(CX), Y9
+	VMOVDQU 1248(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 1280(CX), Y9
+	VMOVDQU 1312(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 1344(CX), Y9
+	VMOVDQU 1376(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 1408(CX), Y9
+	VMOVDQU 1440(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 1472(CX), Y9
+	VMOVDQU 1504(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 3 to 8 outputs
+	VMOVDQU (R8)(R10*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 1536(CX), Y9
+	VMOVDQU 1568(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 1600(CX), Y9
+	VMOVDQU 1632(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 1664(CX), Y9
+	VMOVDQU 1696(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 1728(CX), Y9
+	VMOVDQU 1760(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 1792(CX), Y9
+	VMOVDQU 1824(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 1856(CX), Y9
+	VMOVDQU 1888(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 1920(CX), Y9
+	VMOVDQU 1952(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 1984(CX), Y9
+	VMOVDQU 2016(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 4 to 8 outputs
+	VMOVDQU (R9)(R10*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 2048(CX), Y9
+	VMOVDQU 2080(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 2112(CX), Y9
+	VMOVDQU 2144(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 2176(CX), Y9
+	VMOVDQU 2208(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 2240(CX), Y9
+	VMOVDQU 2272(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 2304(CX), Y9
+	VMOVDQU 2336(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 2368(CX), Y9
+	VMOVDQU 2400(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 2432(CX), Y9
+	VMOVDQU 2464(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 2496(CX), Y9
+	VMOVDQU 2528(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Load and process 32 bytes from input 5 to 8 outputs
+	VMOVDQU (BX)(R10*1), Y11
+	VPSRLQ  $0x04, Y11, Y12
+	VPAND   Y8, Y11, Y11
+	VPAND   Y8, Y12, Y12
+	VMOVDQU 2560(CX), Y9
+	VMOVDQU 2592(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y0, Y0
+	VMOVDQU 2624(CX), Y9
+	VMOVDQU 2656(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y1, Y1
+	VMOVDQU 2688(CX), Y9
+	VMOVDQU 2720(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y2, Y2
+	VMOVDQU 2752(CX), Y9
+	VMOVDQU 2784(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y3, Y3
+	VMOVDQU 2816(CX), Y9
+	VMOVDQU 2848(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y4, Y4
+	VMOVDQU 2880(CX), Y9
+	VMOVDQU 2912(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y5, Y5
+	VMOVDQU 2944(CX), Y9
+	VMOVDQU 2976(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y6, Y6
+	VMOVDQU 3008(CX), Y9
+	VMOVDQU 3040(CX), Y10
+	VPSHUFB Y11, Y9, Y9
+	VPSHUFB Y12, Y10, Y10
+	VPXOR   Y9, Y10, Y9
+	VPXOR   Y9, Y7, Y7
+
+	// Store 8 outputs (pointers reloaded from out_base each iteration; see
+	// header note on register pressure)
+	MOVQ    (DX), R11
+	VMOVDQU Y0, (R11)(R10*1)
+	MOVQ    24(DX), R11
+	VMOVDQU Y1, (R11)(R10*1)
+	MOVQ    48(DX), R11
+	VMOVDQU Y2, (R11)(R10*1)
+	MOVQ    72(DX), R11
+	VMOVDQU Y3, (R11)(R10*1)
+	MOVQ    96(DX), R11
+	VMOVDQU Y4, (R11)(R10*1)
+	MOVQ    120(DX), R11
+	VMOVDQU Y5, (R11)(R10*1)
+	MOVQ    144(DX), R11
+	VMOVDQU Y6, (R11)(R10*1)
+	MOVQ    168(DX), R11
+	VMOVDQU Y7, (R11)(R10*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R10
+	DECQ AX
+	JNZ  mulAvxTwo_6x8_loop
+	VZEROUPPER
+
+mulAvxTwo_6x8_end:
+	RET
+
+
+// func mulAvxTwo_7x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated kernel (the package ships a gen.go) —
+// regenerate rather than hand-edit.  Multiplies 7 input shards by a 7x1
+// coefficient column over GF(2^8) into a single output shard, 32 bytes per
+// iteration.  Each source byte's low/high nibbles (VPAND with the broadcast
+// 0x0f mask in Y1 / VPSRLQ by 4) index a pair of 32-byte VPSHUFB tables
+// from `matrix`; the lookups are XORed and folded into the accumulator Y0.
+// NOTE(review): BP holds in[0], so the frame pointer is clobbered inside
+// the loop — presumably fine for generated leaf asm, but confirm.
+TEXT ·mulAvxTwo_7x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 18 YMM used
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX                     // AX = number of 32-byte blocks
+	TESTQ AX, AX
+	JZ    mulAvxTwo_7x1_end
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), DX                      // out[0]
+	// Input shard pointers.
+	MOVQ  in_base+24(FP), BX
+	MOVQ  (BX), BP                      // in[0]
+	MOVQ  24(BX), SI                    // in[1]
+	MOVQ  48(BX), DI                    // in[2]
+	MOVQ  72(BX), R8                    // in[3]
+	MOVQ  96(BX), R9                    // in[4]
+	MOVQ  120(BX), R10                  // in[5]
+	MOVQ  144(BX), BX                   // in[6]
+	// Broadcast the low-nibble mask 0x0f into Y1.
+	MOVQ  $0x0000000f, R11
+	MOVQ  R11, X1
+	VPBROADCASTB X1, Y1
+	MOVQ  start+72(FP), R11             // R11 = running byte offset
+
+mulAvxTwo_7x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BP)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (BX)(R11*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R11*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R11
+	DECQ AX
+	JNZ  mulAvxTwo_7x1_loop
+	VZEROUPPER
+
+mulAvxTwo_7x1_end:
+	RET
+
+
+// func mulAvxTwo_7x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Galois-field multiply of 7 input slices by the expanded coding matrix at
+// matrix_base, XOR-accumulating into 2 output slices, 32 bytes per loop
+// iteration. Each 32-byte stripe is split into low/high nibbles (mask 0x0f)
+// that index two 32-byte lookup tables via VPSHUFB; the two partial products
+// are XORed together and into the per-output accumulators Y0/Y1.
+// NOTE(review): appears to be machine-generated (see gen.go in this package);
+// do not hand-edit.
+TEXT ·mulAvxTwo_7x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 35 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte stripes to process; exit early if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), R8
+ MOVQ 72(BP), R9
+ MOVQ 96(BP), R10
+ MOVQ 120(BP), R11
+ MOVQ 144(BP), BP
+ // Broadcast the 0x0f low-nibble mask into every byte of Y2.
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X2
+ VPBROADCASTB X2, Y2
+ // R12 = current byte offset into every input and output slice.
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_7x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (R8)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R9)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R10)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R11)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (BP)(R12*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R12*1)
+ VMOVDQU Y1, (DX)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_7x2_loop
+ VZEROUPPER
+
+mulAvxTwo_7x2_end:
+ RET
+
+
+// func mulAvxTwo_7x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Galois-field multiply of 7 input slices by the expanded coding matrix at
+// matrix_base, XOR-accumulating into 3 output slices, 32 bytes per loop
+// iteration. Each 32-byte stripe is split into low/high nibbles (mask 0x0f)
+// that index two 32-byte lookup tables via VPSHUFB; the two partial products
+// are XORed together and into the per-output accumulators Y0-Y2. Each input
+// consumes 192 matrix bytes (3 outputs x 2 tables x 32 bytes).
+// NOTE(review): appears to be machine-generated (see gen.go in this package);
+// do not hand-edit.
+TEXT ·mulAvxTwo_7x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 50 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte stripes to process; exit early if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), DX
+ MOVQ in_base+24(FP), SI
+ MOVQ (SI), DI
+ MOVQ 24(SI), R8
+ MOVQ 48(SI), R9
+ MOVQ 72(SI), R10
+ MOVQ 96(SI), R11
+ MOVQ 120(SI), R12
+ MOVQ 144(SI), SI
+ // Broadcast the 0x0f low-nibble mask into every byte of Y3.
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X3
+ VPBROADCASTB X3, Y3
+ // R13 = current byte offset into every input and output slice.
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_7x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (DI)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (R8)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (R9)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R10)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R11)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R12)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (SI)(R13*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ VMOVDQU Y0, (BX)(R13*1)
+ VMOVDQU Y1, (BP)(R13*1)
+ VMOVDQU Y2, (DX)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_7x3_loop
+ VZEROUPPER
+
+mulAvxTwo_7x3_end:
+ RET
+
+
+// func mulAvxTwo_7x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Galois-field multiply of 7 input slices by the expanded coding matrix at
+// matrix_base, XOR-accumulating into 4 output slices, 32 bytes per loop
+// iteration. Each 32-byte stripe is split into low/high nibbles (mask 0x0f)
+// that index two 32-byte lookup tables via VPSHUFB; the two partial products
+// are XORed together and into the per-output accumulators Y0-Y3. Each input
+// consumes 256 matrix bytes (4 outputs x 2 tables x 32 bytes).
+// NOTE(review): appears to be machine-generated (see gen.go in this package);
+// do not hand-edit.
+TEXT ·mulAvxTwo_7x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 65 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte stripes to process; exit early if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DX
+ MOVQ in_base+24(FP), DI
+ MOVQ (DI), R8
+ MOVQ 24(DI), R9
+ MOVQ 48(DI), R10
+ MOVQ 72(DI), R11
+ MOVQ 96(DI), R12
+ MOVQ 120(DI), R13
+ MOVQ 144(DI), DI
+ // Broadcast the 0x0f low-nibble mask into every byte of Y4.
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X4
+ VPBROADCASTB X4, Y4
+ // R14 = current byte offset into every input and output slice.
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_7x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (R8)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (R9)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (R10)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R11)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R12)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R13)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (DI)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ VMOVDQU Y0, (BX)(R14*1)
+ VMOVDQU Y1, (BP)(R14*1)
+ VMOVDQU Y2, (SI)(R14*1)
+ VMOVDQU Y3, (DX)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_7x4_loop
+ VZEROUPPER
+
+mulAvxTwo_7x4_end:
+ RET
+
+
+// func mulAvxTwo_7x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Galois-field multiply of 7 input slices by the expanded coding matrix at
+// matrix_base, XOR-accumulating into 5 output slices, 32 bytes per loop
+// iteration. Each 32-byte stripe is split into low/high nibbles (mask 0x0f)
+// that index two 32-byte lookup tables via VPSHUFB; the two partial products
+// are XORed together and into the per-output accumulators Y0-Y4. Each input
+// consumes 320 matrix bytes (5 outputs x 2 tables x 32 bytes).
+// NOTE(review): appears to be machine-generated (see gen.go in this package);
+// do not hand-edit.
+TEXT ·mulAvxTwo_7x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 80 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte stripes to process; exit early if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), BP
+ MOVQ 48(DX), SI
+ MOVQ 72(DX), DI
+ MOVQ 96(DX), DX
+ MOVQ in_base+24(FP), R8
+ MOVQ (R8), R9
+ MOVQ 24(R8), R10
+ MOVQ 48(R8), R11
+ MOVQ 72(R8), R12
+ MOVQ 96(R8), R13
+ MOVQ 120(R8), R14
+ MOVQ 144(R8), R8
+ // Broadcast the 0x0f low-nibble mask into every byte of Y5.
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X5
+ VPBROADCASTB X5, Y5
+ // R15 = current byte offset into every input and output slice.
+ MOVQ start+72(FP), R15
+
+mulAvxTwo_7x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (R9)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (R10)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (R11)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R12)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R13)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R14)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R8)(R15*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ VMOVDQU Y0, (BX)(R15*1)
+ VMOVDQU Y1, (BP)(R15*1)
+ VMOVDQU Y2, (SI)(R15*1)
+ VMOVDQU Y3, (DI)(R15*1)
+ VMOVDQU Y4, (DX)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_7x5_loop
+ VZEROUPPER
+
+mulAvxTwo_7x5_end:
+ RET
+
+
+// func mulAvxTwo_7x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// Galois-field multiply of 7 input slices by the expanded coding matrix at
+// matrix_base, XOR-accumulating into 6 output slices, 32 bytes per loop
+// iteration. Each 32-byte stripe is split into low/high nibbles (mask 0x0f)
+// that index two 32-byte lookup tables via VPSHUFB; the two partial products
+// are XORed together and into the per-output accumulators Y0-Y5. Each input
+// consumes 384 matrix bytes (6 outputs x 2 tables x 32 bytes). Unlike the
+// smaller kernels, only out_base stays in a register; the six output
+// pointers are reloaded from it in the store step each iteration.
+// NOTE(review): appears to be machine-generated (see gen.go in this package);
+// do not hand-edit.
+TEXT ·mulAvxTwo_7x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 95 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // AX = n/32: number of 32-byte stripes to process; exit early if zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), BX
+ // Broadcast the 0x0f low-nibble mask into every byte of Y6.
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X6
+ VPBROADCASTB X6, Y6
+ // R11 = current byte offset into every input and output slice.
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_7x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BP)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (BX)(R11*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ // Not enough free general registers to pin all six output pointers;
+ // reload each from out_base (DX) every iteration.
+ MOVQ (DX), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(DX), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(DX), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(DX), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(DX), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(DX), R12
+ VMOVDQU Y5, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_7x6_loop
+ VZEROUPPER
+
+mulAvxTwo_7x6_end:
+ RET
+
+
+// func mulAvxTwo_7x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), BX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_7x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BP)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (BX)(R11*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ MOVQ (DX), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(DX), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(DX), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(DX), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(DX), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(DX), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(DX), R12
+ VMOVDQU Y6, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_7x7_loop
+ VZEROUPPER
+
+mulAvxTwo_7x7_end:
+ RET
+
+// func mulAvxTwo_7x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_7x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 125 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_7x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), BX
+ MOVQ $0x0000000f, R11
+ MOVQ R11, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R11
+
+mulAvxTwo_7x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (BX)(R11*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R12
+ VMOVDQU Y0, (R12)(R11*1)
+ MOVQ 24(DX), R12
+ VMOVDQU Y1, (R12)(R11*1)
+ MOVQ 48(DX), R12
+ VMOVDQU Y2, (R12)(R11*1)
+ MOVQ 72(DX), R12
+ VMOVDQU Y3, (R12)(R11*1)
+ MOVQ 96(DX), R12
+ VMOVDQU Y4, (R12)(R11*1)
+ MOVQ 120(DX), R12
+ VMOVDQU Y5, (R12)(R11*1)
+ MOVQ 144(DX), R12
+ VMOVDQU Y6, (R12)(R11*1)
+ MOVQ 168(DX), R12
+ VMOVDQU Y7, (R12)(R11*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R11
+ DECQ AX
+ JNZ mulAvxTwo_7x8_loop
+ VZEROUPPER
+
+mulAvxTwo_7x8_end:
+ RET
+
+// func mulAvxTwo_8x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x1 computes 1 output slice as the GF(2^8) dot product of
+// 8 input slices with a coefficient column, 32 bytes per iteration.
+// Each coefficient occupies two consecutive 32-byte PSHUFB lookup
+// tables in matrix (low-nibble table, then high-nibble table); the two
+// lookups are XORed to form the product, then XORed into the
+// accumulator Y0. Code generated by gen.go — do not edit by hand.
+TEXT ·mulAvxTwo_8x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 20 YMM used
+	// AX = number of 32-byte blocks (n / 32); exit early when zero.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_8x1_end
+	// Unpack the single output pointer and the 8 input base pointers
+	// (slice headers are 24 bytes apart in the header arrays).
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), DX
+	MOVQ  in_base+24(FP), BX
+	MOVQ  (BX), BP
+	MOVQ  24(BX), SI
+	MOVQ  48(BX), DI
+	MOVQ  72(BX), R8
+	MOVQ  96(BX), R9
+	MOVQ  120(BX), R10
+	MOVQ  144(BX), R11
+	MOVQ  168(BX), BX
+	// Y1 = 32 copies of 0x0f, the nibble-extraction mask.
+	MOVQ  $0x0000000f, R12
+	MOVQ  R12, X1
+	VPBROADCASTB X1, Y1
+	// R12 = running byte offset into every slice.
+	MOVQ  start+72(FP), R12
+
+mulAvxTwo_8x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	// Y4 = low nibbles, Y5 = high nibbles; each selects bytes from its
+	// 32-byte lookup table via VPSHUFB.
+	VMOVDQU (BP)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (R11)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 7 to 1 outputs
+	VMOVDQU (BX)(R12*1), Y4
+	VPSRLQ  $0x04, Y4, Y5
+	VPAND   Y1, Y4, Y4
+	VPAND   Y1, Y5, Y5
+	VMOVDQU 448(CX), Y2
+	VMOVDQU 480(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR   Y2, Y3, Y2
+	VPXOR   Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_8x1_loop
+	VZEROUPPER
+
+mulAvxTwo_8x1_end:
+	RET
+
+// func mulAvxTwo_8x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x2 computes 2 output slices from 8 input slices
+// multiplied by a GF(2^8) coefficient matrix, 32 bytes per iteration.
+// Per coefficient, two consecutive 32-byte PSHUFB tables in matrix
+// (low nibble, then high nibble) are looked up and XORed to form the
+// product. Code generated by gen.go — do not edit by hand.
+TEXT ·mulAvxTwo_8x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 39 YMM used
+	// AX = number of 32-byte blocks (n / 32); exit early when zero.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_8x2_end
+	// Unpack 2 output pointers and 8 input pointers from the slice
+	// header arrays (24-byte stride).
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), DX
+	MOVQ  in_base+24(FP), BP
+	MOVQ  (BP), SI
+	MOVQ  24(BP), DI
+	MOVQ  48(BP), R8
+	MOVQ  72(BP), R9
+	MOVQ  96(BP), R10
+	MOVQ  120(BP), R11
+	MOVQ  144(BP), R12
+	MOVQ  168(BP), BP
+	// Y2 = 32 copies of 0x0f, the nibble-extraction mask.
+	MOVQ  $0x0000000f, R13
+	MOVQ  R13, X2
+	VPBROADCASTB X2, Y2
+	// R13 = running byte offset into every slice.
+	MOVQ  start+72(FP), R13
+
+mulAvxTwo_8x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	// Y5 = low nibbles, Y6 = high nibbles of the 32 input bytes.
+	VMOVDQU (SI)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (R10)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 5 to 2 outputs
+	VMOVDQU (R11)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 640(CX), Y3
+	VMOVDQU 672(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 704(CX), Y3
+	VMOVDQU 736(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 6 to 2 outputs
+	VMOVDQU (R12)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 768(CX), Y3
+	VMOVDQU 800(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 832(CX), Y3
+	VMOVDQU 864(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 7 to 2 outputs
+	VMOVDQU (BP)(R13*1), Y5
+	VPSRLQ  $0x04, Y5, Y6
+	VPAND   Y2, Y5, Y5
+	VPAND   Y2, Y6, Y6
+	VMOVDQU 896(CX), Y3
+	VMOVDQU 928(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y0, Y0
+	VMOVDQU 960(CX), Y3
+	VMOVDQU 992(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR   Y3, Y4, Y3
+	VPXOR   Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R13*1)
+	VMOVDQU Y1, (DX)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ  mulAvxTwo_8x2_loop
+	VZEROUPPER
+
+mulAvxTwo_8x2_end:
+	RET
+
+// func mulAvxTwo_8x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x3 computes 3 output slices from 8 input slices
+// multiplied by a GF(2^8) coefficient matrix, 32 bytes per iteration.
+// Per coefficient, two consecutive 32-byte PSHUFB tables in matrix
+// (low nibble, then high nibble) are looked up and XORed to form the
+// product. Code generated by gen.go — do not edit by hand.
+TEXT ·mulAvxTwo_8x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 56 YMM used
+	// AX = number of 32-byte blocks (n / 32); exit early when zero.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_8x3_end
+	// Unpack 3 output pointers and 8 input pointers from the slice
+	// header arrays (24-byte stride).
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), DX
+	MOVQ  in_base+24(FP), SI
+	MOVQ  (SI), DI
+	MOVQ  24(SI), R8
+	MOVQ  48(SI), R9
+	MOVQ  72(SI), R10
+	MOVQ  96(SI), R11
+	MOVQ  120(SI), R12
+	MOVQ  144(SI), R13
+	MOVQ  168(SI), SI
+	// Y3 = 32 copies of 0x0f, the nibble-extraction mask.
+	MOVQ  $0x0000000f, R14
+	MOVQ  R14, X3
+	VPBROADCASTB X3, Y3
+	// R14 = running byte offset into every slice.
+	MOVQ  start+72(FP), R14
+
+mulAvxTwo_8x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	// Y6 = low nibbles, Y7 = high nibbles of the 32 input bytes.
+	VMOVDQU (DI)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R10)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (R11)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 5 to 3 outputs
+	VMOVDQU (R12)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 960(CX), Y4
+	VMOVDQU 992(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 1024(CX), Y4
+	VMOVDQU 1056(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 1088(CX), Y4
+	VMOVDQU 1120(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 6 to 3 outputs
+	VMOVDQU (R13)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 1152(CX), Y4
+	VMOVDQU 1184(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 1216(CX), Y4
+	VMOVDQU 1248(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 1280(CX), Y4
+	VMOVDQU 1312(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 7 to 3 outputs
+	VMOVDQU (SI)(R14*1), Y6
+	VPSRLQ  $0x04, Y6, Y7
+	VPAND   Y3, Y6, Y6
+	VPAND   Y3, Y7, Y7
+	VMOVDQU 1344(CX), Y4
+	VMOVDQU 1376(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y0, Y0
+	VMOVDQU 1408(CX), Y4
+	VMOVDQU 1440(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y1, Y1
+	VMOVDQU 1472(CX), Y4
+	VMOVDQU 1504(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR   Y4, Y5, Y4
+	VPXOR   Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (BP)(R14*1)
+	VMOVDQU Y2, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ  mulAvxTwo_8x3_loop
+	VZEROUPPER
+
+mulAvxTwo_8x3_end:
+	RET
+
+// func mulAvxTwo_8x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// mulAvxTwo_8x4 computes 4 output slices from 8 input slices
+// multiplied by a GF(2^8) coefficient matrix, 32 bytes per iteration.
+// Per coefficient, two consecutive 32-byte PSHUFB tables in matrix
+// (low nibble, then high nibble) are looked up and XORed to form the
+// product. Code generated by gen.go — do not edit by hand.
+TEXT ·mulAvxTwo_8x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 73 YMM used
+	// AX = number of 32-byte blocks (n / 32); exit early when zero.
+	MOVQ  n+80(FP), AX
+	MOVQ  matrix_base+0(FP), CX
+	SHRQ  $0x05, AX
+	TESTQ AX, AX
+	JZ    mulAvxTwo_8x4_end
+	// Unpack 4 output pointers and 8 input pointers from the slice
+	// header arrays (24-byte stride); this uses every GP register.
+	MOVQ  out_base+48(FP), DX
+	MOVQ  (DX), BX
+	MOVQ  24(DX), BP
+	MOVQ  48(DX), SI
+	MOVQ  72(DX), DX
+	MOVQ  in_base+24(FP), DI
+	MOVQ  (DI), R8
+	MOVQ  24(DI), R9
+	MOVQ  48(DI), R10
+	MOVQ  72(DI), R11
+	MOVQ  96(DI), R12
+	MOVQ  120(DI), R13
+	MOVQ  144(DI), R14
+	MOVQ  168(DI), DI
+	// Y4 = 32 copies of 0x0f, the nibble-extraction mask.
+	MOVQ  $0x0000000f, R15
+	MOVQ  R15, X4
+	VPBROADCASTB X4, Y4
+	// R15 = running byte offset into every slice.
+	MOVQ  start+72(FP), R15
+
+mulAvxTwo_8x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	// Y7 = low nibbles, Y8 = high nibbles of the 32 input bytes.
+	VMOVDQU (R8)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (R9)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (R10)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R11)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (R12)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 5 to 4 outputs
+	VMOVDQU (R13)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 1280(CX), Y5
+	VMOVDQU 1312(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 1344(CX), Y5
+	VMOVDQU 1376(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 1408(CX), Y5
+	VMOVDQU 1440(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 1472(CX), Y5
+	VMOVDQU 1504(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 6 to 4 outputs
+	VMOVDQU (R14)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 1536(CX), Y5
+	VMOVDQU 1568(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 1600(CX), Y5
+	VMOVDQU 1632(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 1664(CX), Y5
+	VMOVDQU 1696(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 1728(CX), Y5
+	VMOVDQU 1760(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 7 to 4 outputs
+	VMOVDQU (DI)(R15*1), Y7
+	VPSRLQ  $0x04, Y7, Y8
+	VPAND   Y4, Y7, Y7
+	VPAND   Y4, Y8, Y8
+	VMOVDQU 1792(CX), Y5
+	VMOVDQU 1824(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y0, Y0
+	VMOVDQU 1856(CX), Y5
+	VMOVDQU 1888(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y1, Y1
+	VMOVDQU 1920(CX), Y5
+	VMOVDQU 1952(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y2, Y2
+	VMOVDQU 1984(CX), Y5
+	VMOVDQU 2016(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR   Y5, Y6, Y5
+	VPXOR   Y5, Y3, Y3
+
+	// Store 4 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (SI)(R15*1)
+	VMOVDQU Y3, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ  mulAvxTwo_8x4_loop
+	VZEROUPPER
+
+mulAvxTwo_8x4_end:
+	RET
+
+// func mulAvxTwo_8x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated AVX2 kernel (produced by gen.go) — do not
+// hand-edit; regenerate instead. For each 32-byte chunk it multiplies the 8
+// input slices by per-(input,output) lookup tables held in `matrix` and XORs
+// the products into the 5 output slices. Each multiply is the split-nibble
+// technique: the low and high 4 bits of every data byte index two 16-entry
+// VPSHUFB tables, and the XOR of the two lookups is the product.
+TEXT ·mulAvxTwo_8x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 90 YMM used
+	MOVQ         n+80(FP), AX
+	MOVQ         matrix_base+0(FP), CX
+	// AX = n / 32 — the number of whole 32-byte chunks to process.
+	SHRQ         $0x05, AX
+	TESTQ        AX, AX
+	JZ           mulAvxTwo_8x5_end
+	MOVQ         out_base+48(FP), DX
+	MOVQ         in_base+24(FP), BX
+	// Cache the eight input data pointers in registers; the five output
+	// pointers are reloaded from the out slice header on every iteration
+	// (see "Store 5 outputs"), presumably due to GP register pressure.
+	MOVQ         (BX), BP
+	MOVQ         24(BX), SI
+	MOVQ         48(BX), DI
+	MOVQ         72(BX), R8
+	MOVQ         96(BX), R9
+	MOVQ         120(BX), R10
+	MOVQ         144(BX), R11
+	MOVQ         168(BX), BX
+	// Broadcast the 0x0f nibble mask into every byte of Y5.
+	MOVQ         $0x0000000f, R12
+	MOVQ         R12, X5
+	VPBROADCASTB X5, Y5
+	// R12 = current byte offset into every input/output slice.
+	MOVQ         start+72(FP), R12
+
+mulAvxTwo_8x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (BP)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (SI)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (DI)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R8)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R9)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R10)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 6 to 5 outputs
+	VMOVDQU (R11)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 1920(CX), Y6
+	VMOVDQU 1952(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 1984(CX), Y6
+	VMOVDQU 2016(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 2048(CX), Y6
+	VMOVDQU 2080(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 2112(CX), Y6
+	VMOVDQU 2144(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 2176(CX), Y6
+	VMOVDQU 2208(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 7 to 5 outputs
+	VMOVDQU (BX)(R12*1), Y8
+	VPSRLQ  $0x04, Y8, Y9
+	VPAND   Y5, Y8, Y8
+	VPAND   Y5, Y9, Y9
+	VMOVDQU 2240(CX), Y6
+	VMOVDQU 2272(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y0, Y0
+	VMOVDQU 2304(CX), Y6
+	VMOVDQU 2336(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y1, Y1
+	VMOVDQU 2368(CX), Y6
+	VMOVDQU 2400(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y2, Y2
+	VMOVDQU 2432(CX), Y6
+	VMOVDQU 2464(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y3, Y3
+	VMOVDQU 2496(CX), Y6
+	VMOVDQU 2528(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR   Y6, Y7, Y6
+	VPXOR   Y6, Y4, Y4
+
+	// Store 5 outputs
+	// The output base pointers are re-read from out_base each iteration
+	// because no general-purpose registers remain to cache them.
+	MOVQ    (DX), R13
+	VMOVDQU Y0, (R13)(R12*1)
+	MOVQ    24(DX), R13
+	VMOVDQU Y1, (R13)(R12*1)
+	MOVQ    48(DX), R13
+	VMOVDQU Y2, (R13)(R12*1)
+	MOVQ    72(DX), R13
+	VMOVDQU Y3, (R13)(R12*1)
+	MOVQ    96(DX), R13
+	VMOVDQU Y4, (R13)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_8x5_loop
+	VZEROUPPER
+
+mulAvxTwo_8x5_end:
+	RET
+
+// func mulAvxTwo_8x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated AVX2 kernel (produced by gen.go) — do not
+// hand-edit; regenerate instead. Same structure as mulAvxTwo_8x5 but with 6
+// output accumulators (Y0-Y5): per 32-byte chunk, each of the 8 inputs is
+// split into low/high nibbles (mask in Y6), each nibble indexes a 16-entry
+// VPSHUFB table from `matrix`, and the XORed lookups accumulate into the
+// outputs. Tables are consumed sequentially, 64 bytes per (input,output).
+TEXT ·mulAvxTwo_8x6(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 107 YMM used
+	MOVQ         n+80(FP), AX
+	MOVQ         matrix_base+0(FP), CX
+	// AX = n / 32 — the number of whole 32-byte chunks to process.
+	SHRQ         $0x05, AX
+	TESTQ        AX, AX
+	JZ           mulAvxTwo_8x6_end
+	MOVQ         out_base+48(FP), DX
+	MOVQ         in_base+24(FP), BX
+	// Cache the eight input data pointers; output pointers are reloaded
+	// from the out slice header every iteration (see "Store 6 outputs").
+	MOVQ         (BX), BP
+	MOVQ         24(BX), SI
+	MOVQ         48(BX), DI
+	MOVQ         72(BX), R8
+	MOVQ         96(BX), R9
+	MOVQ         120(BX), R10
+	MOVQ         144(BX), R11
+	MOVQ         168(BX), BX
+	// Broadcast the 0x0f nibble mask into every byte of Y6.
+	MOVQ         $0x0000000f, R12
+	MOVQ         R12, X6
+	VPBROADCASTB X6, Y6
+	// R12 = current byte offset into every input/output slice.
+	MOVQ         start+72(FP), R12
+
+mulAvxTwo_8x6_loop:
+	// Clear 6 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+
+	// Load and process 32 bytes from input 0 to 6 outputs
+	VMOVDQU (BP)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU (CX), Y7
+	VMOVDQU 32(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 64(CX), Y7
+	VMOVDQU 96(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 128(CX), Y7
+	VMOVDQU 160(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 192(CX), Y7
+	VMOVDQU 224(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 256(CX), Y7
+	VMOVDQU 288(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 320(CX), Y7
+	VMOVDQU 352(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 1 to 6 outputs
+	VMOVDQU (SI)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 384(CX), Y7
+	VMOVDQU 416(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 448(CX), Y7
+	VMOVDQU 480(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 512(CX), Y7
+	VMOVDQU 544(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 576(CX), Y7
+	VMOVDQU 608(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 640(CX), Y7
+	VMOVDQU 672(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 704(CX), Y7
+	VMOVDQU 736(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 2 to 6 outputs
+	VMOVDQU (DI)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 768(CX), Y7
+	VMOVDQU 800(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 832(CX), Y7
+	VMOVDQU 864(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 896(CX), Y7
+	VMOVDQU 928(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 960(CX), Y7
+	VMOVDQU 992(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1024(CX), Y7
+	VMOVDQU 1056(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1088(CX), Y7
+	VMOVDQU 1120(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 3 to 6 outputs
+	VMOVDQU (R8)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1152(CX), Y7
+	VMOVDQU 1184(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1216(CX), Y7
+	VMOVDQU 1248(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 1280(CX), Y7
+	VMOVDQU 1312(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 1344(CX), Y7
+	VMOVDQU 1376(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1408(CX), Y7
+	VMOVDQU 1440(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1472(CX), Y7
+	VMOVDQU 1504(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 4 to 6 outputs
+	VMOVDQU (R9)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1536(CX), Y7
+	VMOVDQU 1568(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1600(CX), Y7
+	VMOVDQU 1632(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 1664(CX), Y7
+	VMOVDQU 1696(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 1728(CX), Y7
+	VMOVDQU 1760(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 1792(CX), Y7
+	VMOVDQU 1824(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 1856(CX), Y7
+	VMOVDQU 1888(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 5 to 6 outputs
+	VMOVDQU (R10)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 1920(CX), Y7
+	VMOVDQU 1952(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 1984(CX), Y7
+	VMOVDQU 2016(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 2048(CX), Y7
+	VMOVDQU 2080(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 2112(CX), Y7
+	VMOVDQU 2144(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 2176(CX), Y7
+	VMOVDQU 2208(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 2240(CX), Y7
+	VMOVDQU 2272(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 6 to 6 outputs
+	VMOVDQU (R11)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 2304(CX), Y7
+	VMOVDQU 2336(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 2368(CX), Y7
+	VMOVDQU 2400(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 2432(CX), Y7
+	VMOVDQU 2464(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 2496(CX), Y7
+	VMOVDQU 2528(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 2560(CX), Y7
+	VMOVDQU 2592(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 2624(CX), Y7
+	VMOVDQU 2656(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Load and process 32 bytes from input 7 to 6 outputs
+	VMOVDQU (BX)(R12*1), Y9
+	VPSRLQ  $0x04, Y9, Y10
+	VPAND   Y6, Y9, Y9
+	VPAND   Y6, Y10, Y10
+	VMOVDQU 2688(CX), Y7
+	VMOVDQU 2720(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y0, Y0
+	VMOVDQU 2752(CX), Y7
+	VMOVDQU 2784(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y1, Y1
+	VMOVDQU 2816(CX), Y7
+	VMOVDQU 2848(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y2, Y2
+	VMOVDQU 2880(CX), Y7
+	VMOVDQU 2912(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y3, Y3
+	VMOVDQU 2944(CX), Y7
+	VMOVDQU 2976(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y4, Y4
+	VMOVDQU 3008(CX), Y7
+	VMOVDQU 3040(CX), Y8
+	VPSHUFB Y9, Y7, Y7
+	VPSHUFB Y10, Y8, Y8
+	VPXOR   Y7, Y8, Y7
+	VPXOR   Y7, Y5, Y5
+
+	// Store 6 outputs
+	// Output base pointers are re-read from out_base each iteration
+	// because no general-purpose registers remain to cache them.
+	MOVQ    (DX), R13
+	VMOVDQU Y0, (R13)(R12*1)
+	MOVQ    24(DX), R13
+	VMOVDQU Y1, (R13)(R12*1)
+	MOVQ    48(DX), R13
+	VMOVDQU Y2, (R13)(R12*1)
+	MOVQ    72(DX), R13
+	VMOVDQU Y3, (R13)(R12*1)
+	MOVQ    96(DX), R13
+	VMOVDQU Y4, (R13)(R12*1)
+	MOVQ    120(DX), R13
+	VMOVDQU Y5, (R13)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_8x6_loop
+	VZEROUPPER
+
+mulAvxTwo_8x6_end:
+	RET
+
+// func mulAvxTwo_8x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated AVX2 kernel (produced by gen.go) — do not
+// hand-edit; regenerate instead. Same structure as mulAvxTwo_8x5/_8x6 but
+// with 7 output accumulators (Y0-Y6): per 32-byte chunk, each of the 8
+// inputs is split into low/high nibbles (mask in Y7), each nibble indexes a
+// 16-entry VPSHUFB table from `matrix`, and the XORed lookups accumulate
+// into the outputs. Tables are consumed sequentially, 64 bytes per
+// (input,output) pair.
+TEXT ·mulAvxTwo_8x7(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 124 YMM used
+	MOVQ         n+80(FP), AX
+	MOVQ         matrix_base+0(FP), CX
+	// AX = n / 32 — the number of whole 32-byte chunks to process.
+	SHRQ         $0x05, AX
+	TESTQ        AX, AX
+	JZ           mulAvxTwo_8x7_end
+	MOVQ         out_base+48(FP), DX
+	MOVQ         in_base+24(FP), BX
+	// Cache the eight input data pointers; output pointers are reloaded
+	// from the out slice header every iteration (see "Store 7 outputs").
+	MOVQ         (BX), BP
+	MOVQ         24(BX), SI
+	MOVQ         48(BX), DI
+	MOVQ         72(BX), R8
+	MOVQ         96(BX), R9
+	MOVQ         120(BX), R10
+	MOVQ         144(BX), R11
+	MOVQ         168(BX), BX
+	// Broadcast the 0x0f nibble mask into every byte of Y7.
+	MOVQ         $0x0000000f, R12
+	MOVQ         R12, X7
+	VPBROADCASTB X7, Y7
+	// R12 = current byte offset into every input/output slice.
+	MOVQ         start+72(FP), R12
+
+mulAvxTwo_8x7_loop:
+	// Clear 7 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+	VPXOR Y5, Y5, Y5
+	VPXOR Y6, Y6, Y6
+
+	// Load and process 32 bytes from input 0 to 7 outputs
+	VMOVDQU (BP)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU (CX), Y8
+	VMOVDQU 32(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 64(CX), Y8
+	VMOVDQU 96(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 128(CX), Y8
+	VMOVDQU 160(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 192(CX), Y8
+	VMOVDQU 224(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 256(CX), Y8
+	VMOVDQU 288(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 320(CX), Y8
+	VMOVDQU 352(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 384(CX), Y8
+	VMOVDQU 416(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 1 to 7 outputs
+	VMOVDQU (SI)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 448(CX), Y8
+	VMOVDQU 480(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 512(CX), Y8
+	VMOVDQU 544(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 576(CX), Y8
+	VMOVDQU 608(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 640(CX), Y8
+	VMOVDQU 672(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 704(CX), Y8
+	VMOVDQU 736(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 768(CX), Y8
+	VMOVDQU 800(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 832(CX), Y8
+	VMOVDQU 864(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 2 to 7 outputs
+	VMOVDQU (DI)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 896(CX), Y8
+	VMOVDQU 928(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 960(CX), Y8
+	VMOVDQU 992(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1024(CX), Y8
+	VMOVDQU 1056(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1088(CX), Y8
+	VMOVDQU 1120(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1152(CX), Y8
+	VMOVDQU 1184(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1216(CX), Y8
+	VMOVDQU 1248(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1280(CX), Y8
+	VMOVDQU 1312(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 3 to 7 outputs
+	VMOVDQU (R8)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 1344(CX), Y8
+	VMOVDQU 1376(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 1408(CX), Y8
+	VMOVDQU 1440(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1472(CX), Y8
+	VMOVDQU 1504(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1536(CX), Y8
+	VMOVDQU 1568(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 1600(CX), Y8
+	VMOVDQU 1632(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 1664(CX), Y8
+	VMOVDQU 1696(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 1728(CX), Y8
+	VMOVDQU 1760(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 4 to 7 outputs
+	VMOVDQU (R9)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 1792(CX), Y8
+	VMOVDQU 1824(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 1856(CX), Y8
+	VMOVDQU 1888(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 1920(CX), Y8
+	VMOVDQU 1952(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 1984(CX), Y8
+	VMOVDQU 2016(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 2048(CX), Y8
+	VMOVDQU 2080(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 2112(CX), Y8
+	VMOVDQU 2144(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 2176(CX), Y8
+	VMOVDQU 2208(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 5 to 7 outputs
+	VMOVDQU (R10)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 2240(CX), Y8
+	VMOVDQU 2272(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 2304(CX), Y8
+	VMOVDQU 2336(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 2368(CX), Y8
+	VMOVDQU 2400(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 2432(CX), Y8
+	VMOVDQU 2464(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 2496(CX), Y8
+	VMOVDQU 2528(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 2560(CX), Y8
+	VMOVDQU 2592(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 2624(CX), Y8
+	VMOVDQU 2656(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 6 to 7 outputs
+	VMOVDQU (R11)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 2688(CX), Y8
+	VMOVDQU 2720(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 2752(CX), Y8
+	VMOVDQU 2784(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 2816(CX), Y8
+	VMOVDQU 2848(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 2880(CX), Y8
+	VMOVDQU 2912(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 2944(CX), Y8
+	VMOVDQU 2976(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 3008(CX), Y8
+	VMOVDQU 3040(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 3072(CX), Y8
+	VMOVDQU 3104(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Load and process 32 bytes from input 7 to 7 outputs
+	VMOVDQU (BX)(R12*1), Y10
+	VPSRLQ  $0x04, Y10, Y11
+	VPAND   Y7, Y10, Y10
+	VPAND   Y7, Y11, Y11
+	VMOVDQU 3136(CX), Y8
+	VMOVDQU 3168(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y0, Y0
+	VMOVDQU 3200(CX), Y8
+	VMOVDQU 3232(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y1, Y1
+	VMOVDQU 3264(CX), Y8
+	VMOVDQU 3296(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y2, Y2
+	VMOVDQU 3328(CX), Y8
+	VMOVDQU 3360(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y3, Y3
+	VMOVDQU 3392(CX), Y8
+	VMOVDQU 3424(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y4, Y4
+	VMOVDQU 3456(CX), Y8
+	VMOVDQU 3488(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y5, Y5
+	VMOVDQU 3520(CX), Y8
+	VMOVDQU 3552(CX), Y9
+	VPSHUFB Y10, Y8, Y8
+	VPSHUFB Y11, Y9, Y9
+	VPXOR   Y8, Y9, Y8
+	VPXOR   Y8, Y6, Y6
+
+	// Store 7 outputs
+	// Output base pointers are re-read from out_base each iteration
+	// because no general-purpose registers remain to cache them.
+	MOVQ    (DX), R13
+	VMOVDQU Y0, (R13)(R12*1)
+	MOVQ    24(DX), R13
+	VMOVDQU Y1, (R13)(R12*1)
+	MOVQ    48(DX), R13
+	VMOVDQU Y2, (R13)(R12*1)
+	MOVQ    72(DX), R13
+	VMOVDQU Y3, (R13)(R12*1)
+	MOVQ    96(DX), R13
+	VMOVDQU Y4, (R13)(R12*1)
+	MOVQ    120(DX), R13
+	VMOVDQU Y5, (R13)(R12*1)
+	MOVQ    144(DX), R13
+	VMOVDQU Y6, (R13)(R12*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R12
+	DECQ AX
+	JNZ  mulAvxTwo_8x7_loop
+	VZEROUPPER
+
+mulAvxTwo_8x7_end:
+	RET
+
+// func mulAvxTwo_8x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_8x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 141 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_8x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), BX
+ MOVQ $0x0000000f, R12
+ MOVQ R12, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R12
+
+mulAvxTwo_8x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (BX)(R12*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R13
+ VMOVDQU Y0, (R13)(R12*1)
+ MOVQ 24(DX), R13
+ VMOVDQU Y1, (R13)(R12*1)
+ MOVQ 48(DX), R13
+ VMOVDQU Y2, (R13)(R12*1)
+ MOVQ 72(DX), R13
+ VMOVDQU Y3, (R13)(R12*1)
+ MOVQ 96(DX), R13
+ VMOVDQU Y4, (R13)(R12*1)
+ MOVQ 120(DX), R13
+ VMOVDQU Y5, (R13)(R12*1)
+ MOVQ 144(DX), R13
+ VMOVDQU Y6, (R13)(R12*1)
+ MOVQ 168(DX), R13
+ VMOVDQU Y7, (R13)(R12*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R12
+ DECQ AX
+ JNZ mulAvxTwo_8x8_loop
+ VZEROUPPER
+
+mulAvxTwo_8x8_end:
+ RET
+
+// mulAvxTwo_9x1 multiplies 9 input slices by a 9x1 coefficient matrix over
+// GF(2^8) and XOR-accumulates into 1 output slice, 32 bytes per iteration.
+//
+// NOTE(review): this file appears to be machine-generated (see gen.go in the
+// same package) — do not hand-edit; regenerate instead.
+//
+// Technique: each source byte is split into its low and high nibble
+// (VPAND with the 0x0f mask / VPSRLQ $4 then VPAND); each nibble indexes a
+// 32-byte lookup table of precomputed products via VPSHUFB; the two partial
+// products are combined with VPXOR (addition in GF(2^8)) and folded into
+// the output accumulator Y0.
+//
+// n (bytes) is divided by 32 to get the loop count; start is the byte
+// offset at which processing begins in every input/output slice.
+// func mulAvxTwo_9x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x1(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 22 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x1_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	// Y1 = 0x0f broadcast to every byte lane (nibble mask).
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X1
+	VPBROADCASTB X1, Y1
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_9x1_loop:
+	// Clear 1 outputs
+	VPXOR Y0, Y0, Y0
+
+	// Load and process 32 bytes from input 0 to 1 outputs
+	VMOVDQU (BP)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU (CX), Y2
+	VMOVDQU 32(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 1 to 1 outputs
+	VMOVDQU (SI)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 64(CX), Y2
+	VMOVDQU 96(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 2 to 1 outputs
+	VMOVDQU (DI)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 128(CX), Y2
+	VMOVDQU 160(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 3 to 1 outputs
+	VMOVDQU (R8)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 192(CX), Y2
+	VMOVDQU 224(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 4 to 1 outputs
+	VMOVDQU (R9)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 256(CX), Y2
+	VMOVDQU 288(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 5 to 1 outputs
+	VMOVDQU (R10)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 320(CX), Y2
+	VMOVDQU 352(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 6 to 1 outputs
+	VMOVDQU (R11)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 384(CX), Y2
+	VMOVDQU 416(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 7 to 1 outputs
+	VMOVDQU (R12)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 448(CX), Y2
+	VMOVDQU 480(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Load and process 32 bytes from input 8 to 1 outputs
+	VMOVDQU (BX)(R13*1), Y4
+	VPSRLQ $0x04, Y4, Y5
+	VPAND Y1, Y4, Y4
+	VPAND Y1, Y5, Y5
+	VMOVDQU 512(CX), Y2
+	VMOVDQU 544(CX), Y3
+	VPSHUFB Y4, Y2, Y2
+	VPSHUFB Y5, Y3, Y3
+	VPXOR Y2, Y3, Y2
+	VPXOR Y2, Y0, Y0
+
+	// Store 1 outputs
+	VMOVDQU Y0, (DX)(R13*1)
+
+	// Prepare for next loop
+	// Advance the shared offset by 32 bytes and count down iterations.
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x1_loop
+	VZEROUPPER
+
+mulAvxTwo_9x1_end:
+	RET
+
+// mulAvxTwo_9x2 multiplies 9 input slices by a 9x2 coefficient matrix over
+// GF(2^8), producing 2 output slices, 32 bytes per loop iteration.
+//
+// NOTE(review): machine-generated (see gen.go); do not hand-edit.
+//
+// Per input and per output, a pair of 32-byte VPSHUFB lookup tables in the
+// matrix buffer (low-nibble table, then high-nibble table) yields the
+// GF(2^8) product, which is VPXOR-accumulated into Y0/Y1. Tables are laid
+// out consecutively: 2 outputs x 2 tables x 32 bytes = 128 bytes per input.
+// func mulAvxTwo_9x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x2(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 43 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x2_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), DX
+	MOVQ in_base+24(FP), BP
+	MOVQ (BP), SI
+	MOVQ 24(BP), DI
+	MOVQ 48(BP), R8
+	MOVQ 72(BP), R9
+	MOVQ 96(BP), R10
+	MOVQ 120(BP), R11
+	MOVQ 144(BP), R12
+	MOVQ 168(BP), R13
+	MOVQ 192(BP), BP
+	// Y2 = 0x0f broadcast to every byte lane (nibble mask).
+	MOVQ $0x0000000f, R14
+	MOVQ R14, X2
+	VPBROADCASTB X2, Y2
+	MOVQ start+72(FP), R14
+
+mulAvxTwo_9x2_loop:
+	// Clear 2 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+
+	// Load and process 32 bytes from input 0 to 2 outputs
+	VMOVDQU (SI)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU (CX), Y3
+	VMOVDQU 32(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 64(CX), Y3
+	VMOVDQU 96(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 1 to 2 outputs
+	VMOVDQU (DI)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 128(CX), Y3
+	VMOVDQU 160(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 192(CX), Y3
+	VMOVDQU 224(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 2 to 2 outputs
+	VMOVDQU (R8)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 256(CX), Y3
+	VMOVDQU 288(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 320(CX), Y3
+	VMOVDQU 352(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 3 to 2 outputs
+	VMOVDQU (R9)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 384(CX), Y3
+	VMOVDQU 416(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 448(CX), Y3
+	VMOVDQU 480(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 4 to 2 outputs
+	VMOVDQU (R10)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 512(CX), Y3
+	VMOVDQU 544(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 576(CX), Y3
+	VMOVDQU 608(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 5 to 2 outputs
+	VMOVDQU (R11)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 640(CX), Y3
+	VMOVDQU 672(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 704(CX), Y3
+	VMOVDQU 736(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 6 to 2 outputs
+	VMOVDQU (R12)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 768(CX), Y3
+	VMOVDQU 800(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 832(CX), Y3
+	VMOVDQU 864(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 7 to 2 outputs
+	VMOVDQU (R13)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 896(CX), Y3
+	VMOVDQU 928(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 960(CX), Y3
+	VMOVDQU 992(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Load and process 32 bytes from input 8 to 2 outputs
+	VMOVDQU (BP)(R14*1), Y5
+	VPSRLQ $0x04, Y5, Y6
+	VPAND Y2, Y5, Y5
+	VPAND Y2, Y6, Y6
+	VMOVDQU 1024(CX), Y3
+	VMOVDQU 1056(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y0, Y0
+	VMOVDQU 1088(CX), Y3
+	VMOVDQU 1120(CX), Y4
+	VPSHUFB Y5, Y3, Y3
+	VPSHUFB Y6, Y4, Y4
+	VPXOR Y3, Y4, Y3
+	VPXOR Y3, Y1, Y1
+
+	// Store 2 outputs
+	VMOVDQU Y0, (BX)(R14*1)
+	VMOVDQU Y1, (DX)(R14*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R14
+	DECQ AX
+	JNZ mulAvxTwo_9x2_loop
+	VZEROUPPER
+
+mulAvxTwo_9x2_end:
+	RET
+
+// mulAvxTwo_9x3 multiplies 9 input slices by a 9x3 coefficient matrix over
+// GF(2^8), producing 3 output slices, 32 bytes per loop iteration.
+//
+// NOTE(review): machine-generated (see gen.go); do not hand-edit.
+//
+// Same nibble-split VPSHUFB table scheme as the sibling routines; here
+// 3 outputs x 2 tables x 32 bytes = 192 bytes of tables per input. All 15
+// general-purpose registers are in use (inputs in DI..SI, outputs in
+// BX/BP/DX, offset in R15), so output pointers stay resident.
+// func mulAvxTwo_9x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x3(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 62 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x3_end
+	MOVQ out_base+48(FP), DX
+	MOVQ (DX), BX
+	MOVQ 24(DX), BP
+	MOVQ 48(DX), DX
+	MOVQ in_base+24(FP), SI
+	MOVQ (SI), DI
+	MOVQ 24(SI), R8
+	MOVQ 48(SI), R9
+	MOVQ 72(SI), R10
+	MOVQ 96(SI), R11
+	MOVQ 120(SI), R12
+	MOVQ 144(SI), R13
+	MOVQ 168(SI), R14
+	MOVQ 192(SI), SI
+	// Y3 = 0x0f broadcast to every byte lane (nibble mask).
+	MOVQ $0x0000000f, R15
+	MOVQ R15, X3
+	VPBROADCASTB X3, Y3
+	MOVQ start+72(FP), R15
+
+mulAvxTwo_9x3_loop:
+	// Clear 3 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+
+	// Load and process 32 bytes from input 0 to 3 outputs
+	VMOVDQU (DI)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU (CX), Y4
+	VMOVDQU 32(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 64(CX), Y4
+	VMOVDQU 96(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 128(CX), Y4
+	VMOVDQU 160(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 1 to 3 outputs
+	VMOVDQU (R8)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 192(CX), Y4
+	VMOVDQU 224(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 256(CX), Y4
+	VMOVDQU 288(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 320(CX), Y4
+	VMOVDQU 352(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 2 to 3 outputs
+	VMOVDQU (R9)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 384(CX), Y4
+	VMOVDQU 416(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 448(CX), Y4
+	VMOVDQU 480(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 512(CX), Y4
+	VMOVDQU 544(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 3 to 3 outputs
+	VMOVDQU (R10)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 576(CX), Y4
+	VMOVDQU 608(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 640(CX), Y4
+	VMOVDQU 672(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 704(CX), Y4
+	VMOVDQU 736(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 4 to 3 outputs
+	VMOVDQU (R11)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 768(CX), Y4
+	VMOVDQU 800(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 832(CX), Y4
+	VMOVDQU 864(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 896(CX), Y4
+	VMOVDQU 928(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 5 to 3 outputs
+	VMOVDQU (R12)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 960(CX), Y4
+	VMOVDQU 992(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1024(CX), Y4
+	VMOVDQU 1056(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1088(CX), Y4
+	VMOVDQU 1120(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 6 to 3 outputs
+	VMOVDQU (R13)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1152(CX), Y4
+	VMOVDQU 1184(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1216(CX), Y4
+	VMOVDQU 1248(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1280(CX), Y4
+	VMOVDQU 1312(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 7 to 3 outputs
+	VMOVDQU (R14)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1344(CX), Y4
+	VMOVDQU 1376(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1408(CX), Y4
+	VMOVDQU 1440(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1472(CX), Y4
+	VMOVDQU 1504(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Load and process 32 bytes from input 8 to 3 outputs
+	VMOVDQU (SI)(R15*1), Y6
+	VPSRLQ $0x04, Y6, Y7
+	VPAND Y3, Y6, Y6
+	VPAND Y3, Y7, Y7
+	VMOVDQU 1536(CX), Y4
+	VMOVDQU 1568(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y0, Y0
+	VMOVDQU 1600(CX), Y4
+	VMOVDQU 1632(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y1, Y1
+	VMOVDQU 1664(CX), Y4
+	VMOVDQU 1696(CX), Y5
+	VPSHUFB Y6, Y4, Y4
+	VPSHUFB Y7, Y5, Y5
+	VPXOR Y4, Y5, Y4
+	VPXOR Y4, Y2, Y2
+
+	// Store 3 outputs
+	VMOVDQU Y0, (BX)(R15*1)
+	VMOVDQU Y1, (BP)(R15*1)
+	VMOVDQU Y2, (DX)(R15*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R15
+	DECQ AX
+	JNZ mulAvxTwo_9x3_loop
+	VZEROUPPER
+
+mulAvxTwo_9x3_end:
+	RET
+
+// mulAvxTwo_9x4 multiplies 9 input slices by a 9x4 coefficient matrix over
+// GF(2^8), producing 4 output slices, 32 bytes per loop iteration.
+//
+// NOTE(review): machine-generated (see gen.go); do not hand-edit.
+//
+// Same nibble-split VPSHUFB table scheme as the sibling routines
+// (4 outputs x 2 tables x 32 bytes = 256 bytes of tables per input).
+// Unlike the 9x1..9x3 variants there are not enough general-purpose
+// registers to keep all output pointers resident, so only out_base is held
+// (in DX) and each output pointer is re-loaded from it at store time.
+// func mulAvxTwo_9x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x4(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 81 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x4_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	// Y4 = 0x0f broadcast to every byte lane (nibble mask).
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X4
+	VPBROADCASTB X4, Y4
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_9x4_loop:
+	// Clear 4 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+
+	// Load and process 32 bytes from input 0 to 4 outputs
+	VMOVDQU (BP)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU (CX), Y5
+	VMOVDQU 32(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 64(CX), Y5
+	VMOVDQU 96(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 128(CX), Y5
+	VMOVDQU 160(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 192(CX), Y5
+	VMOVDQU 224(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 1 to 4 outputs
+	VMOVDQU (SI)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 256(CX), Y5
+	VMOVDQU 288(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 320(CX), Y5
+	VMOVDQU 352(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 384(CX), Y5
+	VMOVDQU 416(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 448(CX), Y5
+	VMOVDQU 480(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 2 to 4 outputs
+	VMOVDQU (DI)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 512(CX), Y5
+	VMOVDQU 544(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 576(CX), Y5
+	VMOVDQU 608(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 640(CX), Y5
+	VMOVDQU 672(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 704(CX), Y5
+	VMOVDQU 736(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 3 to 4 outputs
+	VMOVDQU (R8)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 768(CX), Y5
+	VMOVDQU 800(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 832(CX), Y5
+	VMOVDQU 864(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 896(CX), Y5
+	VMOVDQU 928(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 960(CX), Y5
+	VMOVDQU 992(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 4 to 4 outputs
+	VMOVDQU (R9)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1024(CX), Y5
+	VMOVDQU 1056(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1088(CX), Y5
+	VMOVDQU 1120(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1152(CX), Y5
+	VMOVDQU 1184(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1216(CX), Y5
+	VMOVDQU 1248(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 5 to 4 outputs
+	VMOVDQU (R10)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1280(CX), Y5
+	VMOVDQU 1312(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1344(CX), Y5
+	VMOVDQU 1376(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1408(CX), Y5
+	VMOVDQU 1440(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1472(CX), Y5
+	VMOVDQU 1504(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 6 to 4 outputs
+	VMOVDQU (R11)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1536(CX), Y5
+	VMOVDQU 1568(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1600(CX), Y5
+	VMOVDQU 1632(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1664(CX), Y5
+	VMOVDQU 1696(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1728(CX), Y5
+	VMOVDQU 1760(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 7 to 4 outputs
+	VMOVDQU (R12)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 1792(CX), Y5
+	VMOVDQU 1824(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 1856(CX), Y5
+	VMOVDQU 1888(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 1920(CX), Y5
+	VMOVDQU 1952(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 1984(CX), Y5
+	VMOVDQU 2016(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Load and process 32 bytes from input 8 to 4 outputs
+	VMOVDQU (BX)(R13*1), Y7
+	VPSRLQ $0x04, Y7, Y8
+	VPAND Y4, Y7, Y7
+	VPAND Y4, Y8, Y8
+	VMOVDQU 2048(CX), Y5
+	VMOVDQU 2080(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y0, Y0
+	VMOVDQU 2112(CX), Y5
+	VMOVDQU 2144(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y1, Y1
+	VMOVDQU 2176(CX), Y5
+	VMOVDQU 2208(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y2, Y2
+	VMOVDQU 2240(CX), Y5
+	VMOVDQU 2272(CX), Y6
+	VPSHUFB Y7, Y5, Y5
+	VPSHUFB Y8, Y6, Y6
+	VPXOR Y5, Y6, Y5
+	VPXOR Y5, Y3, Y3
+
+	// Store 4 outputs
+	// Output pointers are fetched from out_base (DX) on each store because
+	// no spare GP registers remain to cache them.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x4_loop
+	VZEROUPPER
+
+mulAvxTwo_9x4_end:
+	RET
+
+// mulAvxTwo_9x5 multiplies 9 input slices by a 9x5 coefficient matrix over
+// GF(2^8), producing 5 output slices, 32 bytes per loop iteration.
+//
+// NOTE(review): machine-generated (see gen.go); do not hand-edit.
+//
+// Same nibble-split VPSHUFB table scheme as the sibling routines
+// (5 outputs x 2 tables x 32 bytes = 320 bytes of tables per input).
+// As in the 9x4 variant, output pointers are re-loaded from out_base (DX)
+// at store time because all other general-purpose registers hold input
+// pointers, counters, or the offset.
+// func mulAvxTwo_9x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_9x5(SB), $0-88
+	// Loading no tables to registers
+	// Full registers estimated 100 YMM used
+	MOVQ n+80(FP), AX
+	MOVQ matrix_base+0(FP), CX
+	SHRQ $0x05, AX
+	TESTQ AX, AX
+	JZ mulAvxTwo_9x5_end
+	MOVQ out_base+48(FP), DX
+	MOVQ in_base+24(FP), BX
+	MOVQ (BX), BP
+	MOVQ 24(BX), SI
+	MOVQ 48(BX), DI
+	MOVQ 72(BX), R8
+	MOVQ 96(BX), R9
+	MOVQ 120(BX), R10
+	MOVQ 144(BX), R11
+	MOVQ 168(BX), R12
+	MOVQ 192(BX), BX
+	// Y5 = 0x0f broadcast to every byte lane (nibble mask).
+	MOVQ $0x0000000f, R13
+	MOVQ R13, X5
+	VPBROADCASTB X5, Y5
+	MOVQ start+72(FP), R13
+
+mulAvxTwo_9x5_loop:
+	// Clear 5 outputs
+	VPXOR Y0, Y0, Y0
+	VPXOR Y1, Y1, Y1
+	VPXOR Y2, Y2, Y2
+	VPXOR Y3, Y3, Y3
+	VPXOR Y4, Y4, Y4
+
+	// Load and process 32 bytes from input 0 to 5 outputs
+	VMOVDQU (BP)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU (CX), Y6
+	VMOVDQU 32(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 64(CX), Y6
+	VMOVDQU 96(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 128(CX), Y6
+	VMOVDQU 160(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 192(CX), Y6
+	VMOVDQU 224(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 256(CX), Y6
+	VMOVDQU 288(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 1 to 5 outputs
+	VMOVDQU (SI)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 320(CX), Y6
+	VMOVDQU 352(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 384(CX), Y6
+	VMOVDQU 416(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 448(CX), Y6
+	VMOVDQU 480(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 512(CX), Y6
+	VMOVDQU 544(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 576(CX), Y6
+	VMOVDQU 608(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 2 to 5 outputs
+	VMOVDQU (DI)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 640(CX), Y6
+	VMOVDQU 672(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 704(CX), Y6
+	VMOVDQU 736(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 768(CX), Y6
+	VMOVDQU 800(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 832(CX), Y6
+	VMOVDQU 864(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 896(CX), Y6
+	VMOVDQU 928(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 3 to 5 outputs
+	VMOVDQU (R8)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 960(CX), Y6
+	VMOVDQU 992(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1024(CX), Y6
+	VMOVDQU 1056(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1088(CX), Y6
+	VMOVDQU 1120(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1152(CX), Y6
+	VMOVDQU 1184(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1216(CX), Y6
+	VMOVDQU 1248(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 4 to 5 outputs
+	VMOVDQU (R9)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1280(CX), Y6
+	VMOVDQU 1312(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1344(CX), Y6
+	VMOVDQU 1376(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1408(CX), Y6
+	VMOVDQU 1440(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1472(CX), Y6
+	VMOVDQU 1504(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1536(CX), Y6
+	VMOVDQU 1568(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 5 to 5 outputs
+	VMOVDQU (R10)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1600(CX), Y6
+	VMOVDQU 1632(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1664(CX), Y6
+	VMOVDQU 1696(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 1728(CX), Y6
+	VMOVDQU 1760(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 1792(CX), Y6
+	VMOVDQU 1824(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 1856(CX), Y6
+	VMOVDQU 1888(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 6 to 5 outputs
+	VMOVDQU (R11)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 1920(CX), Y6
+	VMOVDQU 1952(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 1984(CX), Y6
+	VMOVDQU 2016(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2048(CX), Y6
+	VMOVDQU 2080(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2112(CX), Y6
+	VMOVDQU 2144(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2176(CX), Y6
+	VMOVDQU 2208(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 7 to 5 outputs
+	VMOVDQU (R12)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2240(CX), Y6
+	VMOVDQU 2272(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2304(CX), Y6
+	VMOVDQU 2336(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2368(CX), Y6
+	VMOVDQU 2400(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2432(CX), Y6
+	VMOVDQU 2464(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2496(CX), Y6
+	VMOVDQU 2528(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Load and process 32 bytes from input 8 to 5 outputs
+	VMOVDQU (BX)(R13*1), Y8
+	VPSRLQ $0x04, Y8, Y9
+	VPAND Y5, Y8, Y8
+	VPAND Y5, Y9, Y9
+	VMOVDQU 2560(CX), Y6
+	VMOVDQU 2592(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y0, Y0
+	VMOVDQU 2624(CX), Y6
+	VMOVDQU 2656(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y1, Y1
+	VMOVDQU 2688(CX), Y6
+	VMOVDQU 2720(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y2, Y2
+	VMOVDQU 2752(CX), Y6
+	VMOVDQU 2784(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y3, Y3
+	VMOVDQU 2816(CX), Y6
+	VMOVDQU 2848(CX), Y7
+	VPSHUFB Y8, Y6, Y6
+	VPSHUFB Y9, Y7, Y7
+	VPXOR Y6, Y7, Y6
+	VPXOR Y6, Y4, Y4
+
+	// Store 5 outputs
+	// Output pointers are fetched from out_base (DX) on each store because
+	// no spare GP registers remain to cache them.
+	MOVQ (DX), R14
+	VMOVDQU Y0, (R14)(R13*1)
+	MOVQ 24(DX), R14
+	VMOVDQU Y1, (R14)(R13*1)
+	MOVQ 48(DX), R14
+	VMOVDQU Y2, (R14)(R13*1)
+	MOVQ 72(DX), R14
+	VMOVDQU Y3, (R14)(R13*1)
+	MOVQ 96(DX), R14
+	VMOVDQU Y4, (R14)(R13*1)
+
+	// Prepare for next loop
+	ADDQ $0x20, R13
+	DECQ AX
+	JNZ mulAvxTwo_9x5_loop
+	VZEROUPPER
+
+mulAvxTwo_9x5_end:
+	RET
+
+// func mulAvxTwo_9x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+// NOTE(review): machine-generated AVX2 kernel (produced by this package's
+// gen.go generator) — regenerate rather than hand-editing. Per 32-byte
+// chunk, each of the 9 input slices is split into low/high nibbles
+// (VPSRLQ by 4 plus VPAND with the broadcast 0x0f mask in Y6); each nibble
+// indexes a 32-byte lookup table loaded from `matrix` via VPSHUFB, and the
+// two halves are XOR-combined and XOR-accumulated into one YMM register
+// per output (Y0-Y5). Presumably this implements GF(2^8) multiply-and-add
+// for Reed-Solomon coding, per the package name — tables occupy 384 bytes
+// of `matrix` per input (6 outputs x 2 x 32 bytes).
+TEXT ·mulAvxTwo_9x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 119 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), BX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X6
+ VPBROADCASTB X6, Y6
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_9x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ VMOVDQU (BP)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (BX)(R13*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ MOVQ (DX), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(DX), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(DX), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(DX), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(DX), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(DX), R14
+ VMOVDQU Y5, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_9x6_loop
+ VZEROUPPER
+
+mulAvxTwo_9x6_end:
+ RET
+
+// func mulAvxTwo_9x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+// NOTE(review): machine-generated AVX2 kernel (produced by this package's
+// gen.go generator) — regenerate rather than hand-editing. Same nibble
+// table-lookup scheme as the other mulAvxTwo_* kernels: per 32-byte chunk
+// each input is split into low/high nibbles (VPSRLQ + VPAND with the 0x0f
+// mask broadcast into Y7), looked up via VPSHUFB in 32-byte tables from
+// `matrix`, and XOR-accumulated into Y0-Y6, one register per output.
+// Tables occupy 448 bytes of `matrix` per input (7 outputs x 2 x 32 bytes).
+TEXT ·mulAvxTwo_9x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 138 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), BX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X7
+ VPBROADCASTB X7, Y7
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_9x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ VMOVDQU (BP)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (BX)(R13*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ MOVQ (DX), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(DX), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(DX), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(DX), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(DX), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(DX), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(DX), R14
+ VMOVDQU Y6, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_9x7_loop
+ VZEROUPPER
+
+mulAvxTwo_9x7_end:
+ RET
+
+// func mulAvxTwo_9x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+// NOTE(review): machine-generated AVX2 kernel (produced by this package's
+// gen.go generator) — regenerate rather than hand-editing. Same nibble
+// table-lookup scheme as the other mulAvxTwo_* kernels: per 32-byte chunk
+// each input is split into low/high nibbles (VPSRLQ + VPAND with the 0x0f
+// mask broadcast into Y8), looked up via VPSHUFB in 32-byte tables from
+// `matrix`, and XOR-accumulated into Y0-Y7, one register per output.
+// Tables occupy 512 bytes of `matrix` per input (8 outputs x 2 x 32 bytes).
+TEXT ·mulAvxTwo_9x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 157 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_9x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), BX
+ MOVQ $0x0000000f, R13
+ MOVQ R13, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R13
+
+mulAvxTwo_9x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (BX)(R13*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R14
+ VMOVDQU Y0, (R14)(R13*1)
+ MOVQ 24(DX), R14
+ VMOVDQU Y1, (R14)(R13*1)
+ MOVQ 48(DX), R14
+ VMOVDQU Y2, (R14)(R13*1)
+ MOVQ 72(DX), R14
+ VMOVDQU Y3, (R14)(R13*1)
+ MOVQ 96(DX), R14
+ VMOVDQU Y4, (R14)(R13*1)
+ MOVQ 120(DX), R14
+ VMOVDQU Y5, (R14)(R13*1)
+ MOVQ 144(DX), R14
+ VMOVDQU Y6, (R14)(R13*1)
+ MOVQ 168(DX), R14
+ VMOVDQU Y7, (R14)(R13*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R13
+ DECQ AX
+ JNZ mulAvxTwo_9x8_loop
+ VZEROUPPER
+
+mulAvxTwo_9x8_end:
+ RET
+
+// func mulAvxTwo_10x1(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x1(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 24 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x1_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X1
+ VPBROADCASTB X1, Y1
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x1_loop:
+ // Clear 1 outputs
+ VPXOR Y0, Y0, Y0
+
+ // Load and process 32 bytes from input 0 to 1 outputs
+ VMOVDQU (BP)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU (CX), Y2
+ VMOVDQU 32(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 1 to 1 outputs
+ VMOVDQU (SI)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 64(CX), Y2
+ VMOVDQU 96(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 2 to 1 outputs
+ VMOVDQU (DI)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 128(CX), Y2
+ VMOVDQU 160(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 3 to 1 outputs
+ VMOVDQU (R8)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 192(CX), Y2
+ VMOVDQU 224(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 4 to 1 outputs
+ VMOVDQU (R9)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 256(CX), Y2
+ VMOVDQU 288(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 5 to 1 outputs
+ VMOVDQU (R10)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 320(CX), Y2
+ VMOVDQU 352(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 6 to 1 outputs
+ VMOVDQU (R11)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 384(CX), Y2
+ VMOVDQU 416(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 7 to 1 outputs
+ VMOVDQU (R12)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 448(CX), Y2
+ VMOVDQU 480(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 8 to 1 outputs
+ VMOVDQU (R13)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 512(CX), Y2
+ VMOVDQU 544(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Load and process 32 bytes from input 9 to 1 outputs
+ VMOVDQU (BX)(R14*1), Y4
+ VPSRLQ $0x04, Y4, Y5
+ VPAND Y1, Y4, Y4
+ VPAND Y1, Y5, Y5
+ VMOVDQU 576(CX), Y2
+ VMOVDQU 608(CX), Y3
+ VPSHUFB Y4, Y2, Y2
+ VPSHUFB Y5, Y3, Y3
+ VPXOR Y2, Y3, Y2
+ VPXOR Y2, Y0, Y0
+
+ // Store 1 outputs
+ VMOVDQU Y0, (DX)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x1_loop
+ VZEROUPPER
+
+mulAvxTwo_10x1_end:
+ RET
+
+// func mulAvxTwo_10x2(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x2(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 47 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x2_end
+ MOVQ out_base+48(FP), DX
+ MOVQ (DX), BX
+ MOVQ 24(DX), DX
+ MOVQ in_base+24(FP), BP
+ MOVQ (BP), SI
+ MOVQ 24(BP), DI
+ MOVQ 48(BP), R8
+ MOVQ 72(BP), R9
+ MOVQ 96(BP), R10
+ MOVQ 120(BP), R11
+ MOVQ 144(BP), R12
+ MOVQ 168(BP), R13
+ MOVQ 192(BP), R14
+ MOVQ 216(BP), BP
+ MOVQ $0x0000000f, R15
+ MOVQ R15, X2
+ VPBROADCASTB X2, Y2
+ MOVQ start+72(FP), R15
+
+mulAvxTwo_10x2_loop:
+ // Clear 2 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+
+ // Load and process 32 bytes from input 0 to 2 outputs
+ VMOVDQU (SI)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU (CX), Y3
+ VMOVDQU 32(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 64(CX), Y3
+ VMOVDQU 96(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 1 to 2 outputs
+ VMOVDQU (DI)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 128(CX), Y3
+ VMOVDQU 160(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 192(CX), Y3
+ VMOVDQU 224(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 2 to 2 outputs
+ VMOVDQU (R8)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 256(CX), Y3
+ VMOVDQU 288(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 320(CX), Y3
+ VMOVDQU 352(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 3 to 2 outputs
+ VMOVDQU (R9)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 384(CX), Y3
+ VMOVDQU 416(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 448(CX), Y3
+ VMOVDQU 480(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 4 to 2 outputs
+ VMOVDQU (R10)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 512(CX), Y3
+ VMOVDQU 544(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 576(CX), Y3
+ VMOVDQU 608(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 5 to 2 outputs
+ VMOVDQU (R11)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 640(CX), Y3
+ VMOVDQU 672(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 704(CX), Y3
+ VMOVDQU 736(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 6 to 2 outputs
+ VMOVDQU (R12)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 768(CX), Y3
+ VMOVDQU 800(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 832(CX), Y3
+ VMOVDQU 864(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 7 to 2 outputs
+ VMOVDQU (R13)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 896(CX), Y3
+ VMOVDQU 928(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 960(CX), Y3
+ VMOVDQU 992(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 8 to 2 outputs
+ VMOVDQU (R14)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1024(CX), Y3
+ VMOVDQU 1056(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 1088(CX), Y3
+ VMOVDQU 1120(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Load and process 32 bytes from input 9 to 2 outputs
+ VMOVDQU (BP)(R15*1), Y5
+ VPSRLQ $0x04, Y5, Y6
+ VPAND Y2, Y5, Y5
+ VPAND Y2, Y6, Y6
+ VMOVDQU 1152(CX), Y3
+ VMOVDQU 1184(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y0, Y0
+ VMOVDQU 1216(CX), Y3
+ VMOVDQU 1248(CX), Y4
+ VPSHUFB Y5, Y3, Y3
+ VPSHUFB Y6, Y4, Y4
+ VPXOR Y3, Y4, Y3
+ VPXOR Y3, Y1, Y1
+
+ // Store 2 outputs
+ VMOVDQU Y0, (BX)(R15*1)
+ VMOVDQU Y1, (DX)(R15*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R15
+ DECQ AX
+ JNZ mulAvxTwo_10x2_loop
+ VZEROUPPER
+
+mulAvxTwo_10x2_end:
+ RET
+
+// func mulAvxTwo_10x3(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x3(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 68 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x3_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X3
+ VPBROADCASTB X3, Y3
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x3_loop:
+ // Clear 3 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+
+ // Load and process 32 bytes from input 0 to 3 outputs
+ VMOVDQU (BP)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU (CX), Y4
+ VMOVDQU 32(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 64(CX), Y4
+ VMOVDQU 96(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 128(CX), Y4
+ VMOVDQU 160(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 1 to 3 outputs
+ VMOVDQU (SI)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 192(CX), Y4
+ VMOVDQU 224(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 256(CX), Y4
+ VMOVDQU 288(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 320(CX), Y4
+ VMOVDQU 352(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 2 to 3 outputs
+ VMOVDQU (DI)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 384(CX), Y4
+ VMOVDQU 416(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 448(CX), Y4
+ VMOVDQU 480(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 512(CX), Y4
+ VMOVDQU 544(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 3 to 3 outputs
+ VMOVDQU (R8)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 576(CX), Y4
+ VMOVDQU 608(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 640(CX), Y4
+ VMOVDQU 672(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 704(CX), Y4
+ VMOVDQU 736(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 4 to 3 outputs
+ VMOVDQU (R9)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 768(CX), Y4
+ VMOVDQU 800(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 832(CX), Y4
+ VMOVDQU 864(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 896(CX), Y4
+ VMOVDQU 928(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 5 to 3 outputs
+ VMOVDQU (R10)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 960(CX), Y4
+ VMOVDQU 992(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1024(CX), Y4
+ VMOVDQU 1056(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1088(CX), Y4
+ VMOVDQU 1120(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 6 to 3 outputs
+ VMOVDQU (R11)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1152(CX), Y4
+ VMOVDQU 1184(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1216(CX), Y4
+ VMOVDQU 1248(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1280(CX), Y4
+ VMOVDQU 1312(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 7 to 3 outputs
+ VMOVDQU (R12)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1344(CX), Y4
+ VMOVDQU 1376(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1408(CX), Y4
+ VMOVDQU 1440(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1472(CX), Y4
+ VMOVDQU 1504(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 8 to 3 outputs
+ VMOVDQU (R13)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1536(CX), Y4
+ VMOVDQU 1568(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1600(CX), Y4
+ VMOVDQU 1632(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1664(CX), Y4
+ VMOVDQU 1696(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Load and process 32 bytes from input 9 to 3 outputs
+ VMOVDQU (BX)(R14*1), Y6
+ VPSRLQ $0x04, Y6, Y7
+ VPAND Y3, Y6, Y6
+ VPAND Y3, Y7, Y7
+ VMOVDQU 1728(CX), Y4
+ VMOVDQU 1760(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y0, Y0
+ VMOVDQU 1792(CX), Y4
+ VMOVDQU 1824(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y1, Y1
+ VMOVDQU 1856(CX), Y4
+ VMOVDQU 1888(CX), Y5
+ VPSHUFB Y6, Y4, Y4
+ VPSHUFB Y7, Y5, Y5
+ VPXOR Y4, Y5, Y4
+ VPXOR Y4, Y2, Y2
+
+ // Store 3 outputs
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x3_loop
+ VZEROUPPER
+
+mulAvxTwo_10x3_end:
+ RET
+
+// func mulAvxTwo_10x4(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x4(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 89 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x4_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X4
+ VPBROADCASTB X4, Y4
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x4_loop:
+ // Clear 4 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+
+ // Load and process 32 bytes from input 0 to 4 outputs
+ VMOVDQU (BP)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU (CX), Y5
+ VMOVDQU 32(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 64(CX), Y5
+ VMOVDQU 96(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 128(CX), Y5
+ VMOVDQU 160(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 192(CX), Y5
+ VMOVDQU 224(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 1 to 4 outputs
+ VMOVDQU (SI)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 256(CX), Y5
+ VMOVDQU 288(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 320(CX), Y5
+ VMOVDQU 352(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 384(CX), Y5
+ VMOVDQU 416(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 448(CX), Y5
+ VMOVDQU 480(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 2 to 4 outputs
+ VMOVDQU (DI)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 512(CX), Y5
+ VMOVDQU 544(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 576(CX), Y5
+ VMOVDQU 608(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 640(CX), Y5
+ VMOVDQU 672(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 704(CX), Y5
+ VMOVDQU 736(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 3 to 4 outputs
+ VMOVDQU (R8)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 768(CX), Y5
+ VMOVDQU 800(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 832(CX), Y5
+ VMOVDQU 864(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 896(CX), Y5
+ VMOVDQU 928(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 960(CX), Y5
+ VMOVDQU 992(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 4 to 4 outputs
+ VMOVDQU (R9)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1024(CX), Y5
+ VMOVDQU 1056(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1088(CX), Y5
+ VMOVDQU 1120(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1152(CX), Y5
+ VMOVDQU 1184(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1216(CX), Y5
+ VMOVDQU 1248(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 5 to 4 outputs
+ VMOVDQU (R10)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1280(CX), Y5
+ VMOVDQU 1312(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1344(CX), Y5
+ VMOVDQU 1376(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1408(CX), Y5
+ VMOVDQU 1440(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1472(CX), Y5
+ VMOVDQU 1504(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 6 to 4 outputs
+ VMOVDQU (R11)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1536(CX), Y5
+ VMOVDQU 1568(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1600(CX), Y5
+ VMOVDQU 1632(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1664(CX), Y5
+ VMOVDQU 1696(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1728(CX), Y5
+ VMOVDQU 1760(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 7 to 4 outputs
+ VMOVDQU (R12)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 1792(CX), Y5
+ VMOVDQU 1824(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 1856(CX), Y5
+ VMOVDQU 1888(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 1920(CX), Y5
+ VMOVDQU 1952(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 1984(CX), Y5
+ VMOVDQU 2016(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 8 to 4 outputs
+ VMOVDQU (R13)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2048(CX), Y5
+ VMOVDQU 2080(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 2112(CX), Y5
+ VMOVDQU 2144(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 2176(CX), Y5
+ VMOVDQU 2208(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 2240(CX), Y5
+ VMOVDQU 2272(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Load and process 32 bytes from input 9 to 4 outputs
+ VMOVDQU (BX)(R14*1), Y7
+ VPSRLQ $0x04, Y7, Y8
+ VPAND Y4, Y7, Y7
+ VPAND Y4, Y8, Y8
+ VMOVDQU 2304(CX), Y5
+ VMOVDQU 2336(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y0, Y0
+ VMOVDQU 2368(CX), Y5
+ VMOVDQU 2400(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y1, Y1
+ VMOVDQU 2432(CX), Y5
+ VMOVDQU 2464(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y2, Y2
+ VMOVDQU 2496(CX), Y5
+ VMOVDQU 2528(CX), Y6
+ VPSHUFB Y7, Y5, Y5
+ VPSHUFB Y8, Y6, Y6
+ VPXOR Y5, Y6, Y5
+ VPXOR Y5, Y3, Y3
+
+ // Store 4 outputs
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x4_loop
+ VZEROUPPER
+
+mulAvxTwo_10x4_end:
+ RET
+
+// func mulAvxTwo_10x5(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x5(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 110 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x5_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X5
+ VPBROADCASTB X5, Y5
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x5_loop:
+ // Clear 5 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+
+ // Load and process 32 bytes from input 0 to 5 outputs
+ VMOVDQU (BP)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU (CX), Y6
+ VMOVDQU 32(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 64(CX), Y6
+ VMOVDQU 96(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 128(CX), Y6
+ VMOVDQU 160(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 192(CX), Y6
+ VMOVDQU 224(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 256(CX), Y6
+ VMOVDQU 288(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 1 to 5 outputs
+ VMOVDQU (SI)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 320(CX), Y6
+ VMOVDQU 352(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 384(CX), Y6
+ VMOVDQU 416(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 448(CX), Y6
+ VMOVDQU 480(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 512(CX), Y6
+ VMOVDQU 544(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 576(CX), Y6
+ VMOVDQU 608(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 2 to 5 outputs
+ VMOVDQU (DI)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 640(CX), Y6
+ VMOVDQU 672(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 704(CX), Y6
+ VMOVDQU 736(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 768(CX), Y6
+ VMOVDQU 800(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 832(CX), Y6
+ VMOVDQU 864(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 896(CX), Y6
+ VMOVDQU 928(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 3 to 5 outputs
+ VMOVDQU (R8)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 960(CX), Y6
+ VMOVDQU 992(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1024(CX), Y6
+ VMOVDQU 1056(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1088(CX), Y6
+ VMOVDQU 1120(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1152(CX), Y6
+ VMOVDQU 1184(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1216(CX), Y6
+ VMOVDQU 1248(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 4 to 5 outputs
+ VMOVDQU (R9)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1280(CX), Y6
+ VMOVDQU 1312(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1344(CX), Y6
+ VMOVDQU 1376(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1408(CX), Y6
+ VMOVDQU 1440(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1472(CX), Y6
+ VMOVDQU 1504(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1536(CX), Y6
+ VMOVDQU 1568(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 5 to 5 outputs
+ VMOVDQU (R10)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1600(CX), Y6
+ VMOVDQU 1632(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1664(CX), Y6
+ VMOVDQU 1696(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 1728(CX), Y6
+ VMOVDQU 1760(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 1792(CX), Y6
+ VMOVDQU 1824(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 1856(CX), Y6
+ VMOVDQU 1888(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 6 to 5 outputs
+ VMOVDQU (R11)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 1920(CX), Y6
+ VMOVDQU 1952(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 1984(CX), Y6
+ VMOVDQU 2016(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 2048(CX), Y6
+ VMOVDQU 2080(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 2112(CX), Y6
+ VMOVDQU 2144(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 2176(CX), Y6
+ VMOVDQU 2208(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 7 to 5 outputs
+ VMOVDQU (R12)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2240(CX), Y6
+ VMOVDQU 2272(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 2304(CX), Y6
+ VMOVDQU 2336(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 2368(CX), Y6
+ VMOVDQU 2400(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 2432(CX), Y6
+ VMOVDQU 2464(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 2496(CX), Y6
+ VMOVDQU 2528(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 8 to 5 outputs
+ VMOVDQU (R13)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2560(CX), Y6
+ VMOVDQU 2592(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 2624(CX), Y6
+ VMOVDQU 2656(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 2688(CX), Y6
+ VMOVDQU 2720(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 2752(CX), Y6
+ VMOVDQU 2784(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 2816(CX), Y6
+ VMOVDQU 2848(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Load and process 32 bytes from input 9 to 5 outputs
+ VMOVDQU (BX)(R14*1), Y8
+ VPSRLQ $0x04, Y8, Y9
+ VPAND Y5, Y8, Y8
+ VPAND Y5, Y9, Y9
+ VMOVDQU 2880(CX), Y6
+ VMOVDQU 2912(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y0, Y0
+ VMOVDQU 2944(CX), Y6
+ VMOVDQU 2976(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y1, Y1
+ VMOVDQU 3008(CX), Y6
+ VMOVDQU 3040(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y2, Y2
+ VMOVDQU 3072(CX), Y6
+ VMOVDQU 3104(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y3, Y3
+ VMOVDQU 3136(CX), Y6
+ VMOVDQU 3168(CX), Y7
+ VPSHUFB Y8, Y6, Y6
+ VPSHUFB Y9, Y7, Y7
+ VPXOR Y6, Y7, Y6
+ VPXOR Y6, Y4, Y4
+
+ // Store 5 outputs
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x5_loop
+ VZEROUPPER
+
+mulAvxTwo_10x5_end:
+ RET
+
+// func mulAvxTwo_10x6(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated (see gen.go) — do not hand-edit logic.
+// Multiplies 10 input shards by a 10x6 GF(2^8) coefficient matrix, XOR-
+// accumulating into 6 output shards, 32 bytes per iteration. Each source
+// byte is split into its low and high nibble; each nibble indexes a
+// 32-byte lookup table from `matrix` via VPSHUFB, and the two lookups are
+// XORed to form the Galois-field product (classic SSSE3/AVX2 split-table
+// technique). Outputs live in Y0-Y5 for the whole iteration.
+TEXT ·mulAvxTwo_10x6(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 131 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // Process whole 32-byte blocks only: AX = n / 32; bail out on zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x6_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ // Cache the 10 input slice base pointers in registers (24 = sizeof(slice header)).
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ // Broadcast the low-nibble mask 0x0f to every byte of Y6.
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X6
+ VPBROADCASTB X6, Y6
+ // R14 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x6_loop:
+ // Clear 6 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+
+ // Load and process 32 bytes from input 0 to 6 outputs
+ // Y9 = low nibbles, Y10 = high nibbles of the 32 source bytes.
+ VMOVDQU (BP)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU (CX), Y7
+ VMOVDQU 32(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 64(CX), Y7
+ VMOVDQU 96(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 128(CX), Y7
+ VMOVDQU 160(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 192(CX), Y7
+ VMOVDQU 224(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 256(CX), Y7
+ VMOVDQU 288(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 320(CX), Y7
+ VMOVDQU 352(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 1 to 6 outputs
+ VMOVDQU (SI)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 384(CX), Y7
+ VMOVDQU 416(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 448(CX), Y7
+ VMOVDQU 480(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 512(CX), Y7
+ VMOVDQU 544(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 576(CX), Y7
+ VMOVDQU 608(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 640(CX), Y7
+ VMOVDQU 672(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 704(CX), Y7
+ VMOVDQU 736(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 2 to 6 outputs
+ VMOVDQU (DI)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 768(CX), Y7
+ VMOVDQU 800(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 832(CX), Y7
+ VMOVDQU 864(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 896(CX), Y7
+ VMOVDQU 928(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 960(CX), Y7
+ VMOVDQU 992(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1024(CX), Y7
+ VMOVDQU 1056(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1088(CX), Y7
+ VMOVDQU 1120(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 3 to 6 outputs
+ VMOVDQU (R8)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1152(CX), Y7
+ VMOVDQU 1184(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1216(CX), Y7
+ VMOVDQU 1248(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1280(CX), Y7
+ VMOVDQU 1312(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1344(CX), Y7
+ VMOVDQU 1376(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1408(CX), Y7
+ VMOVDQU 1440(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1472(CX), Y7
+ VMOVDQU 1504(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 4 to 6 outputs
+ VMOVDQU (R9)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1536(CX), Y7
+ VMOVDQU 1568(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1600(CX), Y7
+ VMOVDQU 1632(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 1664(CX), Y7
+ VMOVDQU 1696(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 1728(CX), Y7
+ VMOVDQU 1760(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 1792(CX), Y7
+ VMOVDQU 1824(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 1856(CX), Y7
+ VMOVDQU 1888(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 5 to 6 outputs
+ VMOVDQU (R10)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 1920(CX), Y7
+ VMOVDQU 1952(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 1984(CX), Y7
+ VMOVDQU 2016(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2048(CX), Y7
+ VMOVDQU 2080(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2112(CX), Y7
+ VMOVDQU 2144(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2176(CX), Y7
+ VMOVDQU 2208(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2240(CX), Y7
+ VMOVDQU 2272(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 6 to 6 outputs
+ VMOVDQU (R11)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2304(CX), Y7
+ VMOVDQU 2336(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2368(CX), Y7
+ VMOVDQU 2400(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2432(CX), Y7
+ VMOVDQU 2464(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2496(CX), Y7
+ VMOVDQU 2528(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2560(CX), Y7
+ VMOVDQU 2592(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 2624(CX), Y7
+ VMOVDQU 2656(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 7 to 6 outputs
+ VMOVDQU (R12)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 2688(CX), Y7
+ VMOVDQU 2720(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 2752(CX), Y7
+ VMOVDQU 2784(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 2816(CX), Y7
+ VMOVDQU 2848(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 2880(CX), Y7
+ VMOVDQU 2912(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 2944(CX), Y7
+ VMOVDQU 2976(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3008(CX), Y7
+ VMOVDQU 3040(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 8 to 6 outputs
+ VMOVDQU (R13)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3072(CX), Y7
+ VMOVDQU 3104(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 3136(CX), Y7
+ VMOVDQU 3168(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 3200(CX), Y7
+ VMOVDQU 3232(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 3264(CX), Y7
+ VMOVDQU 3296(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 3328(CX), Y7
+ VMOVDQU 3360(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3392(CX), Y7
+ VMOVDQU 3424(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Load and process 32 bytes from input 9 to 6 outputs
+ VMOVDQU (BX)(R14*1), Y9
+ VPSRLQ $0x04, Y9, Y10
+ VPAND Y6, Y9, Y9
+ VPAND Y6, Y10, Y10
+ VMOVDQU 3456(CX), Y7
+ VMOVDQU 3488(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y0, Y0
+ VMOVDQU 3520(CX), Y7
+ VMOVDQU 3552(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y1, Y1
+ VMOVDQU 3584(CX), Y7
+ VMOVDQU 3616(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y2, Y2
+ VMOVDQU 3648(CX), Y7
+ VMOVDQU 3680(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y3, Y3
+ VMOVDQU 3712(CX), Y7
+ VMOVDQU 3744(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y4, Y4
+ VMOVDQU 3776(CX), Y7
+ VMOVDQU 3808(CX), Y8
+ VPSHUFB Y9, Y7, Y7
+ VPSHUFB Y10, Y8, Y8
+ VPXOR Y7, Y8, Y7
+ VPXOR Y7, Y5, Y5
+
+ // Store 6 outputs
+ // Output pointers are reloaded from the slice header each iteration
+ // (no free general registers left to cache all six).
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(DX), R15
+ VMOVDQU Y5, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x6_loop
+ VZEROUPPER
+
+mulAvxTwo_10x6_end:
+ RET
+
+
+// func mulAvxTwo_10x7(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+//
+// NOTE(review): machine-generated (see gen.go) — do not hand-edit logic.
+// Same structure as mulAvxTwo_10x6, but with a 10x7 coefficient matrix and
+// 7 output shards accumulated in Y0-Y6. Mask register moves up to Y7 and
+// scratch registers to Y8-Y11 accordingly. Each input byte is split into
+// low/high nibbles which index 32-byte VPSHUFB tables from `matrix`; the
+// two lookups XOR together to give the GF(2^8) product.
+TEXT ·mulAvxTwo_10x7(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 152 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ // Process whole 32-byte blocks only: AX = n / 32; bail out on zero.
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x7_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ // Cache the 10 input slice base pointers in registers.
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ // Broadcast the low-nibble mask 0x0f to every byte of Y7.
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X7
+ VPBROADCASTB X7, Y7
+ // R14 = current byte offset into every input/output slice.
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x7_loop:
+ // Clear 7 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+
+ // Load and process 32 bytes from input 0 to 7 outputs
+ // Y10 = low nibbles, Y11 = high nibbles of the 32 source bytes.
+ VMOVDQU (BP)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU (CX), Y8
+ VMOVDQU 32(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 64(CX), Y8
+ VMOVDQU 96(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 128(CX), Y8
+ VMOVDQU 160(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 192(CX), Y8
+ VMOVDQU 224(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 256(CX), Y8
+ VMOVDQU 288(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 320(CX), Y8
+ VMOVDQU 352(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 384(CX), Y8
+ VMOVDQU 416(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 1 to 7 outputs
+ VMOVDQU (SI)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 448(CX), Y8
+ VMOVDQU 480(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 512(CX), Y8
+ VMOVDQU 544(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 576(CX), Y8
+ VMOVDQU 608(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 640(CX), Y8
+ VMOVDQU 672(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 704(CX), Y8
+ VMOVDQU 736(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 768(CX), Y8
+ VMOVDQU 800(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 832(CX), Y8
+ VMOVDQU 864(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 2 to 7 outputs
+ VMOVDQU (DI)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 896(CX), Y8
+ VMOVDQU 928(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 960(CX), Y8
+ VMOVDQU 992(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1024(CX), Y8
+ VMOVDQU 1056(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1088(CX), Y8
+ VMOVDQU 1120(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1152(CX), Y8
+ VMOVDQU 1184(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1216(CX), Y8
+ VMOVDQU 1248(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1280(CX), Y8
+ VMOVDQU 1312(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 3 to 7 outputs
+ VMOVDQU (R8)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1344(CX), Y8
+ VMOVDQU 1376(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1408(CX), Y8
+ VMOVDQU 1440(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1472(CX), Y8
+ VMOVDQU 1504(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1536(CX), Y8
+ VMOVDQU 1568(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 1600(CX), Y8
+ VMOVDQU 1632(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 1664(CX), Y8
+ VMOVDQU 1696(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 1728(CX), Y8
+ VMOVDQU 1760(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 4 to 7 outputs
+ VMOVDQU (R9)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 1792(CX), Y8
+ VMOVDQU 1824(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 1856(CX), Y8
+ VMOVDQU 1888(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 1920(CX), Y8
+ VMOVDQU 1952(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 1984(CX), Y8
+ VMOVDQU 2016(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2048(CX), Y8
+ VMOVDQU 2080(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2112(CX), Y8
+ VMOVDQU 2144(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2176(CX), Y8
+ VMOVDQU 2208(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 5 to 7 outputs
+ VMOVDQU (R10)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2240(CX), Y8
+ VMOVDQU 2272(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2304(CX), Y8
+ VMOVDQU 2336(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2368(CX), Y8
+ VMOVDQU 2400(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2432(CX), Y8
+ VMOVDQU 2464(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2496(CX), Y8
+ VMOVDQU 2528(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 2560(CX), Y8
+ VMOVDQU 2592(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 2624(CX), Y8
+ VMOVDQU 2656(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 6 to 7 outputs
+ VMOVDQU (R11)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 2688(CX), Y8
+ VMOVDQU 2720(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 2752(CX), Y8
+ VMOVDQU 2784(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 2816(CX), Y8
+ VMOVDQU 2848(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 2880(CX), Y8
+ VMOVDQU 2912(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 2944(CX), Y8
+ VMOVDQU 2976(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3008(CX), Y8
+ VMOVDQU 3040(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3072(CX), Y8
+ VMOVDQU 3104(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 7 to 7 outputs
+ VMOVDQU (R12)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3136(CX), Y8
+ VMOVDQU 3168(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3200(CX), Y8
+ VMOVDQU 3232(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3264(CX), Y8
+ VMOVDQU 3296(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3328(CX), Y8
+ VMOVDQU 3360(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3392(CX), Y8
+ VMOVDQU 3424(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3456(CX), Y8
+ VMOVDQU 3488(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3520(CX), Y8
+ VMOVDQU 3552(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 8 to 7 outputs
+ VMOVDQU (R13)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 3584(CX), Y8
+ VMOVDQU 3616(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 3648(CX), Y8
+ VMOVDQU 3680(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 3712(CX), Y8
+ VMOVDQU 3744(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 3776(CX), Y8
+ VMOVDQU 3808(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 3840(CX), Y8
+ VMOVDQU 3872(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 3904(CX), Y8
+ VMOVDQU 3936(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 3968(CX), Y8
+ VMOVDQU 4000(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Load and process 32 bytes from input 9 to 7 outputs
+ VMOVDQU (BX)(R14*1), Y10
+ VPSRLQ $0x04, Y10, Y11
+ VPAND Y7, Y10, Y10
+ VPAND Y7, Y11, Y11
+ VMOVDQU 4032(CX), Y8
+ VMOVDQU 4064(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y0, Y0
+ VMOVDQU 4096(CX), Y8
+ VMOVDQU 4128(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y1, Y1
+ VMOVDQU 4160(CX), Y8
+ VMOVDQU 4192(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y2, Y2
+ VMOVDQU 4224(CX), Y8
+ VMOVDQU 4256(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y3, Y3
+ VMOVDQU 4288(CX), Y8
+ VMOVDQU 4320(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y4, Y4
+ VMOVDQU 4352(CX), Y8
+ VMOVDQU 4384(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y5, Y5
+ VMOVDQU 4416(CX), Y8
+ VMOVDQU 4448(CX), Y9
+ VPSHUFB Y10, Y8, Y8
+ VPSHUFB Y11, Y9, Y9
+ VPXOR Y8, Y9, Y8
+ VPXOR Y8, Y6, Y6
+
+ // Store 7 outputs
+ // Output pointers are reloaded from the slice header each iteration
+ // (no free general registers left to cache all seven).
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(DX), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(DX), R15
+ VMOVDQU Y6, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x7_loop
+ VZEROUPPER
+
+mulAvxTwo_10x7_end:
+ RET
+
+
+// func mulAvxTwo_10x8(matrix []byte, in [][]byte, out [][]byte, start int, n int)
+// Requires: AVX, AVX2, SSE2
+TEXT ·mulAvxTwo_10x8(SB), $0-88
+ // Loading no tables to registers
+ // Full registers estimated 173 YMM used
+ MOVQ n+80(FP), AX
+ MOVQ matrix_base+0(FP), CX
+ SHRQ $0x05, AX
+ TESTQ AX, AX
+ JZ mulAvxTwo_10x8_end
+ MOVQ out_base+48(FP), DX
+ MOVQ in_base+24(FP), BX
+ MOVQ (BX), BP
+ MOVQ 24(BX), SI
+ MOVQ 48(BX), DI
+ MOVQ 72(BX), R8
+ MOVQ 96(BX), R9
+ MOVQ 120(BX), R10
+ MOVQ 144(BX), R11
+ MOVQ 168(BX), R12
+ MOVQ 192(BX), R13
+ MOVQ 216(BX), BX
+ MOVQ $0x0000000f, R14
+ MOVQ R14, X8
+ VPBROADCASTB X8, Y8
+ MOVQ start+72(FP), R14
+
+mulAvxTwo_10x8_loop:
+ // Clear 8 outputs
+ VPXOR Y0, Y0, Y0
+ VPXOR Y1, Y1, Y1
+ VPXOR Y2, Y2, Y2
+ VPXOR Y3, Y3, Y3
+ VPXOR Y4, Y4, Y4
+ VPXOR Y5, Y5, Y5
+ VPXOR Y6, Y6, Y6
+ VPXOR Y7, Y7, Y7
+
+ // Load and process 32 bytes from input 0 to 8 outputs
+ VMOVDQU (BP)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU (CX), Y9
+ VMOVDQU 32(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 64(CX), Y9
+ VMOVDQU 96(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 128(CX), Y9
+ VMOVDQU 160(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 192(CX), Y9
+ VMOVDQU 224(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 256(CX), Y9
+ VMOVDQU 288(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 320(CX), Y9
+ VMOVDQU 352(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 384(CX), Y9
+ VMOVDQU 416(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 448(CX), Y9
+ VMOVDQU 480(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 1 to 8 outputs
+ VMOVDQU (SI)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 512(CX), Y9
+ VMOVDQU 544(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 576(CX), Y9
+ VMOVDQU 608(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 640(CX), Y9
+ VMOVDQU 672(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 704(CX), Y9
+ VMOVDQU 736(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 768(CX), Y9
+ VMOVDQU 800(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 832(CX), Y9
+ VMOVDQU 864(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 896(CX), Y9
+ VMOVDQU 928(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 960(CX), Y9
+ VMOVDQU 992(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 2 to 8 outputs
+ VMOVDQU (DI)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1024(CX), Y9
+ VMOVDQU 1056(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1088(CX), Y9
+ VMOVDQU 1120(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1152(CX), Y9
+ VMOVDQU 1184(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1216(CX), Y9
+ VMOVDQU 1248(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1280(CX), Y9
+ VMOVDQU 1312(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1344(CX), Y9
+ VMOVDQU 1376(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1408(CX), Y9
+ VMOVDQU 1440(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1472(CX), Y9
+ VMOVDQU 1504(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 3 to 8 outputs
+ VMOVDQU (R8)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 1536(CX), Y9
+ VMOVDQU 1568(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 1600(CX), Y9
+ VMOVDQU 1632(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 1664(CX), Y9
+ VMOVDQU 1696(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 1728(CX), Y9
+ VMOVDQU 1760(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 1792(CX), Y9
+ VMOVDQU 1824(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 1856(CX), Y9
+ VMOVDQU 1888(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 1920(CX), Y9
+ VMOVDQU 1952(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 1984(CX), Y9
+ VMOVDQU 2016(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 4 to 8 outputs
+ VMOVDQU (R9)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2048(CX), Y9
+ VMOVDQU 2080(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2112(CX), Y9
+ VMOVDQU 2144(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2176(CX), Y9
+ VMOVDQU 2208(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2240(CX), Y9
+ VMOVDQU 2272(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2304(CX), Y9
+ VMOVDQU 2336(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2368(CX), Y9
+ VMOVDQU 2400(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2432(CX), Y9
+ VMOVDQU 2464(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 2496(CX), Y9
+ VMOVDQU 2528(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 5 to 8 outputs
+ VMOVDQU (R10)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 2560(CX), Y9
+ VMOVDQU 2592(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 2624(CX), Y9
+ VMOVDQU 2656(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 2688(CX), Y9
+ VMOVDQU 2720(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 2752(CX), Y9
+ VMOVDQU 2784(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 2816(CX), Y9
+ VMOVDQU 2848(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 2880(CX), Y9
+ VMOVDQU 2912(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 2944(CX), Y9
+ VMOVDQU 2976(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3008(CX), Y9
+ VMOVDQU 3040(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 6 to 8 outputs
+ VMOVDQU (R11)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3072(CX), Y9
+ VMOVDQU 3104(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3136(CX), Y9
+ VMOVDQU 3168(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3200(CX), Y9
+ VMOVDQU 3232(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3264(CX), Y9
+ VMOVDQU 3296(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3328(CX), Y9
+ VMOVDQU 3360(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3392(CX), Y9
+ VMOVDQU 3424(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3456(CX), Y9
+ VMOVDQU 3488(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 3520(CX), Y9
+ VMOVDQU 3552(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 7 to 8 outputs
+ VMOVDQU (R12)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 3584(CX), Y9
+ VMOVDQU 3616(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 3648(CX), Y9
+ VMOVDQU 3680(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 3712(CX), Y9
+ VMOVDQU 3744(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 3776(CX), Y9
+ VMOVDQU 3808(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 3840(CX), Y9
+ VMOVDQU 3872(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 3904(CX), Y9
+ VMOVDQU 3936(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 3968(CX), Y9
+ VMOVDQU 4000(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4032(CX), Y9
+ VMOVDQU 4064(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 8 to 8 outputs
+ VMOVDQU (R13)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4096(CX), Y9
+ VMOVDQU 4128(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 4160(CX), Y9
+ VMOVDQU 4192(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 4224(CX), Y9
+ VMOVDQU 4256(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 4288(CX), Y9
+ VMOVDQU 4320(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 4352(CX), Y9
+ VMOVDQU 4384(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 4416(CX), Y9
+ VMOVDQU 4448(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 4480(CX), Y9
+ VMOVDQU 4512(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 4544(CX), Y9
+ VMOVDQU 4576(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Load and process 32 bytes from input 9 to 8 outputs
+ VMOVDQU (BX)(R14*1), Y11
+ VPSRLQ $0x04, Y11, Y12
+ VPAND Y8, Y11, Y11
+ VPAND Y8, Y12, Y12
+ VMOVDQU 4608(CX), Y9
+ VMOVDQU 4640(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y0, Y0
+ VMOVDQU 4672(CX), Y9
+ VMOVDQU 4704(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y1, Y1
+ VMOVDQU 4736(CX), Y9
+ VMOVDQU 4768(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y2, Y2
+ VMOVDQU 4800(CX), Y9
+ VMOVDQU 4832(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y3, Y3
+ VMOVDQU 4864(CX), Y9
+ VMOVDQU 4896(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y4, Y4
+ VMOVDQU 4928(CX), Y9
+ VMOVDQU 4960(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y5, Y5
+ VMOVDQU 4992(CX), Y9
+ VMOVDQU 5024(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y6, Y6
+ VMOVDQU 5056(CX), Y9
+ VMOVDQU 5088(CX), Y10
+ VPSHUFB Y11, Y9, Y9
+ VPSHUFB Y12, Y10, Y10
+ VPXOR Y9, Y10, Y9
+ VPXOR Y9, Y7, Y7
+
+ // Store 8 outputs
+ MOVQ (DX), R15
+ VMOVDQU Y0, (R15)(R14*1)
+ MOVQ 24(DX), R15
+ VMOVDQU Y1, (R15)(R14*1)
+ MOVQ 48(DX), R15
+ VMOVDQU Y2, (R15)(R14*1)
+ MOVQ 72(DX), R15
+ VMOVDQU Y3, (R15)(R14*1)
+ MOVQ 96(DX), R15
+ VMOVDQU Y4, (R15)(R14*1)
+ MOVQ 120(DX), R15
+ VMOVDQU Y5, (R15)(R14*1)
+ MOVQ 144(DX), R15
+ VMOVDQU Y6, (R15)(R14*1)
+ MOVQ 168(DX), R15
+ VMOVDQU Y7, (R15)(R14*1)
+
+ // Prepare for next loop
+ ADDQ $0x20, R14
+ DECQ AX
+ JNZ mulAvxTwo_10x8_loop
+ VZEROUPPER
+
+mulAvxTwo_10x8_end:
+ RET
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
new file mode 100644
index 0000000..b4917bc
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_none.go
@@ -0,0 +1,11 @@
+//+build !amd64 noasm appengine gccgo nogen
+
+package reedsolomon
+
+// No generated AVX2 kernels in this build: the generator limits are zero
+// and the code-gen path is disabled, so galMulSlicesAvx2 is unreachable.
+const maxAvx2Inputs = 0
+const maxAvx2Outputs = 0
+const avx2CodeGen = false
+
+// galMulSlicesAvx2 is a stub; callers must gate on avx2CodeGen before calling.
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+	panic("avx2 codegen not available")
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
new file mode 100644
index 0000000..0b49a1e
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_gen_switch_amd64.go
@@ -0,0 +1,293 @@
+// Code generated by command: go generate gen.go. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build gc
+// +build !nogen
+
+package reedsolomon
+
+import "fmt"
+
+const avx2CodeGen = true
+const maxAvx2Inputs = 10
+const maxAvx2Outputs = 8
+
+// galMulSlicesAvx2 multiplies the byte range [start:start+n) of every input
+// shard by the lookup tables in matrix and writes the results to the output
+// shards, dispatching to the generated AVX2 kernel for the exact
+// (len(in), len(out)) combination. The kernels clear the outputs first
+// (see gen.go: "The output is initialized to 0.").
+//
+// Only whole 32-byte units are processed: n is stop-start rounded down to a
+// multiple of 32, and that rounded count is returned so the caller can
+// finish the tail with the generic code. Panics for shard counts outside
+// the generated 1..10 x 1..8 range.
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+	n := stop - start
+	// Round down to the 32-byte granularity of the AVX2 loops.
+	n = (n >> 5) << 5
+
+	switch len(in) {
+	case 1:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_1x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_1x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_1x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_1x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_1x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_1x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_1x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_1x8(matrix, in, out, start, n)
+			return n
+		}
+	case 2:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_2x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_2x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_2x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_2x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_2x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_2x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_2x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_2x8(matrix, in, out, start, n)
+			return n
+		}
+	case 3:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_3x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_3x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_3x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_3x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_3x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_3x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_3x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_3x8(matrix, in, out, start, n)
+			return n
+		}
+	case 4:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_4x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_4x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_4x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_4x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_4x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_4x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_4x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_4x8(matrix, in, out, start, n)
+			return n
+		}
+	case 5:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_5x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_5x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_5x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_5x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_5x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_5x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_5x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_5x8(matrix, in, out, start, n)
+			return n
+		}
+	case 6:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_6x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_6x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_6x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_6x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_6x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_6x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_6x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_6x8(matrix, in, out, start, n)
+			return n
+		}
+	case 7:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_7x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_7x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_7x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_7x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_7x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_7x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_7x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_7x8(matrix, in, out, start, n)
+			return n
+		}
+	case 8:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_8x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_8x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_8x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_8x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_8x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_8x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_8x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_8x8(matrix, in, out, start, n)
+			return n
+		}
+	case 9:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_9x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_9x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_9x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_9x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_9x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_9x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_9x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_9x8(matrix, in, out, start, n)
+			return n
+		}
+	case 10:
+		switch len(out) {
+		case 1:
+			mulAvxTwo_10x1(matrix, in, out, start, n)
+			return n
+		case 2:
+			mulAvxTwo_10x2(matrix, in, out, start, n)
+			return n
+		case 3:
+			mulAvxTwo_10x3(matrix, in, out, start, n)
+			return n
+		case 4:
+			mulAvxTwo_10x4(matrix, in, out, start, n)
+			return n
+		case 5:
+			mulAvxTwo_10x5(matrix, in, out, start, n)
+			return n
+		case 6:
+			mulAvxTwo_10x6(matrix, in, out, start, n)
+			return n
+		case 7:
+			mulAvxTwo_10x7(matrix, in, out, start, n)
+			return n
+		case 8:
+			mulAvxTwo_10x8(matrix, in, out, start, n)
+			return n
+		}
+	}
+	panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_noasm.go b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
new file mode 100644
index 0000000..1d00e06
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_noasm.go
@@ -0,0 +1,44 @@
+//+build !amd64 noasm appengine gccgo
+//+build !arm64 noasm appengine gccgo
+//+build !ppc64le noasm appengine gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+// galMulSlice sets out[i] = c * in[i] in GF(2^8) (pure-Go fallback).
+// Requires len(out) >= len(in).
+func galMulSlice(c byte, in, out []byte, o *options) {
+	out = out[:len(in)] // equal lengths help bounds-check elimination
+	if c == 1 {
+		// Multiplying by 1 is a plain copy.
+		copy(out, in)
+		return
+	}
+	mt := mulTable[c][:256] // fixed-size view aids bounds-check elimination
+	for n, input := range in {
+		out[n] = mt[input]
+	}
+}
+
+// galMulSliceXor XORs c * in[i] (GF(2^8)) into out[i] (pure-Go fallback).
+// Requires len(out) >= len(in).
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	out = out[:len(in)] // equal lengths help bounds-check elimination
+	if c == 1 {
+		// c == 1: product equals the input, so just XOR it in.
+		for n, input := range in {
+			out[n] ^= input
+		}
+		return
+	}
+	mt := mulTable[c][:256] // fixed-size view aids bounds-check elimination
+	for n, input := range in {
+		out[n] ^= mt[input]
+	}
+}
+
+// sliceXor XORs in into out byte by byte (slice addition in GF(2^8)).
+func sliceXor(in, out []byte, o *options) {
+	for n, input := range in {
+		out[n] ^= input
+	}
+}
+
+func init() {
+	// No assembler on this build, so the AVX512 path must never be chosen.
+	defaultOptions.useAVX512 = false
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
new file mode 100644
index 0000000..bd15e3a
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_notamd64.go
@@ -0,0 +1,13 @@
+//+build !amd64 noasm appengine gccgo
+
+// Copyright 2020, Klaus Post, see LICENSE for details.
+
+package reedsolomon
+
+// codeSomeShardsAvx512 is a stub for builds without amd64 assembly; the
+// AVX512 path is disabled there (useAVX512 is forced false in galois_noasm.go),
+// so this must be unreachable.
+func (r *reedSolomon) codeSomeShardsAvx512(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	panic("codeSomeShardsAvx512 should not be called if built without asm")
+}
+
+// codeSomeShardsAvx512P is the parallel variant's stub for builds without
+// amd64 assembly; unreachable for the same reason as codeSomeShardsAvx512.
+func (r *reedSolomon) codeSomeShardsAvx512P(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	panic("codeSomeShardsAvx512P should not be called if built without asm")
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
new file mode 100644
index 0000000..70f93d6
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.go
@@ -0,0 +1,75 @@
+//+build !noasm
+//+build !appengine
+//+build !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+package reedsolomon
+
+//go:noescape
+func galMulPpc(low, high, in, out []byte)
+
+//go:noescape
+func galMulPpcXor(low, high, in, out []byte)
+
+// This is what the assembler routines do in blocks of 16 bytes:
+/*
+func galMulPpc(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] = low[l] ^ high[h]
+ }
+}
+func galMulPpcXor(low, high, in, out []byte) {
+ for n, input := range in {
+ l := input & 0xf
+ h := input >> 4
+ out[n] ^= low[l] ^ high[h]
+ }
+}
+*/
+
+// galMulSlice sets out[i] = c * in[i] in GF(2^8), using the ppc64le vector
+// kernel for whole 16-byte chunks and a scalar table lookup for the tail.
+func galMulSlice(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		// Multiplying by 1 is a plain copy.
+		copy(out, in)
+		return
+	}
+	done := (len(in) >> 4) << 4 // round down to the asm's 16-byte granularity
+	if done > 0 {
+		galMulPpc(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		// Scalar fallback for the final <16 bytes.
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] = mt[in[i]]
+		}
+	}
+}
+
+// galMulSliceXor XORs c * in[i] (GF(2^8)) into out[i], using the ppc64le
+// vector kernel for whole 16-byte chunks and a scalar lookup for the tail.
+func galMulSliceXor(c byte, in, out []byte, o *options) {
+	if c == 1 {
+		// c == 1: product equals the input, so a plain XOR suffices.
+		sliceXor(in, out, o)
+		return
+	}
+	done := (len(in) >> 4) << 4 // round down to the asm's 16-byte granularity
+	if done > 0 {
+		galMulPpcXor(mulTableLow[c][:], mulTableHigh[c][:], in[:done], out)
+	}
+	remain := len(in) - done
+	if remain > 0 {
+		// Scalar fallback for the final <16 bytes.
+		mt := mulTable[c][:256]
+		for i := done; i < len(in); i++ {
+			out[i] ^= mt[in[i]]
+		}
+	}
+}
+
+// sliceXor XORs in into out byte by byte (slice addition in GF(2^8)).
+func sliceXor(in, out []byte, o *options) {
+	for n, input := range in {
+		out[n] ^= input
+	}
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
new file mode 100644
index 0000000..8838f0c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/galois_ppc64le.s
@@ -0,0 +1,124 @@
+//+build !noasm !appengine !gccgo
+
+// Copyright 2015, Klaus Post, see LICENSE for details.
+// Copyright 2018, Minio, Inc.
+
+#include "textflag.h"
+
+#define LOW R3
+#define HIGH R4
+#define IN R5
+#define LEN R6
+#define OUT R7
+#define CONSTANTS R8
+#define OFFSET R9
+#define OFFSET1 R10
+#define OFFSET2 R11
+
+#define X6 VS34
+#define X6_ V2
+#define X7 VS35
+#define X7_ V3
+#define MSG VS36
+#define MSG_ V4
+#define MSG_HI VS37
+#define MSG_HI_ V5
+#define RESULT VS38
+#define RESULT_ V6
+#define ROTATE VS39
+#define ROTATE_ V7
+#define MASK VS40
+#define MASK_ V8
+#define FLIP VS41
+#define FLIP_ V9
+
+// func galMulPpc(low, high, in, out []byte)
+//
+// For each 16-byte chunk: out[i] = low[in[i]&0xf] ^ high[in[i]>>4].
+// See the commented Go reference implementation in galois_ppc64le.go.
+// The caller guarantees len(in) is a multiple of 16 and len(out) >= len(in).
+TEXT ·galMulPpc(SB), NOFRAME|NOSPLIT, $0-96
+	MOVD low+0(FP), LOW
+	MOVD high+24(FP), HIGH
+	MOVD in+48(FP), IN
+	MOVD in_len+56(FP), LEN
+	MOVD out+72(FP), OUT
+
+	MOVD $16, OFFSET1
+	MOVD $32, OFFSET2
+
+	// Load the shift / mask / byte-order constants (see DATA ·constants).
+	MOVD $·constants(SB), CONSTANTS
+	LXVD2X (CONSTANTS)(R0), ROTATE
+	LXVD2X (CONSTANTS)(OFFSET1), MASK
+	LXVD2X (CONSTANTS)(OFFSET2), FLIP
+
+	// Load the 16-entry low/high nibble tables and permute them with FLIP
+	// to normalize the little-endian LXVD2X byte order.
+	LXVD2X (LOW)(R0), X6
+	LXVD2X (HIGH)(R0), X7
+	VPERM X6_, V31, FLIP_, X6_
+	VPERM X7_, V31, FLIP_, X7_
+
+	MOVD $0, OFFSET
+
+loop:
+	LXVD2X (IN)(OFFSET), MSG
+
+	// Split every byte into its high nibble (shift right 4) and low nibble
+	// (mask 0x0f), then use VPERM as a 16-entry table lookup for each.
+	VSRB MSG_, ROTATE_, MSG_HI_
+	VAND MSG_, MASK_, MSG_
+	VPERM X6_, V31, MSG_, MSG_
+	VPERM X7_, V31, MSG_HI_, MSG_HI_
+
+	// Combine the two partial products (XOR is addition in GF(2^8)).
+	VXOR MSG_, MSG_HI_, MSG_
+
+	STXVD2X MSG, (OUT)(OFFSET)
+
+	ADD $16, OFFSET, OFFSET
+	CMP LEN, OFFSET
+	BGT loop // continue while OFFSET < LEN
+	RET
+
+// func galMulPpcXor(low, high, in, out []byte)
+//
+// Same as galMulPpc, but XOR-accumulates into the destination:
+// out[i] ^= low[in[i]&0xf] ^ high[in[i]>>4] for each 16-byte chunk.
+// The caller guarantees len(in) is a multiple of 16 and len(out) >= len(in).
+TEXT ·galMulPpcXor(SB), NOFRAME|NOSPLIT, $0-96
+	MOVD low+0(FP), LOW
+	MOVD high+24(FP), HIGH
+	MOVD in+48(FP), IN
+	MOVD in_len+56(FP), LEN
+	MOVD out+72(FP), OUT
+
+	MOVD $16, OFFSET1
+	MOVD $32, OFFSET2
+
+	// Load the shift / mask / byte-order constants (see DATA ·constants).
+	MOVD $·constants(SB), CONSTANTS
+	LXVD2X (CONSTANTS)(R0), ROTATE
+	LXVD2X (CONSTANTS)(OFFSET1), MASK
+	LXVD2X (CONSTANTS)(OFFSET2), FLIP
+
+	// Load and byte-order-normalize the low/high nibble lookup tables.
+	LXVD2X (LOW)(R0), X6
+	LXVD2X (HIGH)(R0), X7
+	VPERM X6_, V31, FLIP_, X6_
+	VPERM X7_, V31, FLIP_, X7_
+
+	MOVD $0, OFFSET
+
+loopXor:
+	LXVD2X (IN)(OFFSET), MSG
+	LXVD2X (OUT)(OFFSET), RESULT // existing output, to be XOR-accumulated
+
+	// Nibble-split the input and table-lookup both halves via VPERM.
+	VSRB MSG_, ROTATE_, MSG_HI_
+	VAND MSG_, MASK_, MSG_
+	VPERM X6_, V31, MSG_, MSG_
+	VPERM X7_, V31, MSG_HI_, MSG_HI_
+
+	VXOR MSG_, MSG_HI_, MSG_
+	VXOR MSG_, RESULT_, RESULT_ // fold the product into the prior output
+
+	STXVD2X RESULT, (OUT)(OFFSET)
+
+	ADD $16, OFFSET, OFFSET
+	CMP LEN, OFFSET
+	BGT loopXor // continue while OFFSET < LEN
+	RET
+
+// Shared read-only constants (48 bytes), indexed from both routines above:
+//   +0x00: 0x04 repeated — VSRB shift count to extract the high nibble
+//   +0x10: 0x0f repeated — mask to extract the low nibble
+//   +0x20: bytes 0..15   — permutation used to normalize LXVD2X byte order
+DATA ·constants+0x0(SB)/8, $0x0404040404040404
+DATA ·constants+0x8(SB)/8, $0x0404040404040404
+DATA ·constants+0x10(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x18(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA ·constants+0x20(SB)/8, $0x0706050403020100
+DATA ·constants+0x28(SB)/8, $0x0f0e0d0c0b0a0908
+
+GLOBL ·constants(SB), 8, $48 // flag 8 = RODATA
diff --git a/vendor/github.com/klauspost/reedsolomon/gen.go b/vendor/github.com/klauspost/reedsolomon/gen.go
new file mode 100644
index 0000000..6fc545c
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/gen.go
@@ -0,0 +1,249 @@
+//+build generate
+
+//go:generate go run gen.go -out galois_gen_amd64.s -stubs galois_gen_amd64.go
+//go:generate gofmt -w galois_gen_switch_amd64.go
+
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+
+ . "github.com/mmcloughlin/avo/build"
+ "github.com/mmcloughlin/avo/buildtags"
+ . "github.com/mmcloughlin/avo/operand"
+ "github.com/mmcloughlin/avo/reg"
+)
+
+// Technically we can do slightly bigger, but we stay reasonable.
+const inputMax = 10
+const outputMax = 8
+
+var switchDefs [inputMax][outputMax]string
+var switchDefsX [inputMax][outputMax]string
+
+const perLoopBits = 5
+const perLoop = 1 << perLoopBits
+
+// main drives avo code generation: it emits one AVX2 multiply kernel per
+// (inputs, outputs) combination up to inputMax x outputMax into
+// galois_gen_amd64.s/.go (via Generate), and writes the dispatch file
+// galois_gen_switch_amd64.go from the case bodies accumulated in switchDefs.
+// The emitted switch source is not gofmt-ed here; the second go:generate
+// step at the top of this file formats it.
+func main() {
+	// Build constraints applied to all generated files.
+	Constraint(buildtags.Not("appengine").ToConstraint())
+	Constraint(buildtags.Not("noasm").ToConstraint())
+	Constraint(buildtags.Not("nogen").ToConstraint())
+	Constraint(buildtags.Term("gc").ToConstraint())
+
+	for i := 1; i <= inputMax; i++ {
+		for j := 1; j <= outputMax; j++ {
+			//genMulAvx2(fmt.Sprintf("mulAvxTwoXor_%dx%d", i, j), i, j, true)
+			genMulAvx2(fmt.Sprintf("mulAvxTwo_%dx%d", i, j), i, j, false)
+		}
+	}
+	f, err := os.Create("galois_gen_switch_amd64.go")
+	if err != nil {
+		panic(err)
+	}
+	defer f.Close()
+	w := bufio.NewWriter(f)
+	defer w.Flush()
+	w.WriteString(`// Code generated by command: go generate ` + os.Getenv("GOFILE") + `. DO NOT EDIT.
+
+// +build !appengine
+// +build !noasm
+// +build gc
+// +build !nogen
+
+package reedsolomon
+
+import "fmt"
+
+`)
+
+	w.WriteString("const avx2CodeGen = true\n")
+	w.WriteString(fmt.Sprintf("const maxAvx2Inputs = %d\nconst maxAvx2Outputs = %d\n", inputMax, outputMax))
+	w.WriteString(`
+
+func galMulSlicesAvx2(matrix []byte, in, out [][]byte, start, stop int) int {
+	n := stop-start
+`)
+
+	w.WriteString(fmt.Sprintf("n = (n>>%d)<<%d\n\n", perLoopBits, perLoopBits))
+	w.WriteString(`switch len(in) {
+`)
+	// Emit the nested switch from the per-kernel case bodies recorded by
+	// genMulAvx2 in switchDefs.
+	for in, defs := range switchDefs[:] {
+		w.WriteString(fmt.Sprintf(" case %d:\n switch len(out) {\n", in+1))
+		for out, def := range defs[:] {
+			w.WriteString(fmt.Sprintf(" case %d:\n", out+1))
+			w.WriteString(def)
+		}
+		w.WriteString("}\n")
+	}
+	w.WriteString(`}
+	panic(fmt.Sprintf("unhandled size: %dx%d", len(in), len(out)))
+}
+`)
+	Generate()
+}
+
+// genMulAvx2 emits one AVX2 kernel named name that multiplies `inputs`
+// shards into `outputs` shards, 32 bytes per shard per loop iteration.
+// When xor is true the kernel accumulates into the existing outputs;
+// otherwise outputs are cleared first. If the lookup tables overflow the
+// 16 YMM registers it re-loads them from memory on every use, and when GP
+// registers also run out it re-loads destination pointers per store.
+// It also records the matching dispatch-switch case body in switchDefs.
+func genMulAvx2(name string, inputs int, outputs int, xor bool) {
+	total := inputs * outputs
+
+	doc := []string{
+		fmt.Sprintf("%s takes %d inputs and produces %d outputs.", name, inputs, outputs),
+	}
+	if !xor {
+		doc = append(doc, "The output is initialized to 0.")
+	}
+
+	// Load shuffle masks on every use.
+	var loadNone bool
+	// Use registers for destination registers.
+	var regDst = true
+
+	// lo, hi, 1 in, 1 out, 2 tmp, 1 mask
+	est := total*2 + outputs + 5
+	if outputs == 1 {
+		// We don't need to keep a copy of the input if only 1 output.
+		est -= 2
+	}
+
+	if est > 16 {
+		// More virtual YMMs than physical registers: stream tables from memory.
+		loadNone = true
+		// We run out of GP registers first, now.
+		if inputs+outputs > 12 {
+			regDst = false
+		}
+	}
+
+	TEXT(name, 0, fmt.Sprintf("func(matrix []byte, in [][]byte, out [][]byte, start, n int)"))
+
+	// SWITCH DEFINITION:
+	s := fmt.Sprintf(" mulAvxTwo_%dx%d(matrix, in, out, start, n)\n", inputs, outputs)
+	s += fmt.Sprintf("\t\t\t\treturn n\n")
+	switchDefs[inputs-1][outputs-1] = s
+
+	if loadNone {
+		Comment("Loading no tables to registers")
+	} else {
+		// loadNone == false
+		Comment("Loading all tables to registers")
+	}
+
+	Doc(doc...)
+	Pragma("noescape")
+	Commentf("Full registers estimated %d YMM used", est)
+
+	// n is pre-rounded by the caller; convert it to a 32-byte block count
+	// and bail out early when there is nothing to do.
+	length := Load(Param("n"), GP64())
+	matrixBase := GP64()
+	MOVQ(Param("matrix").Base().MustAddr(), matrixBase)
+	SHRQ(U8(perLoopBits), length)
+	TESTQ(length, length)
+	JZ(LabelRef(name + "_end"))
+
+	dst := make([]reg.VecVirtual, outputs)
+	dstPtr := make([]reg.GPVirtual, outputs)
+	outBase := Param("out").Base().MustAddr()
+	outSlicePtr := GP64()
+	MOVQ(outBase, outSlicePtr)
+	for i := range dst {
+		dst[i] = YMM()
+		if !regDst {
+			continue
+		}
+		ptr := GP64()
+		// Disp i*24 strides over the 24-byte slice headers of out[][].
+		MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
+		dstPtr[i] = ptr
+	}
+
+	inLo := make([]reg.VecVirtual, total)
+	inHi := make([]reg.VecVirtual, total)
+
+	// Preload all low/high nibble tables into YMMs when they fit.
+	for i := range inLo {
+		if loadNone {
+			break
+		}
+		tableLo := YMM()
+		tableHi := YMM()
+		VMOVDQU(Mem{Base: matrixBase, Disp: i * 64}, tableLo)
+		VMOVDQU(Mem{Base: matrixBase, Disp: i*64 + 32}, tableHi)
+		inLo[i] = tableLo
+		inHi[i] = tableHi
+	}
+
+	inPtrs := make([]reg.GPVirtual, inputs)
+	inSlicePtr := GP64()
+	MOVQ(Param("in").Base().MustAddr(), inSlicePtr)
+	for i := range inPtrs {
+		ptr := GP64()
+		MOVQ(Mem{Base: inSlicePtr, Disp: i * 24}, ptr)
+		inPtrs[i] = ptr
+	}
+
+	// Broadcast the low-nibble mask 0x0f into every byte of a YMM register.
+	tmpMask := GP64()
+	MOVQ(U32(15), tmpMask)
+	lowMask := YMM()
+	MOVQ(tmpMask, lowMask.AsX())
+	VPBROADCASTB(lowMask.AsX(), lowMask)
+
+	offset := GP64()
+	MOVQ(Param("start").MustAddr(), offset)
+	Label(name + "_loop")
+	if xor {
+		Commentf("Load %d outputs", outputs)
+	} else {
+		Commentf("Clear %d outputs", outputs)
+	}
+	for i := range dst {
+		if xor {
+			if regDst {
+				VMOVDQU(Mem{Base: dstPtr[i], Index: offset, Scale: 1}, dst[i])
+				continue
+			}
+			// NOTE(review): this loads from outBase rather than out[i]'s data
+			// pointer (contrast the store loop below) — looks wrong, but the
+			// xor variant is currently not generated (commented out in main).
+			ptr := GP64()
+			MOVQ(outBase, ptr)
+			VMOVDQU(Mem{Base: ptr, Index: offset, Scale: 1}, dst[i])
+		} else {
+			VPXOR(dst[i], dst[i], dst[i])
+		}
+	}
+
+	lookLow, lookHigh := YMM(), YMM()
+	inLow, inHigh := YMM(), YMM()
+	for i := range inPtrs {
+		Commentf("Load and process 32 bytes from input %d to %d outputs", i, outputs)
+		// Split each input byte into low/high nibbles for the VPSHUFB lookups.
+		VMOVDQU(Mem{Base: inPtrs[i], Index: offset, Scale: 1}, inLow)
+		VPSRLQ(U8(4), inLow, inHigh)
+		VPAND(lowMask, inLow, inLow)
+		VPAND(lowMask, inHigh, inHigh)
+		for j := range dst {
+			if loadNone {
+				VMOVDQU(Mem{Base: matrixBase, Disp: 64 * (i*outputs + j)}, lookLow)
+				VMOVDQU(Mem{Base: matrixBase, Disp: 32 + 64*(i*outputs+j)}, lookHigh)
+				VPSHUFB(inLow, lookLow, lookLow)
+				VPSHUFB(inHigh, lookHigh, lookHigh)
+			} else {
+				VPSHUFB(inLow, inLo[i*outputs+j], lookLow)
+				VPSHUFB(inHigh, inHi[i*outputs+j], lookHigh)
+			}
+			VPXOR(lookLow, lookHigh, lookLow)
+			VPXOR(lookLow, dst[j], dst[j])
+		}
+	}
+	Commentf("Store %d outputs", outputs)
+	for i := range dst {
+		if regDst {
+			VMOVDQU(dst[i], Mem{Base: dstPtr[i], Index: offset, Scale: 1})
+			continue
+		}
+		ptr := GP64()
+		MOVQ(Mem{Base: outSlicePtr, Disp: i * 24}, ptr)
+		VMOVDQU(dst[i], Mem{Base: ptr, Index: offset, Scale: 1})
+	}
+	Comment("Prepare for next loop")
+	ADDQ(U8(perLoop), offset)
+	DECQ(length)
+	JNZ(LabelRef(name + "_loop"))
+	VZEROUPPER()
+
+	Label(name + "_end")
+	RET()
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/go.mod b/vendor/github.com/klauspost/reedsolomon/go.mod
new file mode 100644
index 0000000..a059d86
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/go.mod
@@ -0,0 +1,7 @@
+module github.com/klauspost/reedsolomon
+
+go 1.14
+
+require (
+ github.com/klauspost/cpuid v1.2.4
+)
diff --git a/vendor/github.com/klauspost/reedsolomon/go.sum b/vendor/github.com/klauspost/reedsolomon/go.sum
new file mode 100644
index 0000000..5a44d81
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/go.sum
@@ -0,0 +1,2 @@
+github.com/klauspost/cpuid v1.2.4 h1:EBfaK0SWSwk+fgk6efYFWdzl8MwRWoOO1gkmiaTXPW4=
+github.com/klauspost/cpuid v1.2.4/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
diff --git a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
new file mode 100644
index 0000000..c9d8ab2
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go
@@ -0,0 +1,160 @@
+/**
+ * A thread-safe tree which caches inverted matrices.
+ *
+ * Copyright 2016, Peter Collins
+ */
+
+package reedsolomon
+
+import (
+ "errors"
+ "sync"
+)
+
+// The tree uses a Reader-Writer mutex to make it thread-safe
+// when accessing cached matrices and inserting new ones.
+type inversionTree struct {
+	mutex *sync.RWMutex // guards the whole tree of cached matrices
+	root  inversionNode // identity matrix; children keyed by invalid index
+}
+
+// inversionNode is one node of the cache tree; each level corresponds to
+// one (sorted) invalid row index.
+type inversionNode struct {
+	matrix   matrix           // cached inversion; may be nil for interior nodes
+	children []*inversionNode // indexed by (invalidIndex - parent)
+}
+
+// newInversionTree initializes a tree for storing inverted matrices.
+// Note that the root node is the identity matrix as it implies
+// there were no errors with the original data.
+func newInversionTree(dataShards, parityShards int) inversionTree {
+	// identityMatrix only fails for size <= 0, which callers do not pass.
+	identity, _ := identityMatrix(dataShards)
+	root := inversionNode{
+		matrix: identity,
+		// One child slot per possible first invalid shard index.
+		children: make([]*inversionNode, dataShards+parityShards),
+	}
+	return inversionTree{
+		mutex: &sync.RWMutex{},
+		root:  root,
+	}
+}
+
+// GetInvertedMatrix returns the cached inverted matrix or nil if it
+// is not found in the tree keyed on the indices of invalid rows.
+// invalidIndices must be sorted ascending.
+func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
+	// Lock the tree for reading before accessing the tree.
+	t.mutex.RLock()
+	defer t.mutex.RUnlock()
+
+	// If no invalid indices were given we should return the root
+	// identity matrix.
+	if len(invalidIndices) == 0 {
+		return t.root.matrix
+	}
+
+	// Recursively search for the inverted matrix in the tree, passing in
+	// 0 as the parent index as we start at the root of the tree.
+	return t.root.getInvertedMatrix(invalidIndices, 0)
+}
+
+// errAlreadySet is returned if the root node matrix is overwritten.
+var errAlreadySet = errors.New("the root node identity matrix is already set")
+
+// InsertInvertedMatrix inserts a new inverted matrix into the tree
+// keyed by the indices of invalid rows. The total number of shards
+// is required for creating the proper length lists of child nodes for
+// each node. invalidIndices must be sorted ascending.
+func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
+	// If no invalid indices were given then we are done because the
+	// root node is already set with the identity matrix.
+	if len(invalidIndices) == 0 {
+		return errAlreadySet
+	}
+
+	// Only square matrices are valid inversions, so reject early.
+	if !matrix.IsSquare() {
+		return errNotSquare
+	}
+
+	// Lock the tree for writing and reading before accessing the tree.
+	t.mutex.Lock()
+	defer t.mutex.Unlock()
+
+	// Recursively create nodes for the inverted matrix in the tree until
+	// we reach the node to insert the matrix to. We start by passing in
+	// 0 as the parent index as we start at the root of the tree.
+	t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
+
+	return nil
+}
+
+// getInvertedMatrix walks the tree along the sorted invalid indices and
+// returns the cached matrix at the final node, or nil when the path (or a
+// cached matrix at its end) does not exist. Callers hold the read lock.
+func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
+	// Get the child node to search next from the list of children. The
+	// list of children starts relative to the parent index passed in
+	// because the indices of invalid rows is sorted (by default). As we
+	// search recursively, the first invalid index gets popped off the list,
+	// so when searching through the list of children, use that first invalid
+	// index to find the child node.
+	firstIndex := invalidIndices[0]
+	node := n.children[firstIndex-parent]
+
+	// If the child node doesn't exist in the list yet, fail fast by
+	// returning, so we can construct and insert the proper inverted matrix.
+	if node == nil {
+		return nil
+	}
+
+	// If there's more than one invalid index left in the list we should
+	// keep searching recursively.
+	if len(invalidIndices) > 1 {
+		// Search recursively on the child node by passing in the invalid indices
+		// with the first index popped off the front. Also the parent index to
+		// pass down is the first index plus one.
+		return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
+	}
+	// If there aren't any more invalid indices to search, we've found our
+	// node. Return it, however keep in mind that the matrix could still be
+	// nil because intermediary nodes in the tree are created sometimes with
+	// their inversion matrices uninitialized.
+	return node.matrix
+}
+
+// insertInvertedMatrix walks (and, where missing, creates) the path of
+// nodes for the sorted invalid indices and caches matrix at the final
+// node. Callers hold the write lock.
+func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
+	// As above, get the child node to search next from the list of children.
+	// The list of children starts relative to the parent index passed in
+	// because the indices of invalid rows is sorted (by default). As we
+	// search recursively, the first invalid index gets popped off the list,
+	// so when searching through the list of children, use that first invalid
+	// index to find the child node.
+	firstIndex := invalidIndices[0]
+	node := n.children[firstIndex-parent]
+
+	// If the child node doesn't exist in the list yet, create a new
+	// node because we have the writer lock and add it to the list
+	// of children.
+	if node == nil {
+		// Make the length of the list of children equal to the number
+		// of shards minus the first invalid index because the list of
+		// invalid indices is sorted, so only this length of errors
+		// are possible in the tree.
+		node = &inversionNode{
+			children: make([]*inversionNode, shards-firstIndex),
+		}
+		// Insert the new node into the tree at the first index relative
+		// to the parent index that was given in this recursive call.
+		n.children[firstIndex-parent] = node
+	}
+
+	// If there's more than one invalid index left in the list we should
+	// keep searching recursively in order to find the node to add our
+	// matrix.
+	if len(invalidIndices) > 1 {
+		// As above, search recursively on the child node by passing in
+		// the invalid indices with the first index popped off the front.
+		// Also the total number of shards and parent index are passed down
+		// which is equal to the first index plus one.
+		node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
+	} else {
+		// If there aren't any more invalid indices to search, we've found our
+		// node. Cache the inverted matrix in this node.
+		node.matrix = matrix
+	}
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/matrix.go b/vendor/github.com/klauspost/reedsolomon/matrix.go
new file mode 100644
index 0000000..a6b9730
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/matrix.go
@@ -0,0 +1,279 @@
+/**
+ * Matrix Algebra over an 8-bit Galois Field
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// byte[row][col]
+type matrix [][]byte
+
+// newMatrix allocates a rows x cols matrix with every element zero.
+// Non-positive dimensions are rejected with an error.
+func newMatrix(rows, cols int) (matrix, error) {
+	switch {
+	case rows <= 0:
+		return nil, errInvalidRowSize
+	case cols <= 0:
+		return nil, errInvalidColSize
+	}
+	rowData := make([][]byte, rows)
+	for i := range rowData {
+		rowData[i] = make([]byte, cols)
+	}
+	return matrix(rowData), nil
+}
+
+// newMatrixData wraps the given row-major data as a matrix after
+// validating its shape. The input slices are used directly, not copied.
+func newMatrixData(data [][]byte) (matrix, error) {
+	m := matrix(data)
+	if err := m.Check(); err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// identityMatrix builds the size x size identity matrix.
+func identityMatrix(size int) (matrix, error) {
+	ident, err := newMatrix(size, size)
+	if err != nil {
+		return nil, err
+	}
+	for i := 0; i < size; i++ {
+		ident[i][i] = 1
+	}
+	return ident, nil
+}
+
+// errInvalidRowSize will be returned if attempting to create a matrix
+// with a negative or zero row count.
+var errInvalidRowSize = errors.New("invalid row size")
+
+// errInvalidColSize will be returned if attempting to create a matrix
+// with a negative or zero column count.
+var errInvalidColSize = errors.New("invalid column size")
+
+// errColSizeMismatch is returned if the rows of a matrix do not all
+// have the same number of columns.
+var errColSizeMismatch = errors.New("column size is not the same for all rows")
+
+// Check validates that the matrix is non-empty and rectangular:
+// at least one row, at least one column, and every row the same width.
+func (m matrix) Check() error {
+	if len(m) == 0 {
+		return errInvalidRowSize
+	}
+	width := len(m[0])
+	if width == 0 {
+		return errInvalidColSize
+	}
+	for _, row := range m {
+		if len(row) != width {
+			return errColSizeMismatch
+		}
+	}
+	return nil
+}
+
+// String returns a human-readable string of the matrix contents.
+//
+// Example: [[1, 2], [3, 4]]
+func (m matrix) String() string {
+ rowOut := make([]string, 0, len(m))
+ for _, row := range m {
+ colOut := make([]string, 0, len(row))
+ for _, col := range row {
+ colOut = append(colOut, strconv.Itoa(int(col)))
+ }
+ rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
+ }
+ return "[" + strings.Join(rowOut, ", ") + "]"
+}
+
+// Multiply returns the matrix product of m (left) and right over
+// GF(2^8). The column count of m must equal the row count of right.
+func (m matrix) Multiply(right matrix) (matrix, error) {
+	inner := len(m[0])
+	if inner != len(right) {
+		return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", inner, len(right))
+	}
+	result, _ := newMatrix(len(m), len(right[0]))
+	for r := range result {
+		for c := range result[r] {
+			// Dot product in the Galois field: multiply entrywise,
+			// accumulate with XOR (field addition).
+			var acc byte
+			for k := 0; k < inner; k++ {
+				acc ^= galMultiply(m[r][k], right[k][c])
+			}
+			result[r][c] = acc
+		}
+	}
+	return result, nil
+}
+
+// Augment returns a new matrix whose rows are the rows of m followed,
+// column-wise, by the rows of right. Both inputs must have the same
+// number of rows.
+func (m matrix) Augment(right matrix) (matrix, error) {
+	if len(m) != len(right) {
+		return nil, errMatrixSize
+	}
+	leftCols := len(m[0])
+	result, _ := newMatrix(len(m), leftCols+len(right[0]))
+	for r := range result {
+		copy(result[r], m[r])
+		copy(result[r][leftCols:], right[r])
+	}
+	return result, nil
+}
+
+// errMatrixSize is returned if the matrix dimensions don't match.
+var errMatrixSize = errors.New("matrix sizes do not match")
+
+// SameSize reports whether n has exactly the same shape as m,
+// returning errMatrixSize on any row-count or row-width mismatch.
+func (m matrix) SameSize(n matrix) error {
+	if len(m) != len(n) {
+		return errMatrixSize
+	}
+	for i, row := range m {
+		if len(row) != len(n[i]) {
+			return errMatrixSize
+		}
+	}
+	return nil
+}
+
+// SubMatrix returns the rectangular region [rmin, rmax) x [cmin, cmax)
+// of this matrix. Data is copied, so the result does not alias m.
+func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
+	result, err := newMatrix(rmax-rmin, cmax-cmin)
+	if err != nil {
+		return nil, err
+	}
+	// Copy each row segment with the builtin copy, which lowers to a
+	// memmove and avoids the previous per-element loop.
+	for r := rmin; r < rmax; r++ {
+		copy(result[r-rmin], m[r][cmin:cmax])
+	}
+	return result, nil
+}
+
+// SwapRows exchanges rows r1 and r2 in place.
+// errInvalidRowSize is returned if either index is out of range.
+func (m matrix) SwapRows(r1, r2 int) error {
+	rows := len(m)
+	if r1 < 0 || r1 >= rows || r2 < 0 || r2 >= rows {
+		return errInvalidRowSize
+	}
+	m[r1], m[r2] = m[r2], m[r1]
+	return nil
+}
+
+// IsSquare will return true if the matrix is square,
+// i.e. the number of rows equals the number of columns.
+func (m matrix) IsSquare() bool {
+	return len(m) == len(m[0])
+}
+
+// errSingular is returned if the matrix is singular and cannot be inverted.
+var errSingular = errors.New("matrix is singular")
+
+// errNotSquare is returned if attempting to invert a non-square matrix.
+var errNotSquare = errors.New("only square matrices can be inverted")
+
+// Invert returns the inverse of this matrix.
+// Returns errSingular when the matrix is singular and doesn't have an inverse.
+// The matrix must be square, otherwise errNotSquare is returned.
+func (m matrix) Invert() (matrix, error) {
+	if !m.IsSquare() {
+		return nil, errNotSquare
+	}
+
+	size := len(m)
+	// Augment with the identity: work = [m | I]. Error returns are
+	// ignored because size is known positive and the shapes match.
+	work, _ := identityMatrix(size)
+	work, _ = m.Augment(work)
+
+	// Row-reduce the left half to the identity; the right half then
+	// holds the inverse.
+	err := work.gaussianElimination()
+	if err != nil {
+		return nil, err
+	}
+
+	return work.SubMatrix(0, size, size, size*2)
+}
+
+// gaussianElimination reduces the receiver, in place, to reduced row
+// echelon form using GF(2^8) arithmetic, where addition and subtraction
+// are both XOR. Returns errSingular if no non-zero pivot exists for
+// some column.
+func (m matrix) gaussianElimination() error {
+	rows := len(m)
+	columns := len(m[0])
+	// Clear out the part below the main diagonal and scale the main
+	// diagonal to be 1.
+	for r := 0; r < rows; r++ {
+		// If the element on the diagonal is 0, find a row below
+		// that has a non-zero and swap them.
+		if m[r][r] == 0 {
+			for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+				if m[rowBelow][r] != 0 {
+					m.SwapRows(r, rowBelow)
+					break
+				}
+			}
+		}
+		// If we couldn't find one, the matrix is singular.
+		if m[r][r] == 0 {
+			return errSingular
+		}
+		// Scale to 1.
+		if m[r][r] != 1 {
+			scale := galDivide(1, m[r][r])
+			for c := 0; c < columns; c++ {
+				m[r][c] = galMultiply(m[r][c], scale)
+			}
+		}
+		// Make everything below the 1 be a 0 by subtracting
+		// a multiple of it. (Subtraction and addition are
+		// both exclusive or in the Galois field.)
+		for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
+			if m[rowBelow][r] != 0 {
+				scale := m[rowBelow][r]
+				for c := 0; c < columns; c++ {
+					m[rowBelow][c] ^= galMultiply(scale, m[r][c])
+				}
+			}
+		}
+	}
+
+	// Now clear the part above the main diagonal.
+	for d := 0; d < rows; d++ {
+		for rowAbove := 0; rowAbove < d; rowAbove++ {
+			if m[rowAbove][d] != 0 {
+				scale := m[rowAbove][d]
+				for c := 0; c < columns; c++ {
+					m[rowAbove][c] ^= galMultiply(scale, m[d][c])
+				}
+
+			}
+		}
+	}
+	return nil
+}
+
+// vandermonde creates a Vandermonde matrix, which is guaranteed to have
+// the property that any subset of rows that forms a square matrix is
+// invertible. Entry (r, c) is r raised to the power c in GF(2^8).
+func vandermonde(rows, cols int) (matrix, error) {
+	result, err := newMatrix(rows, cols)
+	if err != nil {
+		return nil, err
+	}
+	for r := 0; r < rows; r++ {
+		for c := 0; c < cols; c++ {
+			result[r][c] = galExp(byte(r), c)
+		}
+	}
+	return result, nil
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/options.go b/vendor/github.com/klauspost/reedsolomon/options.go
new file mode 100644
index 0000000..b4adc2a
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/options.go
@@ -0,0 +1,175 @@
+package reedsolomon
+
+import (
+ "runtime"
+
+ "github.com/klauspost/cpuid"
+)
+
+// Option allows to override processing parameters.
+type Option func(*options)
+
+// options holds the resolved processing parameters for an Encoder.
+type options struct {
+	maxGoroutines int // maximum goroutines used per encode/decode call
+	minSplitSize  int // minimum bytes processed per goroutine; -1 means derive from cache size
+	shardSize     int // expected shard size set by WithAutoGoroutines (0 = unset)
+	perRound      int // bytes processed per inner round; derived from cache size in New
+
+	useAVX512, useAVX2, useSSSE3, useSSE2 bool // CPU instruction-set toggles
+	usePAR1Matrix bool // build the PARv1-compatible matrix (known buggy, see WithPAR1Matrix)
+	useCauchy     bool // build a Cauchy-style matrix
+	fastOneParity bool // use a pure-XOR matrix when there is exactly one parity shard
+
+	// stream options
+	concReads  bool // concurrent reads from input streams
+	concWrites bool // concurrent writes to output streams
+	streamBS   int  // stream block size per round of reads/writes
+}
+
+// defaultOptions are the options used by New when no Option overrides
+// them. CPU capabilities are detected once via the cpuid package.
+var defaultOptions = options{
+	maxGoroutines: 384,
+	minSplitSize:  -1,
+	fastOneParity: false,
+
+	// Detect CPU capabilities.
+	useSSSE3:  cpuid.CPU.SSSE3(),
+	useSSE2:   cpuid.CPU.SSE2(),
+	useAVX2:   cpuid.CPU.AVX2(),
+	useAVX512: cpuid.CPU.AVX512F() && cpuid.CPU.AVX512BW(),
+}
+
+// init disables goroutine splitting when only one CPU is available,
+// since there is nothing to gain from parallelism there.
+func init() {
+	if runtime.GOMAXPROCS(0) <= 1 {
+		defaultOptions.maxGoroutines = 1
+	}
+}
+
+// WithMaxGoroutines sets the maximum number of goroutines used for
+// encoding & decoding. Jobs are split into this many parts, unless each
+// goroutine would have to process less than minSplitSize bytes (set
+// with WithMinSplitSize). For the best speed, keep this well above the
+// GOMAXPROCS number for more fine grained scheduling.
+// If n <= 0, it is ignored.
+func WithMaxGoroutines(n int) Option {
+	return func(o *options) {
+		if n <= 0 {
+			return
+		}
+		o.maxGoroutines = n
+	}
+}
+
+// WithAutoGoroutines will adjust the number of goroutines for optimal
+// speed with a specific shard size. Send in the shard size you expect
+// to send. Other shard sizes will work, but may not run at the optimal
+// speed. Overwrites WithMaxGoroutines.
+// If shardSize <= 0, it is ignored.
+func WithAutoGoroutines(shardSize int) Option {
+	return func(o *options) { o.shardSize = shardSize }
+}
+
+// WithMinSplitSize is the minimum encoding size in bytes per goroutine.
+// By default this parameter is determined by CPU cache characteristics.
+// See WithMaxGoroutines on how jobs are split.
+// If n <= 0, it is ignored.
+func WithMinSplitSize(n int) Option {
+	return func(o *options) {
+		if n <= 0 {
+			return
+		}
+		o.minSplitSize = n
+	}
+}
+
+// WithConcurrentStreams will enable concurrent reads and writes on the streams.
+// Default: Disabled, meaning only one stream will be read/written at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreams(enabled bool) Option {
+	return func(o *options) {
+		o.concReads = enabled
+		o.concWrites = enabled
+	}
+}
+
+// WithConcurrentStreamReads will enable concurrent reads from the input streams.
+// Default: Disabled, meaning only one stream will be read at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreamReads(enabled bool) Option {
+	return func(o *options) { o.concReads = enabled }
+}
+
+// WithConcurrentStreamWrites will enable concurrent writes to the output streams.
+// Default: Disabled, meaning only one stream will be written at the time.
+// Ignored if not used on a stream input.
+func WithConcurrentStreamWrites(enabled bool) Option {
+	return func(o *options) {
+		o.concWrites = enabled
+	}
+}
+
+// WithStreamBlockSize allows to set a custom block size per round of
+// reads/writes. If not set, any shard size set with WithAutoGoroutines
+// will be used. If WithAutoGoroutines is also unset, 4MB will be used.
+// Ignored if not used on stream.
+func WithStreamBlockSize(n int) Option {
+	return func(o *options) { o.streamBS = n }
+}
+
+// withSSSE3 overrides the auto-detected SSSE3 support (unexported toggle).
+func withSSSE3(enabled bool) Option {
+	return func(o *options) {
+		o.useSSSE3 = enabled
+	}
+}
+
+// withAVX2 overrides the auto-detected AVX2 support (unexported toggle).
+func withAVX2(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX2 = enabled
+	}
+}
+
+// withSSE2 overrides the auto-detected SSE2 support (unexported toggle).
+func withSSE2(enabled bool) Option {
+	return func(o *options) {
+		o.useSSE2 = enabled
+	}
+}
+
+// withAVX512 overrides the auto-detected AVX512 support (unexported toggle).
+func withAVX512(enabled bool) Option {
+	return func(o *options) {
+		o.useAVX512 = enabled
+	}
+}
+
+// WithPAR1Matrix causes the encoder to build the matrix how PARv1
+// does. Note that the method they use is buggy, and may lead to cases
+// where recovery is impossible, even if there are enough parity
+// shards.
+func WithPAR1Matrix() Option {
+	return func(o *options) {
+		// PAR1 and Cauchy are mutually exclusive.
+		o.usePAR1Matrix, o.useCauchy = true, false
+	}
+}
+
+// WithCauchyMatrix will make the encoder build a Cauchy style matrix.
+// The output of this is not compatible with the standard output.
+// A Cauchy matrix is faster to generate. This does not affect data
+// throughput, but will result in slightly faster start-up time.
+func WithCauchyMatrix() Option {
+	return func(o *options) {
+		// Cauchy and PAR1 are mutually exclusive.
+		o.useCauchy, o.usePAR1Matrix = true, false
+	}
+}
+
+// WithFastOneParityMatrix will switch the matrix to a simple xor
+// if there is only one parity shard.
+// The PAR1 matrix already has this property so it has little effect there.
+func WithFastOneParityMatrix() Option {
+	return func(o *options) { o.fastOneParity = true }
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
new file mode 100644
index 0000000..13a35d2
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go
@@ -0,0 +1,1011 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+// Package reedsolomon enables Erasure Coding in Go
+//
+// For usage and examples, see https://github.com/klauspost/reedsolomon
+//
+package reedsolomon
+
+import (
+ "bytes"
+ "errors"
+ "io"
+ "runtime"
+ "sync"
+
+ "github.com/klauspost/cpuid"
+)
+
+// Encoder is an interface to encode Reed-Solomon parity sets for your data.
+type Encoder interface {
+	// Encode parity for a set of data shards.
+	// Input is 'shards' containing data shards followed by parity shards.
+	// The number of shards must match the number given to New().
+	// Each shard is a byte array, and they must all be the same size.
+	// The parity shards will always be overwritten and the data shards
+	// will remain the same, so it is safe for you to read from the
+	// data shards while this is running.
+	Encode(shards [][]byte) error
+
+	// Verify returns true if the parity shards contain correct data.
+	// The data is the same format as Encode. No data is modified, so
+	// you are allowed to read from data while this is running.
+	Verify(shards [][]byte) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of shards, some of which contain data, fills in the
+	// ones that don't have data.
+	//
+	// The length of the array must be equal to the total number of shards.
+	// You indicate that a shard is missing by setting it to nil or zero-length.
+	// If a shard is zero-length but has sufficient capacity, that memory will
+	// be used, otherwise a new []byte will be allocated.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(shards [][]byte) error
+
+	// ReconstructData will recreate any missing data shards, if possible.
+	//
+	// Given a list of shards, some of which contain data, fills in the
+	// data shards that don't have data.
+	//
+	// The length of the array must be equal to Shards.
+	// You indicate that a shard is missing by setting it to nil or zero-length.
+	// If a shard is zero-length but has sufficient capacity, that memory will
+	// be used, otherwise a new []byte will be allocated.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// As the reconstructed shard set may contain missing parity shards,
+	// calling the Verify function is likely to fail.
+	ReconstructData(shards [][]byte) error
+
+	// Update recomputes the parity when a few data shards have changed.
+	// Input 'newDatashards' contains the data shards that changed.
+	// Input 'shards' contains the old data shards (an unchanged data shard may be nil) and the old parity shards.
+	// The new parity shards will be placed in shards[DataShards:].
+	// Update is very useful if DataShards is much larger than ParityShards and
+	// few data shards have changed. It is faster than Encode and does not
+	// need to read all data shards to encode.
+	Update(shards [][]byte, newDatashards [][]byte) error
+
+	// Split a data slice into the number of shards given to the encoder,
+	// and create empty parity shards.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't dividable by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// There must be at least 1 byte otherwise ErrShortData will be
+	// returned.
+	//
+	// The data will not be copied, except for the last shard, so you
+	// should not modify the data of the input slice afterwards.
+	Split(data []byte) ([][]byte, error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	// You must supply the exact output size you want.
+	// If there are too few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards [][]byte, outSize int) error
+}
+
+// reedSolomon contains a matrix for a specific
+// distribution of datashards and parity shards.
+// Construct it using New().
+type reedSolomon struct {
+	DataShards   int // Number of data shards, should not be modified.
+	ParityShards int // Number of parity shards, should not be modified.
+	Shards       int // Total number of shards. Calculated, and should not be modified.
+	m            matrix        // full encoding matrix (identity rows on top, parity rows below)
+	tree         inversionTree // cache of inverted matrices keyed by invalid row indices
+	parity       [][]byte      // the parity rows of m, sliced out in New for convenience
+	o            options       // processing options resolved in New
+	mPool        sync.Pool     // scratch buffers for the AVX2 code-gen path
+}
+
+// ErrInvShardNum will be returned by New, if you attempt to create
+// an Encoder where either data or parity shards is zero or less.
+var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards")
+
+// ErrMaxShardNum will be returned by New, if you attempt to create an
+// Encoder where data and parity shards are bigger than the order of
+// GF(2^8).
+var ErrMaxShardNum = errors.New("cannot create Encoder with more than 256 data+parity shards")
+
+// buildMatrix creates the matrix to use for encoding, given the
+// number of data shards and the number of total shards.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrix(dataShards, totalShards int) (matrix, error) {
+	// A Vandermonde matrix has the required invertibility property,
+	// but on its own it would alter the data shards during encoding.
+	vm, err := vandermonde(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+
+	// Normalize it: multiplying by the inverse of its top square turns
+	// that square into the identity while preserving the property that
+	// any square subset of rows is invertible.
+	top, err := vm.SubMatrix(0, 0, dataShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+	topInv, err := top.Invert()
+	if err != nil {
+		return nil, err
+	}
+	return vm.Multiply(topInv)
+}
+
+// buildMatrixPAR1 creates the matrix to use for encoding according to
+// the PARv1 spec, given the number of data shards and the number of
+// total shards. Note that the method they use is buggy, and may lead
+// to cases where recovery is impossible, even if there are enough
+// parity shards.
+//
+// The top square of the matrix is guaranteed to be an identity
+// matrix, which means that the data shards are unchanged after
+// encoding.
+func buildMatrixPAR1(dataShards, totalShards int) (matrix, error) {
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+	for r := range result {
+		if r < dataShards {
+			// Identity portion.
+			result[r][r] = 1
+			continue
+		}
+		// Transposed Vandermonde rows, starting at 1 instead of 0.
+		for c := range result[r] {
+			result[r][c] = galExp(byte(c+1), r-dataShards)
+		}
+	}
+	return result, nil
+}
+
+// buildMatrixCauchy builds an encoding matrix whose top square is the
+// identity and whose remaining rows form a transposed Cauchy matrix.
+func buildMatrixCauchy(dataShards, totalShards int) (matrix, error) {
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+	for r := range result {
+		if r < dataShards {
+			// Identity portion keeps the data shards unchanged.
+			result[r][r] = 1
+			continue
+		}
+		for c := range result[r] {
+			result[r][c] = invTable[(byte(r ^ c))]
+		}
+	}
+	return result, nil
+}
+
+// buildXorMatrix can be used to build a matrix with pure XOR
+// operations if there is only one parity shard.
+func buildXorMatrix(dataShards, totalShards int) (matrix, error) {
+	if totalShards != dataShards+1 {
+		return nil, errors.New("internal error")
+	}
+	result, err := newMatrix(totalShards, dataShards)
+	if err != nil {
+		return nil, err
+	}
+	for r := range result {
+		if r < dataShards {
+			// Identity portion.
+			result[r][r] = 1
+			continue
+		}
+		// The single parity row is all ones: parity = XOR of all data.
+		for c := range result[r] {
+			result[r][c] = 1
+		}
+	}
+	return result, nil
+}
+
+// New creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of total shards is 256.
+// If no options are supplied, default options are used.
+func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
+	r := reedSolomon{
+		DataShards:   dataShards,
+		ParityShards: parityShards,
+		Shards:       dataShards + parityShards,
+		o:            defaultOptions,
+	}
+
+	for _, opt := range opts {
+		opt(&r.o)
+	}
+	if dataShards <= 0 || parityShards <= 0 {
+		return nil, ErrInvShardNum
+	}
+
+	if dataShards+parityShards > 256 {
+		return nil, ErrMaxShardNum
+	}
+
+	// Pick the matrix construction requested by the options.
+	var err error
+	switch {
+	case r.o.fastOneParity && parityShards == 1:
+		r.m, err = buildXorMatrix(dataShards, r.Shards)
+	case r.o.useCauchy:
+		r.m, err = buildMatrixCauchy(dataShards, r.Shards)
+	case r.o.usePAR1Matrix:
+		r.m, err = buildMatrixPAR1(dataShards, r.Shards)
+	default:
+		r.m, err = buildMatrix(dataShards, r.Shards)
+	}
+	if err != nil {
+		return nil, err
+	}
+
+	// Calculate what we want per round
+	r.o.perRound = cpuid.CPU.Cache.L2
+	if r.o.perRound <= 0 {
+		// Set to 128K if undetectable.
+		r.o.perRound = 128 << 10
+	}
+
+	if cpuid.CPU.ThreadsPerCore > 1 && r.o.maxGoroutines > cpuid.CPU.PhysicalCores {
+		// If multiple threads per core, make sure they don't contend for cache.
+		r.o.perRound /= cpuid.CPU.ThreadsPerCore
+	}
+	// 1 input + parity must fit in cache, and we add one more to be safer.
+	r.o.perRound = r.o.perRound / (1 + parityShards)
+	// Align to 64 bytes.
+	r.o.perRound = ((r.o.perRound + 63) / 64) * 64
+
+	if r.o.minSplitSize <= 0 {
+		// Set minsplit as high as we can, but still have parity in L1.
+		cacheSize := cpuid.CPU.Cache.L1D
+		if cacheSize <= 0 {
+			// Assume a 32KB L1 data cache if undetectable.
+			cacheSize = 32 << 10
+		}
+
+		r.o.minSplitSize = cacheSize / (parityShards + 1)
+		// Min 1K
+		if r.o.minSplitSize < 1024 {
+			r.o.minSplitSize = 1024
+		}
+	}
+
+	if r.o.perRound < r.o.minSplitSize {
+		r.o.perRound = r.o.minSplitSize
+	}
+
+	// If WithAutoGoroutines supplied an expected shard size, derive the
+	// goroutine count from it instead of using the fixed maximum.
+	if r.o.shardSize > 0 {
+		p := runtime.GOMAXPROCS(0)
+		if p == 1 || r.o.shardSize <= r.o.minSplitSize*2 {
+			// Not worth it.
+			r.o.maxGoroutines = 1
+		} else {
+			g := r.o.shardSize / r.o.perRound
+
+			// Overprovision by a factor of 2.
+			if g < p*2 && r.o.perRound > r.o.minSplitSize*2 {
+				g = p * 2
+				r.o.perRound /= 2
+			}
+
+			// Have g be multiple of p
+			g += p - 1
+			g -= g % p
+
+			r.o.maxGoroutines = g
+		}
+	}
+
+	// Inverted matrices are cached in a tree keyed by the indices
+	// of the invalid rows of the data to reconstruct.
+	// The inversion root node will have the identity matrix as
+	// its inversion matrix because it implies there are no errors
+	// with the original data.
+	r.tree = newInversionTree(dataShards, parityShards)
+
+	r.parity = make([][]byte, parityShards)
+	for i := range r.parity {
+		r.parity[i] = r.m[dataShards+i]
+	}
+
+	if avx2CodeGen && r.o.useAVX2 {
+		// Scratch buffers for the generated AVX2 kernels.
+		r.mPool.New = func() interface{} {
+			return make([]byte, r.Shards*2*32)
+		}
+	}
+	return &r, err
+}
+
+// ErrTooFewShards is returned if too few shards where given to
+// Encode/Verify/Reconstruct/Update. It will also be returned from Reconstruct
+// if there were too few shards to reconstruct the missing data.
+var ErrTooFewShards = errors.New("too few shards given")
+
+// Encode computes parity for a set of data shards.
+// 'shards' contains data shards followed by parity shards; its length
+// must match the number given to New. Each shard is a byte array, and
+// they must all be the same size. The parity shards will always be
+// overwritten and the data shards will remain the same.
+func (r *reedSolomon) Encode(shards [][]byte) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+	if err := checkShards(shards, false); err != nil {
+		return err
+	}
+	// Parity shards are the outputs; data shards are the inputs.
+	r.codeSomeShards(r.parity, shards[:r.DataShards], shards[r.DataShards:], r.ParityShards, len(shards[0]))
+	return nil
+}
+
+// ErrInvalidInput is returned if invalid input parameter of Update.
+var ErrInvalidInput = errors.New("invalid input")
+
+// Update recomputes the parity shards given only the data shards that
+// changed. See the Encoder interface documentation for the contract.
+func (r *reedSolomon) Update(shards [][]byte, newDatashards [][]byte) error {
+	if len(shards) != r.Shards || len(newDatashards) != r.DataShards {
+		return ErrTooFewShards
+	}
+
+	if err := checkShards(shards, true); err != nil {
+		return err
+	}
+	if err := checkShards(newDatashards, true); err != nil {
+		return err
+	}
+
+	// Every changed data shard needs its old contents available.
+	for i, n := range newDatashards {
+		if n != nil && shards[i] == nil {
+			return ErrInvalidInput
+		}
+	}
+	// All old parity shards must be present.
+	for _, p := range shards[r.DataShards:] {
+		if p == nil {
+			return ErrInvalidInput
+		}
+	}
+
+	// Fold the data deltas into the parity shards in place.
+	r.updateParityShards(r.parity, shards[:r.DataShards], newDatashards[:r.DataShards], shards[r.DataShards:], r.ParityShards, shardSize(shards))
+	return nil
+}
+
+// updateParityShards folds the delta between old and new data shards
+// into the existing parity shards, serially. For every changed shard
+// the old data is combined with the new via sliceXor (note: the old
+// input slices are modified) and the result is multiplied into each
+// parity output by the matching matrix coefficient.
+func (r *reedSolomon) updateParityShards(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+	// Split across goroutines when the work is large enough.
+	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		r.updateParityShardsP(matrixRows, oldinputs, newinputs, outputs, outputCount, byteCount)
+		return
+	}
+
+	for c := 0; c < r.DataShards; c++ {
+		in := newinputs[c]
+		if in == nil {
+			// This data shard is unchanged; nothing to fold in.
+			continue
+		}
+		oldin := oldinputs[c]
+		// oldinputs data will be changed
+		sliceXor(in, oldin, &r.o)
+		for iRow := 0; iRow < outputCount; iRow++ {
+			galMulSliceXor(matrixRows[iRow][c], oldin, outputs[iRow], &r.o)
+		}
+	}
+}
+
+// updateParityShardsP is the parallel version of updateParityShards:
+// the byte range is split into chunks of at least minSplitSize bytes
+// and each chunk is processed by its own goroutine. Chunks are
+// disjoint, so the goroutines never write to the same bytes.
+func (r *reedSolomon) updateParityShardsP(matrixRows, oldinputs, newinputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			// Clamp the final chunk to the remaining bytes.
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, stop int) {
+			for c := 0; c < r.DataShards; c++ {
+				in := newinputs[c]
+				if in == nil {
+					continue
+				}
+				oldin := oldinputs[c]
+				// oldinputs data will be changed
+				sliceXor(in[start:stop], oldin[start:stop], &r.o)
+				for iRow := 0; iRow < outputCount; iRow++ {
+					galMulSliceXor(matrixRows[iRow][c], oldin[start:stop], outputs[iRow][start:stop], &r.o)
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
+
+// Verify returns true if the parity shards contain the right data.
+// The data is the same format as Encode. No data is modified.
+func (r *reedSolomon) Verify(shards [][]byte) (bool, error) {
+	if len(shards) != r.Shards {
+		return false, ErrTooFewShards
+	}
+	if err := checkShards(shards, false); err != nil {
+		return false, err
+	}
+	// Recompute parity from the data shards and compare it with the
+	// parity shards that were supplied.
+	return r.checkSomeShards(r.parity, shards[:r.DataShards], shards[r.DataShards:], r.ParityShards, len(shards[0])), nil
+}
+
+// codeSomeShards multiplies a subset of rows from a coding matrix by a
+// full set of input shards to produce some output shards.
+// 'matrixRows' are the rows from the matrix to use.
+// 'inputs' is an array of byte arrays, each of which is one input shard.
+// The number of inputs used is determined by the length of each matrix row.
+// 'outputs' are the byte arrays where the computed shards are stored.
+// The number of outputs computed, and the number of matrix rows used,
+// is determined by outputCount, which is the number of outputs to compute.
+func (r *reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	if len(outputs) == 0 {
+		return
+	}
+	// Dispatch to the widest applicable SIMD/parallel implementation.
+	switch {
+	case r.o.useAVX512 && r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize && len(inputs) >= 4 && len(outputs) >= 2:
+		r.codeSomeShardsAvx512P(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	case r.o.useAVX512 && len(inputs) >= 4 && len(outputs) >= 2:
+		r.codeSomeShardsAvx512(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	case r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize:
+		r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
+		return
+	}
+
+	// Process using no goroutines
+	start, end := 0, r.o.perRound
+	if end > len(inputs[0]) {
+		end = len(inputs[0])
+	}
+	if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
+		// The generated AVX2 kernels handle a prefix of the data; the
+		// generic loop below finishes any remainder from 'start'.
+		m := genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
+		start += galMulSlicesAvx2(m, inputs, outputs, 0, byteCount)
+		r.mPool.Put(m)
+		end = len(inputs[0])
+	}
+
+	// Generic path: process the data in cache-sized rounds.
+	for start < len(inputs[0]) {
+		for c := 0; c < r.DataShards; c++ {
+			in := inputs[c][start:end]
+			for iRow := 0; iRow < outputCount; iRow++ {
+				if c == 0 {
+					// First input initializes the output row.
+					galMulSlice(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
+				} else {
+					// Subsequent inputs are accumulated with XOR.
+					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][start:end], &r.o)
+				}
+			}
+		}
+		start = end
+		end += r.o.perRound
+		if end > len(inputs[0]) {
+			end = len(inputs[0])
+		}
+	}
+}
+
+// codeSomeShardsP performs the same work as codeSomeShards, but splits
+// the byte range across several goroutines. Each goroutine owns a
+// disjoint [start, stop) range of every shard, so no locking is needed.
+func (r *reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 64
+	do = (do + 63) & (^63)
+	start := 0
+	var avx2Matrix []byte
+	if avx2CodeGen && r.o.useAVX2 && byteCount >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
+		// Build the generated-kernel matrix once, shared read-only by
+		// all goroutines; returned to the pool when all are done.
+		avx2Matrix = genAvx2Matrix(matrixRows, len(inputs), len(outputs), r.mPool.Get().([]byte))
+		defer r.mPool.Put(avx2Matrix)
+	}
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+
+		wg.Add(1)
+		go func(start, stop int) {
+			if avx2CodeGen && r.o.useAVX2 && stop-start >= 32 && len(inputs) > 1 && len(outputs) > 1 && len(inputs) <= maxAvx2Inputs && len(outputs) <= maxAvx2Outputs {
+				start += galMulSlicesAvx2(avx2Matrix, inputs, outputs, start, stop)
+			}
+
+			// Process the (remaining) range in cache-sized rounds.
+			lstart, lstop := start, start+r.o.perRound
+			if lstop > stop {
+				lstop = stop
+			}
+			for lstart < stop {
+				for c := 0; c < r.DataShards; c++ {
+					in := inputs[c][lstart:lstop]
+					for iRow := 0; iRow < outputCount; iRow++ {
+						if c == 0 {
+							galMulSlice(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+						} else {
+							galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow][lstart:lstop], &r.o)
+						}
+					}
+				}
+				lstart = lstop
+				lstop += r.o.perRound
+				if lstop > stop {
+					lstop = stop
+				}
+			}
+			wg.Done()
+		}(start, start+do)
+		start += do
+	}
+	wg.Wait()
+}
+
+// checkSomeShards is mostly the same as codeSomeShards,
+// except this will check values and return
+// as soon as a difference is found.
+// It recomputes the expected outputs into scratch buffers and compares
+// them with 'toCheck'; the inputs are never modified.
+func (r *reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
+	if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
+		return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
+	}
+	outputs := make([][]byte, len(toCheck))
+	for i := range outputs {
+		outputs[i] = make([]byte, byteCount)
+	}
+	for c := 0; c < r.DataShards; c++ {
+		in := inputs[c]
+		for iRow := 0; iRow < outputCount; iRow++ {
+			// XOR-accumulating into zeroed buffers is equivalent to
+			// assigning on the first input.
+			galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+		}
+	}
+
+	for i, calc := range outputs {
+		if !bytes.Equal(calc, toCheck[i]) {
+			return false
+		}
+	}
+	return true
+}
+
+// checkSomeShardsP is the parallel version of checkSomeShards.
+// Each goroutine verifies a disjoint byte range; the shared 'same'
+// flag, guarded by mu, lets goroutines abort early once any range has
+// been found to mismatch.
+func (r *reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
+	same := true
+	var mu sync.RWMutex // For above
+
+	var wg sync.WaitGroup
+	do := byteCount / r.o.maxGoroutines
+	if do < r.o.minSplitSize {
+		do = r.o.minSplitSize
+	}
+	// Make sizes divisible by 64
+	do = (do + 63) & (^63)
+	start := 0
+	for start < byteCount {
+		if start+do > byteCount {
+			do = byteCount - start
+		}
+		wg.Add(1)
+		go func(start, do int) {
+			defer wg.Done()
+			outputs := make([][]byte, len(toCheck))
+			for i := range outputs {
+				outputs[i] = make([]byte, do)
+			}
+			for c := 0; c < r.DataShards; c++ {
+				mu.RLock()
+				if !same {
+					// Another goroutine already found a mismatch.
+					mu.RUnlock()
+					return
+				}
+				mu.RUnlock()
+				in := inputs[c][start : start+do]
+				for iRow := 0; iRow < outputCount; iRow++ {
+					galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], &r.o)
+				}
+			}
+
+			for i, calc := range outputs {
+				if !bytes.Equal(calc, toCheck[i][start:start+do]) {
+					mu.Lock()
+					same = false
+					mu.Unlock()
+					return
+				}
+			}
+		}(start, do)
+		start += do
+	}
+	wg.Wait()
+	return same
+}
+
+// ErrShardNoData will be returned if there are no shards,
+// or if the length of all shards is zero.
+var ErrShardNoData = errors.New("no shard data")
+
+// ErrShardSize is returned if shard length isn't the same for all
+// shards. Zero-length shards may be allowed, depending on the caller.
+var ErrShardSize = errors.New("shard sizes do not match")
+
+// checkShards will check if shards are the same size
+// or 0, when nilok is true. An error is returned if this fails.
+// An error is also returned if all shards are size 0.
+func checkShards(shards [][]byte, nilok bool) error {
+	size := shardSize(shards)
+	if size == 0 {
+		return ErrShardNoData
+	}
+	for _, shard := range shards {
+		if len(shard) != size {
+			// Zero-length shards are tolerated only when nilok is set.
+			if len(shard) != 0 || !nilok {
+				return ErrShardSize
+			}
+		}
+	}
+	return nil
+}
+
+// shardSize returns the size of a single shard.
+// The first non-zero size is returned,
+// or 0 if all shards are size 0.
+func shardSize(shards [][]byte) int {
+	for _, shard := range shards {
+		if len(shard) != 0 {
+			return len(shard)
+		}
+	}
+	return 0
+}
+
+// Reconstruct will recreate the missing shards, if possible.
+//
+// Given a list of shards, some of which contain data, fills in the
+// ones that don't have data.
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is complete, but integrity is not verified.
+// Use the Verify function to check if the data set is ok.
+func (r *reedSolomon) Reconstruct(shards [][]byte) error {
+	return r.reconstruct(shards, false)
+}
+
+// ReconstructData will recreate any missing data shards, if possible.
+//
+// Given a list of shards, some of which contain data, fills in the
+// data shards that don't have data.
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil or zero-length.
+// If a shard is zero-length but has sufficient capacity, that memory will
+// be used, otherwise a new []byte will be allocated.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// As the reconstructed shard set may contain missing parity shards,
+// calling the Verify function is likely to fail.
+// Use Reconstruct to also rebuild missing parity shards.
+func (r *reedSolomon) ReconstructData(shards [][]byte) error {
+	return r.reconstruct(shards, true)
+}
+
+// reconstruct will recreate the missing data shards, and unless
+// dataOnly is true, also the missing parity shards
+//
+// The length of the array must be equal to Shards.
+// You indicate that a shard is missing by setting it to nil.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+func (r *reedSolomon) reconstruct(shards [][]byte, dataOnly bool) error {
+	if len(shards) != r.Shards {
+		return ErrTooFewShards
+	}
+	// Check arguments.
+	err := checkShards(shards, true)
+	if err != nil {
+		return err
+	}
+
+	shardSize := shardSize(shards)
+
+	// Quick check: are all of the shards present? If so, there's
+	// nothing to do.
+	numberPresent := 0
+	dataPresent := 0
+	for i := 0; i < r.Shards; i++ {
+		if len(shards[i]) != 0 {
+			numberPresent++
+			if i < r.DataShards {
+				dataPresent++
+			}
+		}
+	}
+	if numberPresent == r.Shards || dataOnly && dataPresent == r.DataShards {
+		// Cool. All of the shards have data. We don't
+		// need to do anything.
+		return nil
+	}
+
+	// More complete sanity check
+	if numberPresent < r.DataShards {
+		return ErrTooFewShards
+	}
+
+	// Pull out an array holding just the shards that
+	// correspond to the rows of the submatrix. These shards
+	// will be the input to the decoding process that re-creates
+	// the missing data shards.
+	//
+	// Also, create an array of indices of the valid rows we do have
+	// and the invalid rows we don't have up until we have enough valid rows.
+	subShards := make([][]byte, r.DataShards)
+	validIndices := make([]int, r.DataShards)
+	invalidIndices := make([]int, 0)
+	subMatrixRow := 0
+	for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
+		if len(shards[matrixRow]) != 0 {
+			subShards[subMatrixRow] = shards[matrixRow]
+			validIndices[subMatrixRow] = matrixRow
+			subMatrixRow++
+		} else {
+			invalidIndices = append(invalidIndices, matrixRow)
+		}
+	}
+
+	// Attempt to get the cached inverted matrix out of the tree
+	// based on the indices of the invalid rows.
+	dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)
+
+	// If the inverted matrix isn't cached in the tree yet we must
+	// construct it ourselves and insert it into the tree for the
+	// future. In this way the inversion tree is lazily loaded.
+	if dataDecodeMatrix == nil {
+		// Pull out the rows of the matrix that correspond to the
+		// shards that we have and build a square matrix. This
+		// matrix could be used to generate the shards that we have
+		// from the original data.
+		subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
+		for subMatrixRow, validIndex := range validIndices {
+			for c := 0; c < r.DataShards; c++ {
+				subMatrix[subMatrixRow][c] = r.m[validIndex][c]
+			}
+		}
+		// Invert the matrix, so we can go from the encoded shards
+		// back to the original data. Then pull out the row that
+		// generates the shard that we want to decode. Note that
+		// since this matrix maps back to the original data, it can
+		// be used to create a data shard, but not a parity shard.
+		dataDecodeMatrix, err = subMatrix.Invert()
+		if err != nil {
+			return err
+		}
+
+		// Cache the inverted matrix in the tree for future use keyed on the
+		// indices of the invalid rows.
+		err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Re-create any data shards that were missing.
+	//
+	// The input to the coding is all of the shards we actually
+	// have, and the output is the missing data shards. The computation
+	// is done using the special decode matrix we just built.
+	outputs := make([][]byte, r.ParityShards)
+	matrixRows := make([][]byte, r.ParityShards)
+	outputCount := 0
+
+	for iShard := 0; iShard < r.DataShards; iShard++ {
+		if len(shards[iShard]) == 0 {
+			if cap(shards[iShard]) >= shardSize {
+				shards[iShard] = shards[iShard][0:shardSize]
+			} else {
+				shards[iShard] = make([]byte, shardSize)
+			}
+			outputs[outputCount] = shards[iShard]
+			matrixRows[outputCount] = dataDecodeMatrix[iShard]
+			outputCount++
+		}
+	}
+	r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)
+
+	if dataOnly {
+		// Exit out early if we are only interested in the data shards
+		return nil
+	}
+
+	// Now that we have all of the data shards intact, we can
+	// compute any of the parity that is missing.
+	//
+	// The input to the coding is ALL of the data shards, including
+	// any that we just calculated. The output is whichever of the
+	// parity shards were missing.
+	outputCount = 0
+	for iShard := r.DataShards; iShard < r.Shards; iShard++ {
+		if len(shards[iShard]) == 0 {
+			if cap(shards[iShard]) >= shardSize {
+				shards[iShard] = shards[iShard][0:shardSize]
+			} else {
+				shards[iShard] = make([]byte, shardSize)
+			}
+			outputs[outputCount] = shards[iShard]
+			matrixRows[outputCount] = r.parity[iShard-r.DataShards]
+			outputCount++
+		}
+	}
+	r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
+	return nil
+}
+
+// ErrShortData will be returned by Split() if there isn't enough data
+// to fill the number of shards.
+var ErrShortData = errors.New("not enough data to fill the number of requested shards")
+
+// Split a data slice into the number of shards given to the encoder,
+// and create empty parity shards if necessary.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards,
+// the last shard will contain extra zeros.
+//
+// There must be at least 1 byte, otherwise ErrShortData will be
+// returned.
+//
+// The data will not be copied, except for the last shard, so you
+// should not modify the data of the input slice afterwards.
+func (r *reedSolomon) Split(data []byte) ([][]byte, error) {
+	if len(data) == 0 {
+		return nil, ErrShortData
+	}
+	// Calculate number of bytes per data shard.
+	perShard := (len(data) + r.DataShards - 1) / r.DataShards
+
+	// Use any spare capacity of the input slice before allocating padding.
+	if cap(data) > len(data) {
+		data = data[:cap(data)]
+	}
+
+	// Only allocate memory if necessary
+	var padding []byte
+	if len(data) < (r.Shards * perShard) {
+		// calculate maximum number of full shards in `data` slice
+		fullShards := len(data) / perShard
+		padding = make([]byte, r.Shards*perShard-perShard*fullShards)
+		copy(padding, data[perShard*fullShards:])
+		data = data[0 : perShard*fullShards]
+	}
+
+	// Split into equal-length shards.
+	dst := make([][]byte, r.Shards)
+	i := 0
+	for ; i < len(dst) && len(data) >= perShard; i++ {
+		dst[i] = data[:perShard:perShard]
+		data = data[perShard:]
+	}
+
+	// Remaining shards come from the zero-padded buffer.
+	for j := 0; i+j < len(dst); j++ {
+		dst[i+j] = padding[:perShard:perShard]
+		padding = padding[perShard:]
+	}
+
+	return dst, nil
+}
+
+// ErrReconstructRequired is returned by Join if too few data shards are intact and a
+// reconstruction is required before you can successfully join the shards.
+var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+// You must supply the exact output size you want.
+//
+// If there are too few shards given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+// If one or more required data shards are nil, ErrReconstructRequired will be returned.
+func (r *reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
+	// Do we have enough shards?
+	if len(shards) < r.DataShards {
+		return ErrTooFewShards
+	}
+	shards = shards[:r.DataShards]
+
+	// Do we have enough data?
+	size := 0
+	for _, shard := range shards {
+		if shard == nil {
+			return ErrReconstructRequired
+		}
+		size += len(shard)
+
+		// Do we have enough data already?
+		if size >= outSize {
+			break
+		}
+	}
+	if size < outSize {
+		return ErrShortData
+	}
+
+	// Copy data to dst
+	write := outSize
+	for _, shard := range shards {
+		if write < len(shard) {
+			// Final, partial shard: write only the remaining bytes.
+			_, err := dst.Write(shard[:write])
+			return err
+		}
+		n, err := dst.Write(shard)
+		if err != nil {
+			return err
+		}
+		write -= n
+	}
+	return nil
+}
diff --git a/vendor/github.com/klauspost/reedsolomon/streaming.go b/vendor/github.com/klauspost/reedsolomon/streaming.go
new file mode 100644
index 0000000..d048ba0
--- /dev/null
+++ b/vendor/github.com/klauspost/reedsolomon/streaming.go
@@ -0,0 +1,603 @@
+/**
+ * Reed-Solomon Coding over 8-bit values.
+ *
+ * Copyright 2015, Klaus Post
+ * Copyright 2015, Backblaze, Inc.
+ */
+
+package reedsolomon
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "sync"
+)
+
+// StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
+// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
+//
+// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
+// since the streaming interface has a start up overhead.
+//
+// For all operations, readers and writers should not assume any order/size of
+// individual reads/writes.
+//
+// For usage examples, see "stream-encoder.go" and "stream-decoder.go" in the examples
+// folder.
+type StreamEncoder interface {
+	// Encode parity shards for a set of data shards.
+	//
+	// Input is 'shards' containing readers for data shards followed by parity shards
+	// io.Writer.
+	//
+	// The number of shards must match the number given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	//
+	// The parity shards will be written to the writer.
+	// The number of bytes written will match the input size.
+	//
+	// If a data stream returns an error, a StreamReadError type error
+	// will be returned. If a parity writer returns an error, a
+	// StreamWriteError will be returned.
+	Encode(data []io.Reader, parity []io.Writer) error
+
+	// Verify returns true if the parity shards contain correct data.
+	//
+	// The number of shards must match the total number of data+parity shards
+	// given to NewStream().
+	//
+	// Each reader must supply the same number of bytes.
+	// If a shard stream returns an error, a StreamReadError type error
+	// will be returned.
+	Verify(shards []io.Reader) (bool, error)
+
+	// Reconstruct will recreate the missing shards if possible.
+	//
+	// Given a list of valid shards (to read) and invalid shards (to write)
+	//
+	// You indicate that a shard is missing by setting it to nil in the 'valid'
+	// slice and at the same time setting a non-nil writer in "fill".
+	// An index cannot contain both non-nil 'valid' and 'fill' entry.
+	// If both are provided 'ErrReconstructMismatch' is returned.
+	//
+	// If there are too few shards to reconstruct the missing
+	// ones, ErrTooFewShards will be returned.
+	//
+	// The reconstructed shard set is complete, but integrity is not verified.
+	// Use the Verify function to check if data set is ok.
+	Reconstruct(valid []io.Reader, fill []io.Writer) error
+
+	// Split an input stream into the number of shards given to the encoder.
+	//
+	// The data will be split into equally sized shards.
+	// If the data size isn't divisible by the number of shards,
+	// the last shard will contain extra zeros.
+	//
+	// You must supply the total size of your input.
+	// 'ErrShortData' will be returned if it is unable to retrieve the
+	// number of bytes indicated.
+	Split(data io.Reader, dst []io.Writer, size int64) (err error)
+
+	// Join the shards and write the data segment to dst.
+	//
+	// Only the data shards are considered.
+	//
+	// You must supply the exact output size you want.
+	// If there are too few shards given, ErrTooFewShards will be returned.
+	// If the total data size is less than outSize, ErrShortData will be returned.
+	Join(dst io.Writer, shards []io.Reader, outSize int64) error
+}
+
+// StreamReadError is returned when a read error is encountered
+// that relates to a supplied stream.
+// This will allow you to find out which reader has failed.
+type StreamReadError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error implements the error interface, returning the error as a string.
+func (s StreamReadError) Error() string {
+	return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string.
+func (s StreamReadError) String() string {
+	return s.Error()
+}
+
+// StreamWriteError is returned when a write error is encountered
+// that relates to a supplied stream. This will allow you to
+// find out which writer has failed.
+type StreamWriteError struct {
+	Err    error // The error
+	Stream int   // The stream number on which the error occurred
+}
+
+// Error implements the error interface, returning the error as a string.
+func (s StreamWriteError) Error() string {
+	return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
+}
+
+// String returns the error as a string.
+func (s StreamWriteError) String() string {
+	return s.Error()
+}
+
+// rsStream contains a matrix for a specific
+// distribution of datashards and parity shards.
+// Construct using NewStream().
+type rsStream struct {
+	r *reedSolomon
+	o options
+
+	// Shard reader (sequential or concurrent, chosen at construction)
+	readShards func(dst [][]byte, in []io.Reader) error
+	// Shard writer (sequential or concurrent, chosen at construction)
+	writeShards func(out []io.Writer, in [][]byte) error
+
+	blockPool sync.Pool
+}
+
+// NewStream creates a new encoder and initializes it to
+// the number of data shards and parity shards that
+// you want to use. You can reuse this encoder.
+// Note that the maximum number of data shards is 256.
+func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
+	r := rsStream{o: defaultOptions}
+	for _, opt := range o {
+		opt(&r.o)
+	}
+	// Override block size if shard size is set.
+	if r.o.streamBS == 0 && r.o.shardSize > 0 {
+		r.o.streamBS = r.o.shardSize
+	}
+	if r.o.streamBS <= 0 {
+		r.o.streamBS = 4 << 20
+	}
+	if r.o.shardSize == 0 && r.o.maxGoroutines == defaultOptions.maxGoroutines {
+		o = append(o, WithAutoGoroutines(r.o.streamBS))
+	}
+
+	enc, err := New(dataShards, parityShards, o...)
+	if err != nil {
+		return nil, err
+	}
+	r.r = enc.(*reedSolomon)
+
+	// Each pool entry holds one buffer per data+parity shard.
+	r.blockPool.New = func() interface{} {
+		out := make([][]byte, dataShards+parityShards)
+		for i := range out {
+			out[i] = make([]byte, r.o.streamBS)
+		}
+		return out
+	}
+	r.readShards = readShards
+	r.writeShards = writeShards
+	if r.o.concReads {
+		r.readShards = cReadShards
+	}
+	if r.o.concWrites {
+		r.writeShards = cWriteShards
+	}
+
+	return &r, err
+}
+
+// NewStreamC creates a new encoder and initializes it to
+// the number of data shards and parity shards given.
+//
+// This functions as 'NewStream', but allows you to enable concurrent reads and writes.
+func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
+	return NewStream(dataShards, parityShards, append(o, WithConcurrentStreamReads(conReads), WithConcurrentStreamWrites(conWrites))...)
+}
+
+// createSlice returns a pooled set of shard buffers,
+// each reset to the full stream block size.
+func (r *rsStream) createSlice() [][]byte {
+	out := r.blockPool.Get().([][]byte)
+	for i := range out {
+		out[i] = out[i][:r.o.streamBS]
+	}
+	return out
+}
+
+// Encodes parity shards for a set of data shards.
+//
+// Input is 'shards' containing readers for data shards followed by parity shards
+// io.Writer.
+//
+// The number of shards must match the number given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+//
+// The parity shards will be written to the writer.
+// The number of bytes written will match the input size.
+//
+// If a data stream returns an error, a StreamReadError type error
+// will be returned. If a parity writer returns an error, a
+// StreamWriteError will be returned.
+func (r *rsStream) Encode(data []io.Reader, parity []io.Writer) error {
+	if len(data) != r.r.DataShards {
+		return ErrTooFewShards
+	}
+
+	if len(parity) != r.r.ParityShards {
+		return ErrTooFewShards
+	}
+
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	in := all[:r.r.DataShards]
+	out := all[r.r.DataShards:]
+	read := 0
+
+	for {
+		err := r.readShards(in, data)
+		switch err {
+		case nil:
+		case io.EOF:
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		default:
+			return err
+		}
+		// Limit parity output to the number of bytes actually read.
+		out = trimShards(out, shardSize(in))
+		read += shardSize(in)
+		err = r.r.Encode(all)
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(parity, out)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Trim the shards so they are all the same size
+func trimShards(in [][]byte, size int) [][]byte {
+	for i := range in {
+		if len(in[i]) != 0 {
+			in[i] = in[i][0:size]
+		}
+		// NOTE(review): after the reslice above, only shards that were
+		// already zero-length can be shorter than size; keep them empty.
+		if len(in[i]) < size {
+			in[i] = in[i][:0]
+		}
+	}
+	return in
+}
+
+// readShards reads one block from each reader into the matching dst buffer.
+// A nil reader marks a missing shard and empties its dst entry.
+// io.EOF is returned once the present streams yield zero bytes.
+func readShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	size := -1
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = dst[i][:0]
+			continue
+		}
+		n, err := io.ReadFull(in[i], dst[i])
+		// The error is EOF only if no bytes were read.
+		// If an EOF happens after reading some but not all the bytes,
+		// ReadFull returns ErrUnexpectedEOF.
+		switch err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = n
+			} else if n != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[i] = dst[i][0:n]
+		case nil:
+			continue
+		default:
+			return StreamReadError{Err: err, Stream: i}
+		}
+	}
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+// writeShards writes each input buffer to the matching writer.
+// A nil writer skips that shard.
+func writeShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	for i := range in {
+		if out[i] == nil {
+			continue
+		}
+		n, err := out[i].Write(in[i])
+		if err != nil {
+			return StreamWriteError{Err: err, Stream: i}
+		}
+		// Writes must be complete; a short write is an error.
+		if n != len(in[i]) {
+			return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+		}
+	}
+	return nil
+}
+
+// readResult carries the outcome of one concurrent shard read.
+type readResult struct {
+	n    int   // stream index the result belongs to
+	size int   // number of bytes read
+	err  error // read error, if any
+}
+
+// cReadShards reads shards concurrently.
+// Results are gathered on a channel and validated after all reads finish.
+func cReadShards(dst [][]byte, in []io.Reader) error {
+	if len(in) != len(dst) {
+		panic("internal error: in and dst size do not match")
+	}
+	var wg sync.WaitGroup
+	wg.Add(len(in))
+	res := make(chan readResult, len(in))
+	for i := range in {
+		if in[i] == nil {
+			dst[i] = dst[i][:0]
+			wg.Done()
+			continue
+		}
+		go func(i int) {
+			defer wg.Done()
+			n, err := io.ReadFull(in[i], dst[i])
+			// The error is EOF only if no bytes were read.
+			// If an EOF happens after reading some but not all the bytes,
+			// ReadFull returns ErrUnexpectedEOF.
+			res <- readResult{size: n, err: err, n: i}
+
+		}(i)
+	}
+	wg.Wait()
+	close(res)
+	size := -1
+	for r := range res {
+		switch r.err {
+		case io.ErrUnexpectedEOF, io.EOF:
+			if size < 0 {
+				size = r.size
+			} else if r.size != size {
+				// Shard sizes must match.
+				return ErrShardSize
+			}
+			dst[r.n] = dst[r.n][0:r.size]
+		case nil:
+		default:
+			return StreamReadError{Err: r.err, Stream: r.n}
+		}
+	}
+	if size == 0 {
+		return io.EOF
+	}
+	return nil
+}
+
+// cWriteShards writes shards concurrently.
+// The first error collected is returned.
+func cWriteShards(out []io.Writer, in [][]byte) error {
+	if len(out) != len(in) {
+		panic("internal error: in and out size do not match")
+	}
+	var errs = make(chan error, len(out))
+	var wg sync.WaitGroup
+	wg.Add(len(out))
+	for i := range in {
+		go func(i int) {
+			defer wg.Done()
+			if out[i] == nil {
+				errs <- nil
+				return
+			}
+			n, err := out[i].Write(in[i])
+			if err != nil {
+				errs <- StreamWriteError{Err: err, Stream: i}
+				return
+			}
+			if n != len(in[i]) {
+				errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
+			}
+		}(i)
+	}
+	wg.Wait()
+	close(errs)
+	for err := range errs {
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Verify returns true if the parity shards contain correct data.
+//
+// The number of shards must match the total number of data+parity shards
+// given to NewStream().
+//
+// Each reader must supply the same number of bytes.
+// If a shard stream returns an error, a StreamReadError type error
+// will be returned.
+func (r *rsStream) Verify(shards []io.Reader) (bool, error) {
+	if len(shards) != r.r.Shards {
+		return false, ErrTooFewShards
+	}
+
+	read := 0
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	for {
+		err := r.readShards(all, shards)
+		if err == io.EOF {
+			if read == 0 {
+				return false, ErrShardNoData
+			}
+			return true, nil
+		}
+		if err != nil {
+			return false, err
+		}
+		read += shardSize(all)
+		ok, err := r.r.Verify(all)
+		if !ok || err != nil {
+			return ok, err
+		}
+	}
+}
+
+// ErrReconstructMismatch is returned by the StreamEncoder, if you supply
+// "valid" and "fill" streams on the same index.
+// Therefore it is impossible to tell if you consider the shard valid
+// or would like to have it reconstructed.
+var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
+
+// Reconstruct will recreate the missing shards if possible.
+//
+// Given a list of valid shards (to read) and invalid shards (to write)
+//
+// You indicate that a shard is missing by setting it to nil in the 'valid'
+// slice and at the same time setting a non-nil writer in "fill".
+// An index cannot contain both non-nil 'valid' and 'fill' entry.
+//
+// If there are too few shards to reconstruct the missing
+// ones, ErrTooFewShards will be returned.
+//
+// The reconstructed shard set is complete when explicitly asked for all missing shards.
+// However its integrity is not automatically verified.
+// Use the Verify function to check whether the data set is ok.
+func (r *rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
+	if len(valid) != r.r.Shards {
+		return ErrTooFewShards
+	}
+	if len(fill) != r.r.Shards {
+		return ErrTooFewShards
+	}
+
+	all := r.createSlice()
+	defer r.blockPool.Put(all)
+	reconDataOnly := true
+	for i := range valid {
+		if valid[i] != nil && fill[i] != nil {
+			return ErrReconstructMismatch
+		}
+		// A parity shard is requested; a full reconstruction is needed.
+		if i >= r.r.DataShards && fill[i] != nil {
+			reconDataOnly = false
+		}
+	}
+
+	read := 0
+	for {
+		err := r.readShards(all, valid)
+		if err == io.EOF {
+			if read == 0 {
+				return ErrShardNoData
+			}
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+		read += shardSize(all)
+		all = trimShards(all, shardSize(all))
+
+		if reconDataOnly {
+			err = r.r.ReconstructData(all) // just reconstruct missing data shards
+		} else {
+			err = r.r.Reconstruct(all) // reconstruct all missing shards
+		}
+		if err != nil {
+			return err
+		}
+		err = r.writeShards(fill, all)
+		if err != nil {
+			return err
+		}
+	}
+}
+
+// Join the shards and write the data segment to dst.
+//
+// Only the data shards are considered.
+//
+// You must supply the exact output size you want.
+// If there are too few shards given, ErrTooFewShards will be returned.
+// If the total data size is less than outSize, ErrShortData will be returned.
+func (r *rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
+	// Do we have enough shards?
+	if len(shards) < r.r.DataShards {
+		return ErrTooFewShards
+	}
+
+	// Trim off parity shards if any
+	shards = shards[:r.r.DataShards]
+	for i := range shards {
+		if shards[i] == nil {
+			return StreamReadError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+	// Join all shards
+	src := io.MultiReader(shards...)
+
+	// Copy data to dst
+	n, err := io.CopyN(dst, src, outSize)
+	if err == io.EOF {
+		return ErrShortData
+	}
+	if err != nil {
+		return err
+	}
+	if n != outSize {
+		return ErrShortData
+	}
+	return nil
+}
+
+// Split an input stream into the number of shards given to the encoder.
+//
+// The data will be split into equally sized shards.
+// If the data size isn't divisible by the number of shards,
+// the last shard will contain extra zeros.
+//
+// You must supply the total size of your input.
+// 'ErrShortData' will be returned if it is unable to retrieve the
+// number of bytes indicated.
+func (r *rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
+	if size == 0 {
+		return ErrShortData
+	}
+	if len(dst) != r.r.DataShards {
+		return ErrInvShardNum
+	}
+
+	for i := range dst {
+		if dst[i] == nil {
+			return StreamWriteError{Err: ErrShardNoData, Stream: i}
+		}
+	}
+
+	// Calculate number of bytes per shard.
+	perShard := (size + int64(r.r.DataShards) - 1) / int64(r.r.DataShards)
+
+	// Pad data to r.Shards*perShard.
+	padding := make([]byte, (int64(r.r.Shards)*perShard)-size)
+	data = io.MultiReader(data, bytes.NewBuffer(padding))
+
+	// Split into equal-length shards and copy.
+	for i := range dst {
+		n, err := io.CopyN(dst[i], data, perShard)
+		if err != io.EOF && err != nil {
+			return err
+		}
+		if n != perShard {
+			return ErrShortData
+		}
+	}
+
+	return nil
+}