diff --git a/go.mod b/go.mod index 6505848..565541e 100644 --- a/go.mod +++ b/go.mod @@ -12,10 +12,10 @@ require ( github.com/go-playground/universal-translator v0.18.1 // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/leodido/go-urn v1.2.1 // indirect - golang.org/x/crypto v0.21.0 // indirect + golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.18.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.33.0 // indirect ) diff --git a/go.sum b/go.sum index 4092899..89b7bbd 100644 --- a/go.sum +++ b/go.sum @@ -21,20 +21,20 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= -golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.5.0 h1:HuArIo48skDwlrvM3sEdHXElYslAMsf3KwRkkW4MC4s= golang.org/x/oauth2 v0.5.0/go.mod h1:9/XBHVqLaWO3/BRHs5jbpYCnOZVjj5V0ndyaAM7KB4I= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= -golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= diff --git a/vendor/golang.org/x/crypto/LICENSE b/vendor/golang.org/x/crypto/LICENSE index 6a66aea..2a7cf70 100644 --- a/vendor/golang.org/x/crypto/LICENSE +++ b/vendor/golang.org/x/crypto/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/crypto/sha3/doc.go b/vendor/golang.org/x/crypto/sha3/doc.go index decd8cf..bbf391f 100644 --- a/vendor/golang.org/x/crypto/sha3/doc.go +++ b/vendor/golang.org/x/crypto/sha3/doc.go @@ -5,6 +5,10 @@ // Package sha3 implements the SHA-3 fixed-output-length hash functions and // the SHAKE variable-output-length hash functions defined by FIPS-202. // +// All types in this package also implement [encoding.BinaryMarshaler], +// [encoding.BinaryAppender] and [encoding.BinaryUnmarshaler] to marshal and +// unmarshal the internal state of the hash. +// // Both types of hash function use the "sponge" construction and the Keccak // permutation. For a detailed specification see http://keccak.noekeon.org/ // @@ -59,4 +63,4 @@ // They produce output of the same length, with the same security strengths // against all attacks. This means, in particular, that SHA3-256 only has // 128-bit collision resistance, because its output length is 32 bytes. -package sha3 // import "golang.org/x/crypto/sha3" +package sha3 diff --git a/vendor/golang.org/x/crypto/sha3/hashes.go b/vendor/golang.org/x/crypto/sha3/hashes.go index 0d8043f..31fffbe 100644 --- a/vendor/golang.org/x/crypto/sha3/hashes.go +++ b/vendor/golang.org/x/crypto/sha3/hashes.go @@ -9,6 +9,7 @@ package sha3 // bytes. import ( + "crypto" "hash" ) @@ -16,53 +17,83 @@ import ( // Its generic security strength is 224 bits against preimage attacks, // and 112 bits against collision attacks. func New224() hash.Hash { - if h := new224Asm(); h != nil { - return h - } - return &state{rate: 144, outputLen: 28, dsbyte: 0x06} + return new224() } // New256 creates a new SHA3-256 hash. // Its generic security strength is 256 bits against preimage attacks, // and 128 bits against collision attacks. func New256() hash.Hash { - if h := new256Asm(); h != nil { - return h - } - return &state{rate: 136, outputLen: 32, dsbyte: 0x06} + return new256() } // New384 creates a new SHA3-384 hash. // Its generic security strength is 384 bits against preimage attacks, // and 192 bits against collision attacks. func New384() hash.Hash { - if h := new384Asm(); h != nil { - return h - } - return &state{rate: 104, outputLen: 48, dsbyte: 0x06} + return new384() } // New512 creates a new SHA3-512 hash. // Its generic security strength is 512 bits against preimage attacks, // and 256 bits against collision attacks. func New512() hash.Hash { - if h := new512Asm(); h != nil { - return h - } - return &state{rate: 72, outputLen: 64, dsbyte: 0x06} + return new512() +} + +func init() { + crypto.RegisterHash(crypto.SHA3_224, New224) + crypto.RegisterHash(crypto.SHA3_256, New256) + crypto.RegisterHash(crypto.SHA3_384, New384) + crypto.RegisterHash(crypto.SHA3_512, New512) +} + +const ( + dsbyteSHA3 = 0b00000110 + dsbyteKeccak = 0b00000001 + dsbyteShake = 0b00011111 + dsbyteCShake = 0b00000100 + + // rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in + // bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits. + rateK256 = (1600 - 256) / 8 + rateK448 = (1600 - 448) / 8 + rateK512 = (1600 - 512) / 8 + rateK768 = (1600 - 768) / 8 + rateK1024 = (1600 - 1024) / 8 +) + +func new224Generic() *state { + return &state{rate: rateK448, outputLen: 28, dsbyte: dsbyteSHA3} +} + +func new256Generic() *state { + return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteSHA3} +} + +func new384Generic() *state { + return &state{rate: rateK768, outputLen: 48, dsbyte: dsbyteSHA3} +} + +func new512Generic() *state { + return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteSHA3} } // NewLegacyKeccak256 creates a new Keccak-256 hash. // // Only use this function if you require compatibility with an existing cryptosystem // that uses non-standard padding. All other users should use New256 instead. -func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} } +func NewLegacyKeccak256() hash.Hash { + return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak} +} // NewLegacyKeccak512 creates a new Keccak-512 hash. // // Only use this function if you require compatibility with an existing cryptosystem // that uses non-standard padding. All other users should use New512 instead. -func NewLegacyKeccak512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x01} } +func NewLegacyKeccak512() hash.Hash { + return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak} +} // Sum224 returns the SHA3-224 digest of the data. func Sum224(data []byte) (digest [28]byte) { diff --git a/vendor/golang.org/x/crypto/sha3/hashes_generic.go b/vendor/golang.org/x/crypto/sha3/hashes_generic.go deleted file mode 100644 index fe8c847..0000000 --- a/vendor/golang.org/x/crypto/sha3/hashes_generic.go +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !gc || purego || !s390x - -package sha3 - -import ( - "hash" -) - -// new224Asm returns an assembly implementation of SHA3-224 if available, -// otherwise it returns nil. -func new224Asm() hash.Hash { return nil } - -// new256Asm returns an assembly implementation of SHA3-256 if available, -// otherwise it returns nil. -func new256Asm() hash.Hash { return nil } - -// new384Asm returns an assembly implementation of SHA3-384 if available, -// otherwise it returns nil. -func new384Asm() hash.Hash { return nil } - -// new512Asm returns an assembly implementation of SHA3-512 if available, -// otherwise it returns nil. -func new512Asm() hash.Hash { return nil } diff --git a/vendor/golang.org/x/crypto/sha3/hashes_noasm.go b/vendor/golang.org/x/crypto/sha3/hashes_noasm.go new file mode 100644 index 0000000..9d85fb6 --- /dev/null +++ b/vendor/golang.org/x/crypto/sha3/hashes_noasm.go @@ -0,0 +1,23 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !gc || purego || !s390x + +package sha3 + +func new224() *state { + return new224Generic() +} + +func new256() *state { + return new256Generic() +} + +func new384() *state { + return new384Generic() +} + +func new512() *state { + return new512Generic() +} diff --git a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s index 1f53938..99e2f16 100644 --- a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s +++ b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s @@ -1,390 +1,5419 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. +// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT. //go:build amd64 && !purego && gc -// This code was translated into a form compatible with 6a from the public -// domain sources at https://github.com/gvanas/KeccakCodePackage - -// Offsets in state -#define _ba (0*8) -#define _be (1*8) -#define _bi (2*8) -#define _bo (3*8) -#define _bu (4*8) -#define _ga (5*8) -#define _ge (6*8) -#define _gi (7*8) -#define _go (8*8) -#define _gu (9*8) -#define _ka (10*8) -#define _ke (11*8) -#define _ki (12*8) -#define _ko (13*8) -#define _ku (14*8) -#define _ma (15*8) -#define _me (16*8) -#define _mi (17*8) -#define _mo (18*8) -#define _mu (19*8) -#define _sa (20*8) -#define _se (21*8) -#define _si (22*8) -#define _so (23*8) -#define _su (24*8) - -// Temporary registers -#define rT1 AX - -// Round vars -#define rpState DI -#define rpStack SP - -#define rDa BX -#define rDe CX -#define rDi DX -#define rDo R8 -#define rDu R9 - -#define rBa R10 -#define rBe R11 -#define rBi R12 -#define rBo R13 -#define rBu R14 - -#define rCa SI -#define rCe BP -#define rCi rBi -#define rCo rBo -#define rCu R15 - -#define MOVQ_RBI_RCE MOVQ rBi, rCe -#define XORQ_RT1_RCA XORQ rT1, rCa -#define XORQ_RT1_RCE XORQ rT1, rCe -#define XORQ_RBA_RCU XORQ rBa, rCu -#define XORQ_RBE_RCU XORQ rBe, rCu -#define XORQ_RDU_RCU XORQ rDu, rCu -#define XORQ_RDA_RCA XORQ rDa, rCa -#define XORQ_RDE_RCE XORQ rDe, rCe - -#define mKeccakRound(iState, oState, rc, B_RBI_RCE, G_RT1_RCA, G_RT1_RCE, G_RBA_RCU, K_RT1_RCA, K_RT1_RCE, K_RBA_RCU, M_RT1_RCA, M_RT1_RCE, M_RBE_RCU, S_RDU_RCU, S_RDA_RCA, S_RDE_RCE) \ - /* Prepare round */ \ - MOVQ rCe, rDa; \ - ROLQ $1, rDa; \ - \ - MOVQ _bi(iState), rCi; \ - XORQ _gi(iState), rDi; \ - XORQ rCu, rDa; \ - XORQ _ki(iState), rCi; \ - XORQ _mi(iState), rDi; \ - XORQ rDi, rCi; \ - \ - MOVQ rCi, rDe; \ - ROLQ $1, rDe; \ - \ - MOVQ _bo(iState), rCo; \ - XORQ _go(iState), rDo; \ - XORQ rCa, rDe; \ - XORQ _ko(iState), rCo; \ - XORQ _mo(iState), rDo; \ - XORQ rDo, rCo; \ - \ - MOVQ rCo, rDi; \ - ROLQ $1, rDi; \ - \ - MOVQ rCu, rDo; \ - XORQ rCe, rDi; \ - ROLQ $1, rDo; \ - \ - MOVQ rCa, rDu; \ - XORQ rCi, rDo; \ - ROLQ $1, rDu; \ - \ - /* Result b */ \ - MOVQ _ba(iState), rBa; \ - MOVQ _ge(iState), rBe; \ - XORQ rCo, rDu; \ - MOVQ _ki(iState), rBi; \ - MOVQ _mo(iState), rBo; \ - MOVQ _su(iState), rBu; \ - XORQ rDe, rBe; \ - ROLQ $44, rBe; \ - XORQ rDi, rBi; \ - XORQ rDa, rBa; \ - ROLQ $43, rBi; \ - \ - MOVQ rBe, rCa; \ - MOVQ rc, rT1; \ - ORQ rBi, rCa; \ - XORQ rBa, rT1; \ - XORQ rT1, rCa; \ - MOVQ rCa, _ba(oState); \ - \ - XORQ rDu, rBu; \ - ROLQ $14, rBu; \ - MOVQ rBa, rCu; \ - ANDQ rBe, rCu; \ - XORQ rBu, rCu; \ - MOVQ rCu, _bu(oState); \ - \ - XORQ rDo, rBo; \ - ROLQ $21, rBo; \ - MOVQ rBo, rT1; \ - ANDQ rBu, rT1; \ - XORQ rBi, rT1; \ - MOVQ rT1, _bi(oState); \ - \ - NOTQ rBi; \ - ORQ rBa, rBu; \ - ORQ rBo, rBi; \ - XORQ rBo, rBu; \ - XORQ rBe, rBi; \ - MOVQ rBu, _bo(oState); \ - MOVQ rBi, _be(oState); \ - B_RBI_RCE; \ - \ - /* Result g */ \ - MOVQ _gu(iState), rBe; \ - XORQ rDu, rBe; \ - MOVQ _ka(iState), rBi; \ - ROLQ $20, rBe; \ - XORQ rDa, rBi; \ - ROLQ $3, rBi; \ - MOVQ _bo(iState), rBa; \ - MOVQ rBe, rT1; \ - ORQ rBi, rT1; \ - XORQ rDo, rBa; \ - MOVQ _me(iState), rBo; \ - MOVQ _si(iState), rBu; \ - ROLQ $28, rBa; \ - XORQ rBa, rT1; \ - MOVQ rT1, _ga(oState); \ - G_RT1_RCA; \ - \ - XORQ rDe, rBo; \ - ROLQ $45, rBo; \ - MOVQ rBi, rT1; \ - ANDQ rBo, rT1; \ - XORQ rBe, rT1; \ - MOVQ rT1, _ge(oState); \ - G_RT1_RCE; \ - \ - XORQ rDi, rBu; \ - ROLQ $61, rBu; \ - MOVQ rBu, rT1; \ - ORQ rBa, rT1; \ - XORQ rBo, rT1; \ - MOVQ rT1, _go(oState); \ - \ - ANDQ rBe, rBa; \ - XORQ rBu, rBa; \ - MOVQ rBa, _gu(oState); \ - NOTQ rBu; \ - G_RBA_RCU; \ - \ - ORQ rBu, rBo; \ - XORQ rBi, rBo; \ - MOVQ rBo, _gi(oState); \ - \ - /* Result k */ \ - MOVQ _be(iState), rBa; \ - MOVQ _gi(iState), rBe; \ - MOVQ _ko(iState), rBi; \ - MOVQ _mu(iState), rBo; \ - MOVQ _sa(iState), rBu; \ - XORQ rDi, rBe; \ - ROLQ $6, rBe; \ - XORQ rDo, rBi; \ - ROLQ $25, rBi; \ - MOVQ rBe, rT1; \ - ORQ rBi, rT1; \ - XORQ rDe, rBa; \ - ROLQ $1, rBa; \ - XORQ rBa, rT1; \ - MOVQ rT1, _ka(oState); \ - K_RT1_RCA; \ - \ - XORQ rDu, rBo; \ - ROLQ $8, rBo; \ - MOVQ rBi, rT1; \ - ANDQ rBo, rT1; \ - XORQ rBe, rT1; \ - MOVQ rT1, _ke(oState); \ - K_RT1_RCE; \ - \ - XORQ rDa, rBu; \ - ROLQ $18, rBu; \ - NOTQ rBo; \ - MOVQ rBo, rT1; \ - ANDQ rBu, rT1; \ - XORQ rBi, rT1; \ - MOVQ rT1, _ki(oState); \ - \ - MOVQ rBu, rT1; \ - ORQ rBa, rT1; \ - XORQ rBo, rT1; \ - MOVQ rT1, _ko(oState); \ - \ - ANDQ rBe, rBa; \ - XORQ rBu, rBa; \ - MOVQ rBa, _ku(oState); \ - K_RBA_RCU; \ - \ - /* Result m */ \ - MOVQ _ga(iState), rBe; \ - XORQ rDa, rBe; \ - MOVQ _ke(iState), rBi; \ - ROLQ $36, rBe; \ - XORQ rDe, rBi; \ - MOVQ _bu(iState), rBa; \ - ROLQ $10, rBi; \ - MOVQ rBe, rT1; \ - MOVQ _mi(iState), rBo; \ - ANDQ rBi, rT1; \ - XORQ rDu, rBa; \ - MOVQ _so(iState), rBu; \ - ROLQ $27, rBa; \ - XORQ rBa, rT1; \ - MOVQ rT1, _ma(oState); \ - M_RT1_RCA; \ - \ - XORQ rDi, rBo; \ - ROLQ $15, rBo; \ - MOVQ rBi, rT1; \ - ORQ rBo, rT1; \ - XORQ rBe, rT1; \ - MOVQ rT1, _me(oState); \ - M_RT1_RCE; \ - \ - XORQ rDo, rBu; \ - ROLQ $56, rBu; \ - NOTQ rBo; \ - MOVQ rBo, rT1; \ - ORQ rBu, rT1; \ - XORQ rBi, rT1; \ - MOVQ rT1, _mi(oState); \ - \ - ORQ rBa, rBe; \ - XORQ rBu, rBe; \ - MOVQ rBe, _mu(oState); \ - \ - ANDQ rBa, rBu; \ - XORQ rBo, rBu; \ - MOVQ rBu, _mo(oState); \ - M_RBE_RCU; \ - \ - /* Result s */ \ - MOVQ _bi(iState), rBa; \ - MOVQ _go(iState), rBe; \ - MOVQ _ku(iState), rBi; \ - XORQ rDi, rBa; \ - MOVQ _ma(iState), rBo; \ - ROLQ $62, rBa; \ - XORQ rDo, rBe; \ - MOVQ _se(iState), rBu; \ - ROLQ $55, rBe; \ - \ - XORQ rDu, rBi; \ - MOVQ rBa, rDu; \ - XORQ rDe, rBu; \ - ROLQ $2, rBu; \ - ANDQ rBe, rDu; \ - XORQ rBu, rDu; \ - MOVQ rDu, _su(oState); \ - \ - ROLQ $39, rBi; \ - S_RDU_RCU; \ - NOTQ rBe; \ - XORQ rDa, rBo; \ - MOVQ rBe, rDa; \ - ANDQ rBi, rDa; \ - XORQ rBa, rDa; \ - MOVQ rDa, _sa(oState); \ - S_RDA_RCA; \ - \ - ROLQ $41, rBo; \ - MOVQ rBi, rDe; \ - ORQ rBo, rDe; \ - XORQ rBe, rDe; \ - MOVQ rDe, _se(oState); \ - S_RDE_RCE; \ - \ - MOVQ rBo, rDi; \ - MOVQ rBu, rDo; \ - ANDQ rBu, rDi; \ - ORQ rBa, rDo; \ - XORQ rBi, rDi; \ - XORQ rBo, rDo; \ - MOVQ rDi, _si(oState); \ - MOVQ rDo, _so(oState) \ - // func keccakF1600(a *[25]uint64) -TEXT ·keccakF1600(SB), 0, $200-8 - MOVQ a+0(FP), rpState +TEXT ·keccakF1600(SB), $200-8 + MOVQ a+0(FP), DI // Convert the user state into an internal state - NOTQ _be(rpState) - NOTQ _bi(rpState) - NOTQ _go(rpState) - NOTQ _ki(rpState) - NOTQ _mi(rpState) - NOTQ _sa(rpState) + NOTQ 8(DI) + NOTQ 16(DI) + NOTQ 64(DI) + NOTQ 96(DI) + NOTQ 136(DI) + NOTQ 160(DI) // Execute the KeccakF permutation - MOVQ _ba(rpState), rCa - MOVQ _be(rpState), rCe - MOVQ _bu(rpState), rCu - - XORQ _ga(rpState), rCa - XORQ _ge(rpState), rCe - XORQ _gu(rpState), rCu - - XORQ _ka(rpState), rCa - XORQ _ke(rpState), rCe - XORQ _ku(rpState), rCu - - XORQ _ma(rpState), rCa - XORQ _me(rpState), rCe - XORQ _mu(rpState), rCu - - XORQ _sa(rpState), rCa - XORQ _se(rpState), rCe - MOVQ _si(rpState), rDi - MOVQ _so(rpState), rDo - XORQ _su(rpState), rCu - - mKeccakRound(rpState, rpStack, $0x0000000000000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x0000000000008082, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x800000000000808a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000080008000, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000000000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000000000008a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x0000000000000088, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x0000000080008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x000000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000008000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x800000000000008b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000000008089, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000008003, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000000008002, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000000080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x000000000000800a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x800000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000000008080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpState, rpStack, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE) - mKeccakRound(rpStack, rpState, $0x8000000080008008, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP) + MOVQ (DI), SI + MOVQ 8(DI), BP + MOVQ 32(DI), R15 + XORQ 40(DI), SI + XORQ 48(DI), BP + XORQ 72(DI), R15 + XORQ 80(DI), SI + XORQ 88(DI), BP + XORQ 112(DI), R15 + XORQ 120(DI), SI + XORQ 128(DI), BP + XORQ 152(DI), R15 + XORQ 160(DI), SI + XORQ 168(DI), BP + MOVQ 176(DI), DX + MOVQ 184(DI), R8 + XORQ 192(DI), R15 - // Revert the internal state to the user state - NOTQ _be(rpState) - NOTQ _bi(rpState) - NOTQ _go(rpState) - NOTQ _ki(rpState) - NOTQ _mi(rpState) - NOTQ _sa(rpState) + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000008082, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000000000808a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008000, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000808b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008081, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008009, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000008a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000000000088, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080008009, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000008000000a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000008000808b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000000000008b, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008089, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008003, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008002, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000000080, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x000000000000800a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x800000008000000a, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008081, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000000008080, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + MOVQ R12, BP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + XORQ R10, R15 + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + XORQ R11, R15 + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(DI), R12 + XORQ 56(DI), DX + XORQ R15, BX + XORQ 96(DI), R12 + XORQ 136(DI), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(DI), R13 + XORQ 64(DI), R8 + XORQ SI, CX + XORQ 104(DI), R13 + XORQ 144(DI), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (DI), R10 + MOVQ 48(DI), R11 + XORQ R13, R9 + MOVQ 96(DI), R12 + MOVQ 144(DI), R13 + MOVQ 192(DI), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x0000000080000001, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (SP) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(SP) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(SP) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(SP) + MOVQ R12, 8(SP) + MOVQ R12, BP + + // Result g + MOVQ 72(DI), R11 + XORQ R9, R11 + MOVQ 80(DI), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(DI), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(DI), R13 + MOVQ 176(DI), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(SP) + XORQ AX, SI + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(SP) + XORQ AX, BP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(SP) + NOTQ R14 + XORQ R10, R15 + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(SP) + + // Result k + MOVQ 8(DI), R10 + MOVQ 56(DI), R11 + MOVQ 104(DI), R12 + MOVQ 152(DI), R13 + MOVQ 160(DI), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(SP) + XORQ AX, SI + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(SP) + XORQ AX, BP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(SP) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(SP) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(SP) + XORQ R10, R15 + + // Result m + MOVQ 40(DI), R11 + XORQ BX, R11 + MOVQ 88(DI), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(DI), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(DI), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(DI), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(SP) + XORQ AX, SI + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(SP) + XORQ AX, BP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(SP) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(SP) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(SP) + XORQ R11, R15 + + // Result s + MOVQ 16(DI), R10 + MOVQ 64(DI), R11 + MOVQ 112(DI), R12 + XORQ DX, R10 + MOVQ 120(DI), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(DI), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(SP) + ROLQ $0x27, R12 + XORQ R9, R15 + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(SP) + XORQ BX, SI + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(SP) + XORQ CX, BP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(SP) + MOVQ R8, 184(SP) + + // Prepare round + MOVQ BP, BX + ROLQ $0x01, BX + MOVQ 16(SP), R12 + XORQ 56(SP), DX + XORQ R15, BX + XORQ 96(SP), R12 + XORQ 136(SP), DX + XORQ DX, R12 + MOVQ R12, CX + ROLQ $0x01, CX + MOVQ 24(SP), R13 + XORQ 64(SP), R8 + XORQ SI, CX + XORQ 104(SP), R13 + XORQ 144(SP), R8 + XORQ R8, R13 + MOVQ R13, DX + ROLQ $0x01, DX + MOVQ R15, R8 + XORQ BP, DX + ROLQ $0x01, R8 + MOVQ SI, R9 + XORQ R12, R8 + ROLQ $0x01, R9 + + // Result b + MOVQ (SP), R10 + MOVQ 48(SP), R11 + XORQ R13, R9 + MOVQ 96(SP), R12 + MOVQ 144(SP), R13 + MOVQ 192(SP), R14 + XORQ CX, R11 + ROLQ $0x2c, R11 + XORQ DX, R12 + XORQ BX, R10 + ROLQ $0x2b, R12 + MOVQ R11, SI + MOVQ $0x8000000080008008, AX + ORQ R12, SI + XORQ R10, AX + XORQ AX, SI + MOVQ SI, (DI) + XORQ R9, R14 + ROLQ $0x0e, R14 + MOVQ R10, R15 + ANDQ R11, R15 + XORQ R14, R15 + MOVQ R15, 32(DI) + XORQ R8, R13 + ROLQ $0x15, R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 16(DI) + NOTQ R12 + ORQ R10, R14 + ORQ R13, R12 + XORQ R13, R14 + XORQ R11, R12 + MOVQ R14, 24(DI) + MOVQ R12, 8(DI) + NOP + + // Result g + MOVQ 72(SP), R11 + XORQ R9, R11 + MOVQ 80(SP), R12 + ROLQ $0x14, R11 + XORQ BX, R12 + ROLQ $0x03, R12 + MOVQ 24(SP), R10 + MOVQ R11, AX + ORQ R12, AX + XORQ R8, R10 + MOVQ 128(SP), R13 + MOVQ 176(SP), R14 + ROLQ $0x1c, R10 + XORQ R10, AX + MOVQ AX, 40(DI) + NOP + XORQ CX, R13 + ROLQ $0x2d, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 48(DI) + NOP + XORQ DX, R14 + ROLQ $0x3d, R14 + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 64(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 72(DI) + NOTQ R14 + NOP + ORQ R14, R13 + XORQ R12, R13 + MOVQ R13, 56(DI) + + // Result k + MOVQ 8(SP), R10 + MOVQ 56(SP), R11 + MOVQ 104(SP), R12 + MOVQ 152(SP), R13 + MOVQ 160(SP), R14 + XORQ DX, R11 + ROLQ $0x06, R11 + XORQ R8, R12 + ROLQ $0x19, R12 + MOVQ R11, AX + ORQ R12, AX + XORQ CX, R10 + ROLQ $0x01, R10 + XORQ R10, AX + MOVQ AX, 80(DI) + NOP + XORQ R9, R13 + ROLQ $0x08, R13 + MOVQ R12, AX + ANDQ R13, AX + XORQ R11, AX + MOVQ AX, 88(DI) + NOP + XORQ BX, R14 + ROLQ $0x12, R14 + NOTQ R13 + MOVQ R13, AX + ANDQ R14, AX + XORQ R12, AX + MOVQ AX, 96(DI) + MOVQ R14, AX + ORQ R10, AX + XORQ R13, AX + MOVQ AX, 104(DI) + ANDQ R11, R10 + XORQ R14, R10 + MOVQ R10, 112(DI) + NOP + + // Result m + MOVQ 40(SP), R11 + XORQ BX, R11 + MOVQ 88(SP), R12 + ROLQ $0x24, R11 + XORQ CX, R12 + MOVQ 32(SP), R10 + ROLQ $0x0a, R12 + MOVQ R11, AX + MOVQ 136(SP), R13 + ANDQ R12, AX + XORQ R9, R10 + MOVQ 184(SP), R14 + ROLQ $0x1b, R10 + XORQ R10, AX + MOVQ AX, 120(DI) + NOP + XORQ DX, R13 + ROLQ $0x0f, R13 + MOVQ R12, AX + ORQ R13, AX + XORQ R11, AX + MOVQ AX, 128(DI) + NOP + XORQ R8, R14 + ROLQ $0x38, R14 + NOTQ R13 + MOVQ R13, AX + ORQ R14, AX + XORQ R12, AX + MOVQ AX, 136(DI) + ORQ R10, R11 + XORQ R14, R11 + MOVQ R11, 152(DI) + ANDQ R10, R14 + XORQ R13, R14 + MOVQ R14, 144(DI) + NOP + + // Result s + MOVQ 16(SP), R10 + MOVQ 64(SP), R11 + MOVQ 112(SP), R12 + XORQ DX, R10 + MOVQ 120(SP), R13 + ROLQ $0x3e, R10 + XORQ R8, R11 + MOVQ 168(SP), R14 + ROLQ $0x37, R11 + XORQ R9, R12 + MOVQ R10, R9 + XORQ CX, R14 + ROLQ $0x02, R14 + ANDQ R11, R9 + XORQ R14, R9 + MOVQ R9, 192(DI) + ROLQ $0x27, R12 + NOP + NOTQ R11 + XORQ BX, R13 + MOVQ R11, BX + ANDQ R12, BX + XORQ R10, BX + MOVQ BX, 160(DI) + NOP + ROLQ $0x29, R13 + MOVQ R12, CX + ORQ R13, CX + XORQ R11, CX + MOVQ CX, 168(DI) + NOP + MOVQ R13, DX + MOVQ R14, R8 + ANDQ R14, DX + ORQ R10, R8 + XORQ R12, DX + XORQ R13, R8 + MOVQ DX, 176(DI) + MOVQ R8, 184(DI) + + // Revert the internal state to the user state + NOTQ 8(DI) + NOTQ 16(DI) + NOTQ 64(DI) + NOTQ 96(DI) + NOTQ 136(DI) + NOTQ 160(DI) RET diff --git a/vendor/golang.org/x/crypto/sha3/register.go b/vendor/golang.org/x/crypto/sha3/register.go deleted file mode 100644 index addfd50..0000000 --- a/vendor/golang.org/x/crypto/sha3/register.go +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.4 - -package sha3 - -import ( - "crypto" -) - -func init() { - crypto.RegisterHash(crypto.SHA3_224, New224) - crypto.RegisterHash(crypto.SHA3_256, New256) - crypto.RegisterHash(crypto.SHA3_384, New384) - crypto.RegisterHash(crypto.SHA3_512, New512) -} diff --git a/vendor/golang.org/x/crypto/sha3/sha3.go b/vendor/golang.org/x/crypto/sha3/sha3.go index 4884d17..6658c44 100644 --- a/vendor/golang.org/x/crypto/sha3/sha3.go +++ b/vendor/golang.org/x/crypto/sha3/sha3.go @@ -4,6 +4,15 @@ package sha3 +import ( + "crypto/subtle" + "encoding/binary" + "errors" + "unsafe" + + "golang.org/x/sys/cpu" +) + // spongeDirection indicates the direction bytes are flowing through the sponge. type spongeDirection int @@ -14,17 +23,13 @@ const ( spongeSqueezing ) -const ( - // maxRate is the maximum size of the internal buffer. SHAKE-256 - // currently needs the largest buffer. - maxRate = 168 -) - type state struct { - // Generic sponge components. - a [25]uint64 // main state of the hash - buf []byte // points into storage - rate int // the number of bytes of state to use + a [1600 / 8]byte // main state of the hash + + // a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR + // into before running the permutation. If squeezing, it's the remaining + // output to produce before running the permutation. + n, rate int // dsbyte contains the "domain separation" bits and the first bit of // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the @@ -40,9 +45,6 @@ type state struct { // Extendable-Output Functions (May 2014)" dsbyte byte - storage storageBuf - - // Specific to SHA-3 and SHAKE. outputLen int // the default output size in bytes state spongeDirection // whether the sponge is absorbing or squeezing } @@ -54,103 +56,77 @@ func (d *state) BlockSize() int { return d.rate } func (d *state) Size() int { return d.outputLen } // Reset clears the internal state by zeroing the sponge state and -// the byte buffer, and setting Sponge.state to absorbing. +// the buffer indexes, and setting Sponge.state to absorbing. func (d *state) Reset() { // Zero the permutation's state. for i := range d.a { d.a[i] = 0 } d.state = spongeAbsorbing - d.buf = d.storage.asBytes()[:0] + d.n = 0 } func (d *state) clone() *state { ret := *d - if ret.state == spongeAbsorbing { - ret.buf = ret.storage.asBytes()[:len(ret.buf)] - } else { - ret.buf = ret.storage.asBytes()[d.rate-cap(d.buf) : d.rate] - } - return &ret } -// permute applies the KeccakF-1600 permutation. It handles -// any input-output buffering. +// permute applies the KeccakF-1600 permutation. func (d *state) permute() { - switch d.state { - case spongeAbsorbing: - // If we're absorbing, we need to xor the input into the state - // before applying the permutation. - xorIn(d, d.buf) - d.buf = d.storage.asBytes()[:0] - keccakF1600(&d.a) - case spongeSqueezing: - // If we're squeezing, we need to apply the permutation before - // copying more output. - keccakF1600(&d.a) - d.buf = d.storage.asBytes()[:d.rate] - copyOut(d, d.buf) + var a *[25]uint64 + if cpu.IsBigEndian { + a = new([25]uint64) + for i := range a { + a[i] = binary.LittleEndian.Uint64(d.a[i*8:]) + } + } else { + a = (*[25]uint64)(unsafe.Pointer(&d.a)) + } + + keccakF1600(a) + d.n = 0 + + if cpu.IsBigEndian { + for i := range a { + binary.LittleEndian.PutUint64(d.a[i*8:], a[i]) + } } } // pads appends the domain separation bits in dsbyte, applies // the multi-bitrate 10..1 padding rule, and permutes the state. -func (d *state) padAndPermute(dsbyte byte) { - if d.buf == nil { - d.buf = d.storage.asBytes()[:0] - } +func (d *state) padAndPermute() { // Pad with this instance's domain-separator bits. We know that there's - // at least one byte of space in d.buf because, if it were full, + // at least one byte of space in the sponge because, if it were full, // permute would have been called to empty it. dsbyte also contains the // first one bit for the padding. See the comment in the state struct. - d.buf = append(d.buf, dsbyte) - zerosStart := len(d.buf) - d.buf = d.storage.asBytes()[:d.rate] - for i := zerosStart; i < d.rate; i++ { - d.buf[i] = 0 - } + d.a[d.n] ^= d.dsbyte // This adds the final one bit for the padding. Because of the way that // bits are numbered from the LSB upwards, the final bit is the MSB of // the last byte. - d.buf[d.rate-1] ^= 0x80 + d.a[d.rate-1] ^= 0x80 // Apply the permutation d.permute() d.state = spongeSqueezing - d.buf = d.storage.asBytes()[:d.rate] - copyOut(d, d.buf) } // Write absorbs more data into the hash's state. It panics if any // output has already been read. -func (d *state) Write(p []byte) (written int, err error) { +func (d *state) Write(p []byte) (n int, err error) { if d.state != spongeAbsorbing { panic("sha3: Write after Read") } - if d.buf == nil { - d.buf = d.storage.asBytes()[:0] - } - written = len(p) + + n = len(p) for len(p) > 0 { - if len(d.buf) == 0 && len(p) >= d.rate { - // The fast path; absorb a full "rate" bytes of input and apply the permutation. - xorIn(d, p[:d.rate]) - p = p[d.rate:] - keccakF1600(&d.a) - } else { - // The slow path; buffer the input until we can fill the sponge, and then xor it in. - todo := d.rate - len(d.buf) - if todo > len(p) { - todo = len(p) - } - d.buf = append(d.buf, p[:todo]...) - p = p[todo:] - - // If the sponge is full, apply the permutation. - if len(d.buf) == d.rate { - d.permute() - } + x := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], p) + d.n += x + p = p[x:] + + // If the sponge is full, apply the permutation. + if d.n == d.rate { + d.permute() } } @@ -161,21 +137,21 @@ func (d *state) Write(p []byte) (written int, err error) { func (d *state) Read(out []byte) (n int, err error) { // If we're still absorbing, pad and apply the permutation. if d.state == spongeAbsorbing { - d.padAndPermute(d.dsbyte) + d.padAndPermute() } n = len(out) // Now, do the squeezing. for len(out) > 0 { - n := copy(out, d.buf) - d.buf = d.buf[n:] - out = out[n:] - // Apply the permutation if we've squeezed the sponge dry. - if len(d.buf) == 0 { + if d.n == d.rate { d.permute() } + + x := copy(out, d.a[d.n:d.rate]) + d.n += x + out = out[x:] } return @@ -195,3 +171,74 @@ func (d *state) Sum(in []byte) []byte { dup.Read(hash) return append(in, hash...) } + +const ( + magicSHA3 = "sha\x08" + magicShake = "sha\x09" + magicCShake = "sha\x0a" + magicKeccak = "sha\x0b" + // magic || rate || main state || n || sponge direction + marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1 +) + +func (d *state) MarshalBinary() ([]byte, error) { + return d.AppendBinary(make([]byte, 0, marshaledSize)) +} + +func (d *state) AppendBinary(b []byte) ([]byte, error) { + switch d.dsbyte { + case dsbyteSHA3: + b = append(b, magicSHA3...) + case dsbyteShake: + b = append(b, magicShake...) + case dsbyteCShake: + b = append(b, magicCShake...) + case dsbyteKeccak: + b = append(b, magicKeccak...) + default: + panic("unknown dsbyte") + } + // rate is at most 168, and n is at most rate. + b = append(b, byte(d.rate)) + b = append(b, d.a[:]...) + b = append(b, byte(d.n), byte(d.state)) + return b, nil +} + +func (d *state) UnmarshalBinary(b []byte) error { + if len(b) != marshaledSize { + return errors.New("sha3: invalid hash state") + } + + magic := string(b[:len(magicSHA3)]) + b = b[len(magicSHA3):] + switch { + case magic == magicSHA3 && d.dsbyte == dsbyteSHA3: + case magic == magicShake && d.dsbyte == dsbyteShake: + case magic == magicCShake && d.dsbyte == dsbyteCShake: + case magic == magicKeccak && d.dsbyte == dsbyteKeccak: + default: + return errors.New("sha3: invalid hash state identifier") + } + + rate := int(b[0]) + b = b[1:] + if rate != d.rate { + return errors.New("sha3: invalid hash state function") + } + + copy(d.a[:], b) + b = b[len(d.a):] + + n, state := int(b[0]), spongeDirection(b[1]) + if n > d.rate { + return errors.New("sha3: invalid hash state") + } + d.n = n + if state != spongeAbsorbing && state != spongeSqueezing { + return errors.New("sha3: invalid hash state") + } + d.state = state + + return nil +} diff --git a/vendor/golang.org/x/crypto/sha3/sha3_s390x.go b/vendor/golang.org/x/crypto/sha3/sha3_s390x.go index d861bca..00d8034 100644 --- a/vendor/golang.org/x/crypto/sha3/sha3_s390x.go +++ b/vendor/golang.org/x/crypto/sha3/sha3_s390x.go @@ -143,6 +143,12 @@ func (s *asmState) Write(b []byte) (int, error) { // Read squeezes an arbitrary number of bytes from the sponge. func (s *asmState) Read(out []byte) (n int, err error) { + // The 'compute last message digest' instruction only stores the digest + // at the first operand (dst) for SHAKE functions. + if s.function != shake_128 && s.function != shake_256 { + panic("sha3: can only call Read for SHAKE functions") + } + n = len(out) // need to pad if we were absorbing @@ -202,8 +208,17 @@ func (s *asmState) Sum(b []byte) []byte { // Hash the buffer. Note that we don't clear it because we // aren't updating the state. - klmd(s.function, &a, nil, s.buf) - return append(b, a[:s.outputLen]...) + switch s.function { + case sha3_224, sha3_256, sha3_384, sha3_512: + klmd(s.function, &a, nil, s.buf) + return append(b, a[:s.outputLen]...) + case shake_128, shake_256: + d := make([]byte, s.outputLen, 64) + klmd(s.function, &a, d, s.buf) + return append(b, d[:s.outputLen]...) + default: + panic("sha3: unknown function") + } } // Reset resets the Hash to its initial state. @@ -233,56 +248,56 @@ func (s *asmState) Clone() ShakeHash { return s.clone() } -// new224Asm returns an assembly implementation of SHA3-224 if available, -// otherwise it returns nil. -func new224Asm() hash.Hash { +// new224 returns an assembly implementation of SHA3-224 if available, +// otherwise it returns a generic implementation. +func new224() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_224) } - return nil + return new224Generic() } -// new256Asm returns an assembly implementation of SHA3-256 if available, -// otherwise it returns nil. -func new256Asm() hash.Hash { +// new256 returns an assembly implementation of SHA3-256 if available, +// otherwise it returns a generic implementation. +func new256() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_256) } - return nil + return new256Generic() } -// new384Asm returns an assembly implementation of SHA3-384 if available, -// otherwise it returns nil. -func new384Asm() hash.Hash { +// new384 returns an assembly implementation of SHA3-384 if available, +// otherwise it returns a generic implementation. +func new384() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_384) } - return nil + return new384Generic() } -// new512Asm returns an assembly implementation of SHA3-512 if available, -// otherwise it returns nil. -func new512Asm() hash.Hash { +// new512 returns an assembly implementation of SHA3-512 if available, +// otherwise it returns a generic implementation. +func new512() hash.Hash { if cpu.S390X.HasSHA3 { return newAsmState(sha3_512) } - return nil + return new512Generic() } -// newShake128Asm returns an assembly implementation of SHAKE-128 if available, -// otherwise it returns nil. -func newShake128Asm() ShakeHash { +// newShake128 returns an assembly implementation of SHAKE-128 if available, +// otherwise it returns a generic implementation. +func newShake128() ShakeHash { if cpu.S390X.HasSHA3 { return newAsmState(shake_128) } - return nil + return newShake128Generic() } -// newShake256Asm returns an assembly implementation of SHAKE-256 if available, -// otherwise it returns nil. -func newShake256Asm() ShakeHash { +// newShake256 returns an assembly implementation of SHAKE-256 if available, +// otherwise it returns a generic implementation. +func newShake256() ShakeHash { if cpu.S390X.HasSHA3 { return newAsmState(shake_256) } - return nil + return newShake256Generic() } diff --git a/vendor/golang.org/x/crypto/sha3/shake.go b/vendor/golang.org/x/crypto/sha3/shake.go index bb69984..a6b3a42 100644 --- a/vendor/golang.org/x/crypto/sha3/shake.go +++ b/vendor/golang.org/x/crypto/sha3/shake.go @@ -16,9 +16,12 @@ package sha3 // [2] https://doi.org/10.6028/NIST.SP.800-185 import ( + "bytes" "encoding/binary" + "errors" "hash" "io" + "math/bits" ) // ShakeHash defines the interface to hash functions that support @@ -50,44 +53,36 @@ type cshakeState struct { initBlock []byte } -// Consts for configuring initial SHA-3 state -const ( - dsbyteShake = 0x1f - dsbyteCShake = 0x04 - rate128 = 168 - rate256 = 136 -) +func bytepad(data []byte, rate int) []byte { + out := make([]byte, 0, 9+len(data)+rate-1) + out = append(out, leftEncode(uint64(rate))...) + out = append(out, data...) + if padlen := rate - len(out)%rate; padlen < rate { + out = append(out, make([]byte, padlen)...) + } + return out +} -func bytepad(input []byte, w int) []byte { - // leftEncode always returns max 9 bytes - buf := make([]byte, 0, 9+len(input)+w) - buf = append(buf, leftEncode(uint64(w))...) - buf = append(buf, input...) - padlen := w - (len(buf) % w) - return append(buf, make([]byte, padlen)...) -} - -func leftEncode(value uint64) []byte { - var b [9]byte - binary.BigEndian.PutUint64(b[1:], value) - // Trim all but last leading zero bytes - i := byte(1) - for i < 8 && b[i] == 0 { - i++ +func leftEncode(x uint64) []byte { + // Let n be the smallest positive integer for which 2^(8n) > x. + n := (bits.Len64(x) + 7) / 8 + if n == 0 { + n = 1 } - // Prepend number of encoded bytes - b[i-1] = 9 - i - return b[i-1:] + // Return n || x with n as a byte and x an n bytes in big-endian order. + b := make([]byte, 9) + binary.BigEndian.PutUint64(b[1:], x) + b = b[9-n-1:] + b[0] = byte(n) + return b } func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash { c := cshakeState{state: &state{rate: rate, outputLen: outputLen, dsbyte: dsbyte}} - - // leftEncode returns max 9 bytes - c.initBlock = make([]byte, 0, 9*2+len(N)+len(S)) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...) + c.initBlock = make([]byte, 0, 9+len(N)+9+len(S)) // leftEncode returns max 9 bytes + c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...) c.initBlock = append(c.initBlock, N...) - c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...) + c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...) c.initBlock = append(c.initBlock, S...) c.Write(bytepad(c.initBlock, c.rate)) return &c @@ -111,24 +106,50 @@ func (c *state) Clone() ShakeHash { return c.clone() } +func (c *cshakeState) MarshalBinary() ([]byte, error) { + return c.AppendBinary(make([]byte, 0, marshaledSize+len(c.initBlock))) +} + +func (c *cshakeState) AppendBinary(b []byte) ([]byte, error) { + b, err := c.state.AppendBinary(b) + if err != nil { + return nil, err + } + b = append(b, c.initBlock...) + return b, nil +} + +func (c *cshakeState) UnmarshalBinary(b []byte) error { + if len(b) <= marshaledSize { + return errors.New("sha3: invalid hash state") + } + if err := c.state.UnmarshalBinary(b[:marshaledSize]); err != nil { + return err + } + c.initBlock = bytes.Clone(b[marshaledSize:]) + return nil +} + // NewShake128 creates a new SHAKE128 variable-output-length ShakeHash. // Its generic security strength is 128 bits against all attacks if at // least 32 bytes of its output are used. func NewShake128() ShakeHash { - if h := newShake128Asm(); h != nil { - return h - } - return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake} + return newShake128() } // NewShake256 creates a new SHAKE256 variable-output-length ShakeHash. // Its generic security strength is 256 bits against all attacks if // at least 64 bytes of its output are used. func NewShake256() ShakeHash { - if h := newShake256Asm(); h != nil { - return h - } - return &state{rate: rate256, outputLen: 64, dsbyte: dsbyteShake} + return newShake256() +} + +func newShake128Generic() *state { + return &state{rate: rateK256, outputLen: 32, dsbyte: dsbyteShake} +} + +func newShake256Generic() *state { + return &state{rate: rateK512, outputLen: 64, dsbyte: dsbyteShake} } // NewCShake128 creates a new instance of cSHAKE128 variable-output-length ShakeHash, @@ -141,7 +162,7 @@ func NewCShake128(N, S []byte) ShakeHash { if len(N) == 0 && len(S) == 0 { return NewShake128() } - return newCShake(N, S, rate128, 32, dsbyteCShake) + return newCShake(N, S, rateK256, 32, dsbyteCShake) } // NewCShake256 creates a new instance of cSHAKE256 variable-output-length ShakeHash, @@ -154,7 +175,7 @@ func NewCShake256(N, S []byte) ShakeHash { if len(N) == 0 && len(S) == 0 { return NewShake256() } - return newCShake(N, S, rate256, 64, dsbyteCShake) + return newCShake(N, S, rateK512, 64, dsbyteCShake) } // ShakeSum128 writes an arbitrary-length digest of data into hash. diff --git a/vendor/golang.org/x/crypto/sha3/shake_generic.go b/vendor/golang.org/x/crypto/sha3/shake_generic.go deleted file mode 100644 index 8d31cf5..0000000 --- a/vendor/golang.org/x/crypto/sha3/shake_generic.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !gc || purego || !s390x - -package sha3 - -// newShake128Asm returns an assembly implementation of SHAKE-128 if available, -// otherwise it returns nil. -func newShake128Asm() ShakeHash { - return nil -} - -// newShake256Asm returns an assembly implementation of SHAKE-256 if available, -// otherwise it returns nil. -func newShake256Asm() ShakeHash { - return nil -} diff --git a/vendor/golang.org/x/crypto/sha3/shake_noasm.go b/vendor/golang.org/x/crypto/sha3/shake_noasm.go new file mode 100644 index 0000000..4276ba4 --- /dev/null +++ b/vendor/golang.org/x/crypto/sha3/shake_noasm.go @@ -0,0 +1,15 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !gc || purego || !s390x + +package sha3 + +func newShake128() *state { + return newShake128Generic() +} + +func newShake256() *state { + return newShake256Generic() +} diff --git a/vendor/golang.org/x/crypto/sha3/xor.go b/vendor/golang.org/x/crypto/sha3/xor.go deleted file mode 100644 index 7337cca..0000000 --- a/vendor/golang.org/x/crypto/sha3/xor.go +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (!amd64 && !386 && !ppc64le) || purego - -package sha3 - -// A storageBuf is an aligned array of maxRate bytes. -type storageBuf [maxRate]byte - -func (b *storageBuf) asBytes() *[maxRate]byte { - return (*[maxRate]byte)(b) -} - -var ( - xorIn = xorInGeneric - copyOut = copyOutGeneric - xorInUnaligned = xorInGeneric - copyOutUnaligned = copyOutGeneric -) - -const xorImplementationUnaligned = "generic" diff --git a/vendor/golang.org/x/crypto/sha3/xor_generic.go b/vendor/golang.org/x/crypto/sha3/xor_generic.go deleted file mode 100644 index 8d94771..0000000 --- a/vendor/golang.org/x/crypto/sha3/xor_generic.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package sha3 - -import "encoding/binary" - -// xorInGeneric xors the bytes in buf into the state; it -// makes no non-portable assumptions about memory layout -// or alignment. -func xorInGeneric(d *state, buf []byte) { - n := len(buf) / 8 - - for i := 0; i < n; i++ { - a := binary.LittleEndian.Uint64(buf) - d.a[i] ^= a - buf = buf[8:] - } -} - -// copyOutGeneric copies uint64s to a byte buffer. -func copyOutGeneric(d *state, b []byte) { - for i := 0; len(b) >= 8; i++ { - binary.LittleEndian.PutUint64(b, d.a[i]) - b = b[8:] - } -} diff --git a/vendor/golang.org/x/crypto/sha3/xor_unaligned.go b/vendor/golang.org/x/crypto/sha3/xor_unaligned.go deleted file mode 100644 index 870e2d1..0000000 --- a/vendor/golang.org/x/crypto/sha3/xor_unaligned.go +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build (amd64 || 386 || ppc64le) && !purego - -package sha3 - -import "unsafe" - -// A storageBuf is an aligned array of maxRate bytes. -type storageBuf [maxRate / 8]uint64 - -func (b *storageBuf) asBytes() *[maxRate]byte { - return (*[maxRate]byte)(unsafe.Pointer(b)) -} - -// xorInUnaligned uses unaligned reads and writes to update d.a to contain d.a -// XOR buf. -func xorInUnaligned(d *state, buf []byte) { - n := len(buf) - bw := (*[maxRate / 8]uint64)(unsafe.Pointer(&buf[0]))[: n/8 : n/8] - if n >= 72 { - d.a[0] ^= bw[0] - d.a[1] ^= bw[1] - d.a[2] ^= bw[2] - d.a[3] ^= bw[3] - d.a[4] ^= bw[4] - d.a[5] ^= bw[5] - d.a[6] ^= bw[6] - d.a[7] ^= bw[7] - d.a[8] ^= bw[8] - } - if n >= 104 { - d.a[9] ^= bw[9] - d.a[10] ^= bw[10] - d.a[11] ^= bw[11] - d.a[12] ^= bw[12] - } - if n >= 136 { - d.a[13] ^= bw[13] - d.a[14] ^= bw[14] - d.a[15] ^= bw[15] - d.a[16] ^= bw[16] - } - if n >= 144 { - d.a[17] ^= bw[17] - } - if n >= 168 { - d.a[18] ^= bw[18] - d.a[19] ^= bw[19] - d.a[20] ^= bw[20] - } -} - -func copyOutUnaligned(d *state, buf []byte) { - ab := (*[maxRate]uint8)(unsafe.Pointer(&d.a[0])) - copy(buf, ab[:]) -} - -var ( - xorIn = xorInUnaligned - copyOut = copyOutUnaligned -) - -const xorImplementationUnaligned = "unaligned" diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE index 6a66aea..2a7cf70 100644 --- a/vendor/golang.org/x/sys/LICENSE +++ b/vendor/golang.org/x/sys/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s new file mode 100644 index 0000000..ec2acfe --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s @@ -0,0 +1,17 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +#include "textflag.h" + +TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctl(SB) +GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) + +TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctlbyname(SB) +GLOBL ·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB) diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go index 4756ad5..02609d5 100644 --- a/vendor/golang.org/x/sys/cpu/cpu.go +++ b/vendor/golang.org/x/sys/cpu/cpu.go @@ -103,7 +103,10 @@ var ARM64 struct { HasASIMDDP bool // Advanced SIMD double precision instruction set HasSHA512 bool // SHA512 hardware implementation HasSVE bool // Scalable Vector Extensions + HasSVE2 bool // Scalable Vector Extensions 2 HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32 + HasDIT bool // Data Independent Timing support + HasI8MM bool // Advanced SIMD Int8 matrix multiplication instructions _ CacheLinePad } @@ -198,6 +201,25 @@ var S390X struct { _ CacheLinePad } +// RISCV64 contains the supported CPU features and performance characteristics for riscv64 +// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate +// the presence of RISC-V extensions. +// +// It is safe to assume that all the RV64G extensions are supported and so they are omitted from +// this structure. As riscv64 Go programs require at least RV64G, the code that populates +// this structure cannot run successfully if some of the RV64G extensions are missing. +// The struct is padded to avoid false sharing. +var RISCV64 struct { + _ CacheLinePad + HasFastMisaligned bool // Fast misaligned accesses + HasC bool // Compressed instruction-set extension + HasV bool // Vector extension compatible with RVV 1.0 + HasZba bool // Address generation instructions extension + HasZbb bool // Basic bit-manipulation extension + HasZbs bool // Single-bit instructions extension + _ CacheLinePad +} + func init() { archInit() initOptions() diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go index f3eb993..af2aa99 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go @@ -28,6 +28,7 @@ func initOptions() { {Name: "sm3", Feature: &ARM64.HasSM3}, {Name: "sm4", Feature: &ARM64.HasSM4}, {Name: "sve", Feature: &ARM64.HasSVE}, + {Name: "sve2", Feature: &ARM64.HasSVE2}, {Name: "crc32", Feature: &ARM64.HasCRC32}, {Name: "atomics", Feature: &ARM64.HasATOMICS}, {Name: "asimdhp", Feature: &ARM64.HasASIMDHP}, @@ -37,6 +38,8 @@ func initOptions() { {Name: "dcpop", Feature: &ARM64.HasDCPOP}, {Name: "asimddp", Feature: &ARM64.HasASIMDDP}, {Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM}, + {Name: "dit", Feature: &ARM64.HasDIT}, + {Name: "i8mm", Feature: &ARM64.HasI8MM}, } } @@ -144,6 +147,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { ARM64.HasLRCPC = true } + switch extractBits(isar1, 52, 55) { + case 1: + ARM64.HasI8MM = true + } + // ID_AA64PFR0_EL1 switch extractBits(pfr0, 16, 19) { case 0: @@ -164,6 +172,20 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { switch extractBits(pfr0, 32, 35) { case 1: ARM64.HasSVE = true + + parseARM64SVERegister(getzfr0()) + } + + switch extractBits(pfr0, 48, 51) { + case 1: + ARM64.HasDIT = true + } +} + +func parseARM64SVERegister(zfr0 uint64) { + switch extractBits(zfr0, 0, 3) { + case 1: + ARM64.HasSVE2 = true } } diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.s b/vendor/golang.org/x/sys/cpu/cpu_arm64.s index fcb9a38..22cc998 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_arm64.s +++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.s @@ -29,3 +29,11 @@ TEXT ·getpfr0(SB),NOSPLIT,$0-8 WORD $0xd5380400 MOVD R0, ret+0(FP) RET + +// func getzfr0() uint64 +TEXT ·getzfr0(SB),NOSPLIT,$0-8 + // get SVE Feature Register 0 into x0 + // mrs x0, ID_AA64ZFR0_EL1 = d5380480 + WORD $0xd5380480 + MOVD R0, ret+0(FP) + RET diff --git a/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go new file mode 100644 index 0000000..b838cb9 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +package cpu + +// darwinSupportsAVX512 checks Darwin kernel for AVX512 support via sysctl +// call (see issue 43089). It also restricts AVX512 support for Darwin to +// kernel version 21.3.0 (MacOS 12.2.0) or later (see issue 49233). +// +// Background: +// Darwin implements a special mechanism to economize on thread state when +// AVX512 specific registers are not in use. This scheme minimizes state when +// preempting threads that haven't yet used any AVX512 instructions, but adds +// special requirements to check for AVX512 hardware support at runtime (e.g. +// via sysctl call or commpage inspection). See issue 43089 and link below for +// full background: +// https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.1.10/osfmk/i386/fpu.c#L214-L240 +// +// Additionally, all versions of the Darwin kernel from 19.6.0 through 21.2.0 +// (corresponding to MacOS 10.15.6 - 12.1) have a bug that can cause corruption +// of the AVX512 mask registers (K0-K7) upon signal return. For this reason +// AVX512 is considered unsafe to use on Darwin for kernel versions prior to +// 21.3.0, where a fix has been confirmed. See issue 49233 for full background. +func darwinSupportsAVX512() bool { + return darwinSysctlEnabled([]byte("hw.optional.avx512f\x00")) && darwinKernelVersionCheck(21, 3, 0) +} + +// Ensure Darwin kernel version is at least major.minor.patch, avoiding dependencies +func darwinKernelVersionCheck(major, minor, patch int) bool { + var release [256]byte + err := darwinOSRelease(&release) + if err != nil { + return false + } + + var mmp [3]int + c := 0 +Loop: + for _, b := range release[:] { + switch { + case b >= '0' && b <= '9': + mmp[c] = 10*mmp[c] + int(b-'0') + case b == '.': + c++ + if c > 2 { + return false + } + case b == 0: + break Loop + default: + return false + } + } + if c != 2 { + return false + } + return mmp[0] > major || mmp[0] == major && (mmp[1] > minor || mmp[1] == minor && mmp[2] >= patch) +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go index a8acd3e..6ac6e1e 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go @@ -9,3 +9,4 @@ package cpu func getisar0() uint64 func getisar1() uint64 func getpfr0() uint64 +func getzfr0() uint64 diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go index 910728f..32a4451 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go @@ -6,10 +6,10 @@ package cpu -// cpuid is implemented in cpu_x86.s for gc compiler +// cpuid is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) -// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler +// xgetbv with ecx = 0 is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func xgetbv() (eax, edx uint32) diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s similarity index 94% rename from vendor/golang.org/x/sys/cpu/cpu_x86.s rename to vendor/golang.org/x/sys/cpu/cpu_gc_x86.s index 7d7ba33..ce208ce 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_x86.s +++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s @@ -18,7 +18,7 @@ TEXT ·cpuid(SB), NOSPLIT, $0-24 RET // func xgetbv() (eax, edx uint32) -TEXT ·xgetbv(SB),NOSPLIT,$0-8 +TEXT ·xgetbv(SB), NOSPLIT, $0-8 MOVL $0, CX XGETBV MOVL AX, eax+0(FP) diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go index 99c60fe..170d21d 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go @@ -23,9 +23,3 @@ func xgetbv() (eax, edx uint32) { gccgoXgetbv(&a, &d) return a, d } - -// gccgo doesn't build on Darwin, per: -// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76 -func darwinSupportsAVX512() bool { - return false -} diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go index a968b80..f1caf0f 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go @@ -35,6 +35,10 @@ const ( hwcap_SHA512 = 1 << 21 hwcap_SVE = 1 << 22 hwcap_ASIMDFHM = 1 << 23 + hwcap_DIT = 1 << 24 + + hwcap2_SVE2 = 1 << 1 + hwcap2_I8MM = 1 << 13 ) // linuxKernelCanEmulateCPUID reports whether we're running @@ -104,6 +108,11 @@ func doinit() { ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) ARM64.HasSVE = isSet(hwCap, hwcap_SVE) ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) + ARM64.HasDIT = isSet(hwCap, hwcap_DIT) + + // HWCAP2 feature bits + ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2) + ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM) } func isSet(hwc uint, value uint) bool { diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go index cd63e73..7d902b6 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x +//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64 package cpu diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go new file mode 100644 index 0000000..cb4a0c5 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go @@ -0,0 +1,137 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cpu + +import ( + "syscall" + "unsafe" +) + +// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe +// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available. +// +// A note on detection of the Vector extension using HWCAP. +// +// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. +// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe +// syscall is not available then neither is the Vector extension (which needs kernel support). +// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. +// However, some RISC-V board manufacturers ship boards with an older kernel on top of which +// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe +// patches. These kernels advertise support for the Vector extension using HWCAP. Falling +// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not +// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. +// +// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by +// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board +// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified +// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use +// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector +// extension are binary incompatible. HWCAP can then not be used in isolation to populate the +// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. +// +// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector +// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype +// register. This check would allow us to safely detect version 1.0 of the Vector extension +// with HWCAP, if riscv_hwprobe were not available. However, the check cannot +// be added until the assembler supports the Vector instructions. +// +// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the +// extensions it advertises support for are explicitly versioned. It's also worth noting that +// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba. +// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority +// of RISC-V extensions. +// +// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. + +// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must +// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall +// here. + +const ( + // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. + riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 + riscv_HWPROBE_IMA_C = 0x2 + riscv_HWPROBE_IMA_V = 0x4 + riscv_HWPROBE_EXT_ZBA = 0x8 + riscv_HWPROBE_EXT_ZBB = 0x10 + riscv_HWPROBE_EXT_ZBS = 0x20 + riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 + riscv_HWPROBE_MISALIGNED_FAST = 0x3 + riscv_HWPROBE_MISALIGNED_MASK = 0x7 +) + +const ( + // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. + sys_RISCV_HWPROBE = 258 +) + +// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. +type riscvHWProbePairs struct { + key int64 + value uint64 +} + +const ( + // CPU features + hwcap_RISCV_ISA_C = 1 << ('C' - 'A') +) + +func doinit() { + // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key + // field should be initialised with one of the key constants defined above, e.g., + // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. + // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. + + pairs := []riscvHWProbePairs{ + {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, + {riscv_HWPROBE_KEY_CPUPERF_0, 0}, + } + + // This call only indicates that extensions are supported if they are implemented on all cores. + if riscvHWProbe(pairs, 0) { + if pairs[0].key != -1 { + v := uint(pairs[0].value) + RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C) + RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) + RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA) + RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB) + RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS) + } + if pairs[1].key != -1 { + v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK + RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST + } + } + + // Let's double check with HWCAP if the C extension does not appear to be supported. + // This may happen if we're running on a kernel older than 6.4. + + if !RISCV64.HasC { + RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C) + } +} + +func isSet(hwc uint, value uint) bool { + return hwc&value != 0 +} + +// riscvHWProbe is a simplified version of the generated wrapper function found in +// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the +// cpuCount and cpus parameters which we do not need. We always want to pass 0 for +// these parameters here so the kernel only reports the extensions that are present +// on all cores. +func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { + var _zero uintptr + var p0 unsafe.Pointer + if len(pairs) > 0 { + p0 = unsafe.Pointer(&pairs[0]) + } else { + p0 = unsafe.Pointer(&_zero) + } + + _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0) + return e1 == 0 +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_x86.go b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go new file mode 100644 index 0000000..a0fd7e2 --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 || amd64p32 || (amd64 && (!darwin || !gc)) + +package cpu + +func darwinSupportsAVX512() bool { + panic("only implemented for gc && amd64 && darwin") +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go index 7f0c79c..aca3199 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go +++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go @@ -8,4 +8,13 @@ package cpu const cacheLineSize = 64 -func initOptions() {} +func initOptions() { + options = []option{ + {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, + {Name: "c", Feature: &RISCV64.HasC}, + {Name: "v", Feature: &RISCV64.HasV}, + {Name: "zba", Feature: &RISCV64.HasZba}, + {Name: "zbb", Feature: &RISCV64.HasZbb}, + {Name: "zbs", Feature: &RISCV64.HasZbs}, + } +} diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go index c29f5e4..600a680 100644 --- a/vendor/golang.org/x/sys/cpu/cpu_x86.go +++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go @@ -92,10 +92,8 @@ func archInit() { osSupportsAVX = isSet(1, eax) && isSet(2, eax) if runtime.GOOS == "darwin" { - // Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers. - // Since users can't rely on mask register contents, let's not advertise AVX-512 support. - // See issue 49233. - osSupportsAVX512 = false + // Darwin requires special AVX512 checks, see cpu_darwin_x86.go + osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512() } else { // Check if OPMASK and ZMM registers have OS support. osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax) diff --git a/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go new file mode 100644 index 0000000..4d0888b --- /dev/null +++ b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go @@ -0,0 +1,98 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Minimal copy of x/sys/unix so the cpu package can make a +// system call on Darwin without depending on x/sys/unix. + +//go:build darwin && amd64 && gc + +package cpu + +import ( + "syscall" + "unsafe" +) + +type _C_int int32 + +// adapted from unix.Uname() at x/sys/unix/syscall_darwin.go L419 +func darwinOSRelease(release *[256]byte) error { + // from x/sys/unix/zerrors_openbsd_amd64.go + const ( + CTL_KERN = 0x1 + KERN_OSRELEASE = 0x2 + ) + + mib := []_C_int{CTL_KERN, KERN_OSRELEASE} + n := unsafe.Sizeof(*release) + + return sysctl(mib, &release[0], &n, nil, 0) +} + +type Errno = syscall.Errno + +var _zero uintptr // Single-word zero for use when we need a valid pointer to 0 bytes. + +// from x/sys/unix/zsyscall_darwin_amd64.go L791-807 +func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + var _p0 unsafe.Pointer + if len(mib) > 0 { + _p0 = unsafe.Pointer(&mib[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + if _, _, err := syscall_syscall6( + libc_sysctl_trampoline_addr, + uintptr(_p0), + uintptr(len(mib)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + ); err != 0 { + return err + } + + return nil +} + +var libc_sysctl_trampoline_addr uintptr + +// adapted from internal/cpu/cpu_arm64_darwin.go +func darwinSysctlEnabled(name []byte) bool { + out := int32(0) + nout := unsafe.Sizeof(out) + if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil { + return false + } + return out > 0 +} + +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" + +var libc_sysctlbyname_trampoline_addr uintptr + +// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix +func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + if _, _, err := syscall_syscall6( + libc_sysctlbyname_trampoline_addr, + uintptr(unsafe.Pointer(name)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + 0, + ); err != 0 { + return err + } + + return nil +} + +//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib" + +// Implemented in the runtime package (runtime/sys_darwin.go) +func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) + +//go:linkname syscall_syscall6 syscall.syscall6 diff --git a/vendor/golang.org/x/text/LICENSE b/vendor/golang.org/x/text/LICENSE index 6a66aea..2a7cf70 100644 --- a/vendor/golang.org/x/text/LICENSE +++ b/vendor/golang.org/x/text/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2009 The Go Authors. All rights reserved. +Copyright 2009 The Go Authors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer. copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of Google Inc. nor the names of its + * Neither the name of Google LLC nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. diff --git a/vendor/modules.txt b/vendor/modules.txt index 6251b40..6f6be68 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -14,8 +14,8 @@ github.com/golang/protobuf/proto # github.com/leodido/go-urn v1.2.1 ## explicit; go 1.13 github.com/leodido/go-urn -# golang.org/x/crypto v0.21.0 -## explicit; go 1.18 +# golang.org/x/crypto v0.31.0 +## explicit; go 1.20 golang.org/x/crypto/sha3 # golang.org/x/net v0.23.0 ## explicit; go 1.18 @@ -25,10 +25,10 @@ golang.org/x/net/context golang.org/x/oauth2 golang.org/x/oauth2/clientcredentials golang.org/x/oauth2/internal -# golang.org/x/sys v0.18.0 +# golang.org/x/sys v0.28.0 ## explicit; go 1.18 golang.org/x/sys/cpu -# golang.org/x/text v0.14.0 +# golang.org/x/text v0.21.0 ## explicit; go 1.18 golang.org/x/text/internal/language golang.org/x/text/internal/language/compact