Browse Source

staging/skein: move all threefish block functions to one file

move all threefish block functions to one file, remove unneeded include

Signed-off-by: Jake Edge <jake@lwn.net>
Acked-by: Jason Cooper <jason@lakedaemon.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Jake Edge 11 years ago
parent
commit
0109ce7304

+ 1 - 3
drivers/staging/skein/Makefile

@@ -5,7 +5,5 @@ obj-$(CONFIG_CRYPTO_SKEIN) +=   skein.o \
 				skein_api.o \
 				skein_block.o
 
-obj-$(CONFIG_CRYPTO_THREEFISH) += threefish_1024_block.o \
-				  threefish_256_block.o \
-				  threefish_512_block.o \
+obj-$(CONFIG_CRYPTO_THREEFISH) += threefish_block.o \
 				  threefish_api.o

+ 0 - 1139
drivers/staging/skein/threefish_256_block.c

@@ -1,1139 +0,0 @@
-#include <linux/string.h>
-#include "threefish_api.h"
-
-
-void threefish_encrypt_256(struct threefish_key *key_ctx, u64 *input,
-			   u64 *output)
-{
-	u64 b0 = input[0], b1 = input[1],
-	    b2 = input[2], b3 = input[3];
-	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
-	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
-	    k4 = key_ctx->key[4];
-	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
-	    t2 = key_ctx->tweak[2];
-
-	b1 += k1 + t0;
-	b0 += b1 + k0;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k3;
-	b2 += b3 + k2 + t1;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k2 + t1;
-	b0 += b1 + k1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k4 + 1;
-	b2 += b3 + k3 + t2;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k3 + t2;
-	b0 += b1 + k2;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k0 + 2;
-	b2 += b3 + k4 + t0;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k4 + t0;
-	b0 += b1 + k3;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k1 + 3;
-	b2 += b3 + k0 + t1;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k0 + t1;
-	b0 += b1 + k4;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k2 + 4;
-	b2 += b3 + k1 + t2;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k1 + t2;
-	b0 += b1 + k0;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k3 + 5;
-	b2 += b3 + k2 + t0;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k2 + t0;
-	b0 += b1 + k1;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k4 + 6;
-	b2 += b3 + k3 + t1;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k3 + t1;
-	b0 += b1 + k2;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k0 + 7;
-	b2 += b3 + k4 + t2;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k4 + t2;
-	b0 += b1 + k3;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k1 + 8;
-	b2 += b3 + k0 + t0;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k0 + t0;
-	b0 += b1 + k4;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k2 + 9;
-	b2 += b3 + k1 + t1;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k1 + t1;
-	b0 += b1 + k0;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k3 + 10;
-	b2 += b3 + k2 + t2;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k2 + t2;
-	b0 += b1 + k1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k4 + 11;
-	b2 += b3 + k3 + t0;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k3 + t0;
-	b0 += b1 + k2;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k0 + 12;
-	b2 += b3 + k4 + t1;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k4 + t1;
-	b0 += b1 + k3;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k1 + 13;
-	b2 += b3 + k0 + t2;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k0 + t2;
-	b0 += b1 + k4;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k2 + 14;
-	b2 += b3 + k1 + t0;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k1 + t0;
-	b0 += b1 + k0;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k3 + 15;
-	b2 += b3 + k2 + t1;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-
-	b1 += k2 + t1;
-	b0 += b1 + k1;
-	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
-
-	b3 += k4 + 16;
-	b2 += b3 + k3 + t2;
-	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
-
-	b1 += k3 + t2;
-	b0 += b1 + k2;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
-
-	b3 += k0 + 17;
-	b2 += b3 + k4 + t0;
-	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
-
-	b0 += b1;
-	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
-
-	b2 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
-
-	b0 += b3;
-	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
-
-	b2 += b1;
-	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
-
-	output[0] = b0 + k3;
-	output[1] = b1 + k4 + t0;
-	output[2] = b2 + k0 + t1;
-	output[3] = b3 + k1 + 18;
-}
-
-void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
-			   u64 *output)
-{
-	u64 b0 = input[0], b1 = input[1],
-	    b2 = input[2], b3 = input[3];
-	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
-	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
-	    k4 = key_ctx->key[4];
-	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
-	    t2 = key_ctx->tweak[2];
-
-	u64 tmp;
-
-	b0 -= k3;
-	b1 -= k4 + t0;
-	b2 -= k0 + t1;
-	b3 -= k1 + 18;
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k2;
-	b1 -= k3 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k4 + t0;
-	b3 -= k0 + 17;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k1;
-	b1 -= k2 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k3 + t2;
-	b3 -= k4 + 16;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k0;
-	b1 -= k1 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k2 + t1;
-	b3 -= k3 + 15;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k4;
-	b1 -= k0 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k1 + t0;
-	b3 -= k2 + 14;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k3;
-	b1 -= k4 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k0 + t2;
-	b3 -= k1 + 13;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k2;
-	b1 -= k3 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k4 + t1;
-	b3 -= k0 + 12;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k1;
-	b1 -= k2 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k3 + t0;
-	b3 -= k4 + 11;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k0;
-	b1 -= k1 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k2 + t2;
-	b3 -= k3 + 10;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k4;
-	b1 -= k0 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k1 + t1;
-	b3 -= k2 + 9;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k3;
-	b1 -= k4 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k0 + t0;
-	b3 -= k1 + 8;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k2;
-	b1 -= k3 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k4 + t2;
-	b3 -= k0 + 7;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k1;
-	b1 -= k2 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k3 + t1;
-	b3 -= k4 + 6;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k0;
-	b1 -= k1 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k2 + t0;
-	b3 -= k3 + 5;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k4;
-	b1 -= k0 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k1 + t2;
-	b3 -= k2 + 4;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k3;
-	b1 -= k4 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k0 + t1;
-	b3 -= k1 + 3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k2;
-	b1 -= k3 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k4 + t0;
-	b3 -= k0 + 2;
-
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 32) | (tmp << (64 - 32));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 32) | (tmp << (64 - 32));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 58) | (tmp << (64 - 58));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 12) | (tmp << (64 - 12));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b0 -= b1 + k1;
-	b1 -= k2 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b3 + k3 + t2;
-	b3 -= k4 + 1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 5) | (tmp << (64 - 5));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 37) | (tmp << (64 - 37));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 23) | (tmp << (64 - 23));
-	b0 -= b1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 40) | (tmp << (64 - 40));
-	b2 -= b3;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 52) | (tmp << (64 - 52));
-	b0 -= b3;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 57) | (tmp << (64 - 57));
-	b2 -= b1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 14) | (tmp << (64 - 14));
-	b0 -= b1 + k0;
-	b1 -= k1 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 16) | (tmp << (64 - 16));
-	b2 -= b3 + k2 + t1;
-	b3 -= k3;
-
-	output[0] = b0;
-	output[1] = b1;
-	output[2] = b2;
-	output[3] = b3;
-}

+ 0 - 2225
drivers/staging/skein/threefish_512_block.c

@@ -1,2225 +0,0 @@
-#include <linux/string.h>
-#include "threefish_api.h"
-
-
-void threefish_encrypt_512(struct threefish_key *key_ctx, u64 *input,
-			   u64 *output)
-{
-	u64 b0 = input[0], b1 = input[1],
-	    b2 = input[2], b3 = input[3],
-	    b4 = input[4], b5 = input[5],
-	    b6 = input[6], b7 = input[7];
-	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
-	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
-	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
-	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
-	    k8 = key_ctx->key[8];
-	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
-	    t2 = key_ctx->tweak[2];
-
-	b1 += k1;
-	b0 += b1 + k0;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k3;
-	b2 += b3 + k2;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k5 + t0;
-	b4 += b5 + k4;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k7;
-	b6 += b7 + k6 + t1;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k2;
-	b0 += b1 + k1;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k4;
-	b2 += b3 + k3;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k6 + t1;
-	b4 += b5 + k5;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k8 + 1;
-	b6 += b7 + k7 + t2;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k3;
-	b0 += b1 + k2;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k5;
-	b2 += b3 + k4;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k7 + t2;
-	b4 += b5 + k6;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k0 + 2;
-	b6 += b7 + k8 + t0;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k4;
-	b0 += b1 + k3;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k6;
-	b2 += b3 + k5;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k8 + t0;
-	b4 += b5 + k7;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k1 + 3;
-	b6 += b7 + k0 + t1;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k5;
-	b0 += b1 + k4;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k7;
-	b2 += b3 + k6;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k0 + t1;
-	b4 += b5 + k8;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k2 + 4;
-	b6 += b7 + k1 + t2;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k6;
-	b0 += b1 + k5;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k8;
-	b2 += b3 + k7;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k1 + t2;
-	b4 += b5 + k0;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k3 + 5;
-	b6 += b7 + k2 + t0;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k7;
-	b0 += b1 + k6;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k0;
-	b2 += b3 + k8;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k2 + t0;
-	b4 += b5 + k1;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k4 + 6;
-	b6 += b7 + k3 + t1;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k8;
-	b0 += b1 + k7;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k1;
-	b2 += b3 + k0;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k3 + t1;
-	b4 += b5 + k2;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k5 + 7;
-	b6 += b7 + k4 + t2;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k0;
-	b0 += b1 + k8;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k2;
-	b2 += b3 + k1;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k4 + t2;
-	b4 += b5 + k3;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k6 + 8;
-	b6 += b7 + k5 + t0;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k1;
-	b0 += b1 + k0;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k3;
-	b2 += b3 + k2;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k5 + t0;
-	b4 += b5 + k4;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k7 + 9;
-	b6 += b7 + k6 + t1;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k2;
-	b0 += b1 + k1;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k4;
-	b2 += b3 + k3;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k6 + t1;
-	b4 += b5 + k5;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k8 + 10;
-	b6 += b7 + k7 + t2;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k3;
-	b0 += b1 + k2;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k5;
-	b2 += b3 + k4;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k7 + t2;
-	b4 += b5 + k6;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k0 + 11;
-	b6 += b7 + k8 + t0;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k4;
-	b0 += b1 + k3;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k6;
-	b2 += b3 + k5;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k8 + t0;
-	b4 += b5 + k7;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k1 + 12;
-	b6 += b7 + k0 + t1;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k5;
-	b0 += b1 + k4;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k7;
-	b2 += b3 + k6;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k0 + t1;
-	b4 += b5 + k8;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k2 + 13;
-	b6 += b7 + k1 + t2;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k6;
-	b0 += b1 + k5;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k8;
-	b2 += b3 + k7;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k1 + t2;
-	b4 += b5 + k0;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k3 + 14;
-	b6 += b7 + k2 + t0;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k7;
-	b0 += b1 + k6;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k0;
-	b2 += b3 + k8;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k2 + t0;
-	b4 += b5 + k1;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k4 + 15;
-	b6 += b7 + k3 + t1;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	b1 += k8;
-	b0 += b1 + k7;
-	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
-
-	b3 += k1;
-	b2 += b3 + k0;
-	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
-
-	b5 += k3 + t1;
-	b4 += b5 + k2;
-	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
-
-	b7 += k5 + 16;
-	b6 += b7 + k4 + t2;
-	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
-
-	b1 += k0;
-	b0 += b1 + k8;
-	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
-
-	b3 += k2;
-	b2 += b3 + k1;
-	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
-
-	b5 += k4 + t2;
-	b4 += b5 + k3;
-	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
-
-	b7 += k6 + 17;
-	b6 += b7 + k5 + t0;
-	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
-
-	b2 += b1;
-	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
-
-	b4 += b7;
-	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
-
-	b6 += b5;
-	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
-
-	b0 += b3;
-	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
-
-	b4 += b1;
-	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
-
-	b6 += b3;
-	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
-
-	b0 += b5;
-	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
-
-	b2 += b7;
-	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
-
-	b6 += b1;
-	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
-
-	b0 += b7;
-	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
-
-	b2 += b5;
-	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
-
-	b4 += b3;
-	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
-
-	output[0] = b0 + k0;
-	output[1] = b1 + k1;
-	output[2] = b2 + k2;
-	output[3] = b3 + k3;
-	output[4] = b4 + k4;
-	output[5] = b5 + k5 + t0;
-	output[6] = b6 + k6 + t1;
-	output[7] = b7 + k7 + 18;
-}
-
-void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
-			   u64 *output)
-{
-	u64 b0 = input[0], b1 = input[1],
-	    b2 = input[2], b3 = input[3],
-	    b4 = input[4], b5 = input[5],
-	    b6 = input[6], b7 = input[7];
-	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
-	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
-	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
-	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
-	    k8 = key_ctx->key[8];
-	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
-	    t2 = key_ctx->tweak[2];
-
-	u64 tmp;
-
-	b0 -= k0;
-	b1 -= k1;
-	b2 -= k2;
-	b3 -= k3;
-	b4 -= k4;
-	b5 -= k5 + t0;
-	b6 -= k6 + t1;
-	b7 -= k7 + 18;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k5 + t0;
-	b7 -= k6 + 17;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k3;
-	b5 -= k4 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k1;
-	b3 -= k2;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k8;
-	b1 -= k0;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k4 + t2;
-	b7 -= k5 + 16;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k2;
-	b5 -= k3 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k0;
-	b3 -= k1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k7;
-	b1 -= k8;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k3 + t1;
-	b7 -= k4 + 15;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k1;
-	b5 -= k2 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k8;
-	b3 -= k0;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k6;
-	b1 -= k7;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k2 + t0;
-	b7 -= k3 + 14;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k0;
-	b5 -= k1 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k7;
-	b3 -= k8;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k5;
-	b1 -= k6;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k1 + t2;
-	b7 -= k2 + 13;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k8;
-	b5 -= k0 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k6;
-	b3 -= k7;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k4;
-	b1 -= k5;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k0 + t1;
-	b7 -= k1 + 12;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k7;
-	b5 -= k8 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k5;
-	b3 -= k6;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k3;
-	b1 -= k4;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k8 + t0;
-	b7 -= k0 + 11;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k6;
-	b5 -= k7 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k4;
-	b3 -= k5;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k2;
-	b1 -= k3;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k7 + t2;
-	b7 -= k8 + 10;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k5;
-	b5 -= k6 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k3;
-	b3 -= k4;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k1;
-	b1 -= k2;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k6 + t1;
-	b7 -= k7 + 9;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k4;
-	b5 -= k5 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k2;
-	b3 -= k3;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k0;
-	b1 -= k1;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k5 + t0;
-	b7 -= k6 + 8;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k3;
-	b5 -= k4 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k1;
-	b3 -= k2;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k8;
-	b1 -= k0;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k4 + t2;
-	b7 -= k5 + 7;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k2;
-	b5 -= k3 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k0;
-	b3 -= k1;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k7;
-	b1 -= k8;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k3 + t1;
-	b7 -= k4 + 6;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k1;
-	b5 -= k2 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k8;
-	b3 -= k0;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k6;
-	b1 -= k7;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k2 + t0;
-	b7 -= k3 + 5;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k0;
-	b5 -= k1 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k7;
-	b3 -= k8;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k5;
-	b1 -= k6;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k1 + t2;
-	b7 -= k2 + 4;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k8;
-	b5 -= k0 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k6;
-	b3 -= k7;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k4;
-	b1 -= k5;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k0 + t1;
-	b7 -= k1 + 3;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k7;
-	b5 -= k8 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k5;
-	b3 -= k6;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k3;
-	b1 -= k4;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k8 + t0;
-	b7 -= k0 + 2;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k6;
-	b5 -= k7 + t2;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k4;
-	b3 -= k5;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k2;
-	b1 -= k3;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 22) | (tmp << (64 - 22));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 56) | (tmp << (64 - 56));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 35) | (tmp << (64 - 35));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 8) | (tmp << (64 - 8));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 43) | (tmp << (64 - 43));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 29) | (tmp << (64 - 29));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 25) | (tmp << (64 - 25));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 17) | (tmp << (64 - 17));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 10) | (tmp << (64 - 10));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 50) | (tmp << (64 - 50));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 13) | (tmp << (64 - 13));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 24) | (tmp << (64 - 24));
-	b6 -= b7 + k7 + t2;
-	b7 -= k8 + 1;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 34) | (tmp << (64 - 34));
-	b4 -= b5 + k5;
-	b5 -= k6 + t1;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 30) | (tmp << (64 - 30));
-	b2 -= b3 + k3;
-	b3 -= k4;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 39) | (tmp << (64 - 39));
-	b0 -= b1 + k1;
-	b1 -= k2;
-
-	tmp = b3 ^ b4;
-	b3 = (tmp >> 56) | (tmp << (64 - 56));
-	b4 -= b3;
-
-	tmp = b5 ^ b2;
-	b5 = (tmp >> 54) | (tmp << (64 - 54));
-	b2 -= b5;
-
-	tmp = b7 ^ b0;
-	b7 = (tmp >> 9) | (tmp << (64 - 9));
-	b0 -= b7;
-
-	tmp = b1 ^ b6;
-	b1 = (tmp >> 44) | (tmp << (64 - 44));
-	b6 -= b1;
-
-	tmp = b7 ^ b2;
-	b7 = (tmp >> 39) | (tmp << (64 - 39));
-	b2 -= b7;
-
-	tmp = b5 ^ b0;
-	b5 = (tmp >> 36) | (tmp << (64 - 36));
-	b0 -= b5;
-
-	tmp = b3 ^ b6;
-	b3 = (tmp >> 49) | (tmp << (64 - 49));
-	b6 -= b3;
-
-	tmp = b1 ^ b4;
-	b1 = (tmp >> 17) | (tmp << (64 - 17));
-	b4 -= b1;
-
-	tmp = b3 ^ b0;
-	b3 = (tmp >> 42) | (tmp << (64 - 42));
-	b0 -= b3;
-
-	tmp = b5 ^ b6;
-	b5 = (tmp >> 14) | (tmp << (64 - 14));
-	b6 -= b5;
-
-	tmp = b7 ^ b4;
-	b7 = (tmp >> 27) | (tmp << (64 - 27));
-	b4 -= b7;
-
-	tmp = b1 ^ b2;
-	b1 = (tmp >> 33) | (tmp << (64 - 33));
-	b2 -= b1;
-
-	tmp = b7 ^ b6;
-	b7 = (tmp >> 37) | (tmp << (64 - 37));
-	b6 -= b7 + k6 + t1;
-	b7 -= k7;
-
-	tmp = b5 ^ b4;
-	b5 = (tmp >> 19) | (tmp << (64 - 19));
-	b4 -= b5 + k4;
-	b5 -= k5 + t0;
-
-	tmp = b3 ^ b2;
-	b3 = (tmp >> 36) | (tmp << (64 - 36));
-	b2 -= b3 + k2;
-	b3 -= k3;
-
-	tmp = b1 ^ b0;
-	b1 = (tmp >> 46) | (tmp << (64 - 46));
-	b0 -= b1 + k0;
-	b1 -= k1;
-
-	output[0] = b0;
-	output[1] = b1;
-	output[2] = b2;
-	output[3] = b3;
-
-	output[7] = b7;
-	output[6] = b6;
-	output[5] = b5;
-	output[4] = b4;
-}

+ 3357 - 1
drivers/staging/skein/threefish_1024_block.c → drivers/staging/skein/threefish_block.c

@@ -1,6 +1,3362 @@
-#include <linux/string.h>
 #include "threefish_api.h"
 
+void threefish_encrypt_256(struct threefish_key *key_ctx, u64 *input,
+			   u64 *output)
+{
+	u64 b0 = input[0], b1 = input[1],
+	    b2 = input[2], b3 = input[3];
+	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
+	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
+	    k4 = key_ctx->key[4];
+	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
+	    t2 = key_ctx->tweak[2];
+
+	b1 += k1 + t0;
+	b0 += b1 + k0;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k3;
+	b2 += b3 + k2 + t1;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k2 + t1;
+	b0 += b1 + k1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k4 + 1;
+	b2 += b3 + k3 + t2;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k3 + t2;
+	b0 += b1 + k2;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k0 + 2;
+	b2 += b3 + k4 + t0;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k4 + t0;
+	b0 += b1 + k3;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k1 + 3;
+	b2 += b3 + k0 + t1;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k0 + t1;
+	b0 += b1 + k4;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k2 + 4;
+	b2 += b3 + k1 + t2;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k1 + t2;
+	b0 += b1 + k0;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k3 + 5;
+	b2 += b3 + k2 + t0;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k2 + t0;
+	b0 += b1 + k1;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k4 + 6;
+	b2 += b3 + k3 + t1;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k3 + t1;
+	b0 += b1 + k2;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k0 + 7;
+	b2 += b3 + k4 + t2;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k4 + t2;
+	b0 += b1 + k3;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k1 + 8;
+	b2 += b3 + k0 + t0;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k0 + t0;
+	b0 += b1 + k4;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k2 + 9;
+	b2 += b3 + k1 + t1;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k1 + t1;
+	b0 += b1 + k0;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k3 + 10;
+	b2 += b3 + k2 + t2;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k2 + t2;
+	b0 += b1 + k1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k4 + 11;
+	b2 += b3 + k3 + t0;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k3 + t0;
+	b0 += b1 + k2;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k0 + 12;
+	b2 += b3 + k4 + t1;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k4 + t1;
+	b0 += b1 + k3;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k1 + 13;
+	b2 += b3 + k0 + t2;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k0 + t2;
+	b0 += b1 + k4;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k2 + 14;
+	b2 += b3 + k1 + t0;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k1 + t0;
+	b0 += b1 + k0;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k3 + 15;
+	b2 += b3 + k2 + t1;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+
+	b1 += k2 + t1;
+	b0 += b1 + k1;
+	b1 = ((b1 << 14) | (b1 >> (64 - 14))) ^ b0;
+
+	b3 += k4 + 16;
+	b2 += b3 + k3 + t2;
+	b3 = ((b3 << 16) | (b3 >> (64 - 16))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 52) | (b3 >> (64 - 52))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 57) | (b1 >> (64 - 57))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 23) | (b1 >> (64 - 23))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 40) | (b3 >> (64 - 40))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 5) | (b3 >> (64 - 5))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 37) | (b1 >> (64 - 37))) ^ b2;
+
+	b1 += k3 + t2;
+	b0 += b1 + k2;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b0;
+
+	b3 += k0 + 17;
+	b2 += b3 + k4 + t0;
+	b3 = ((b3 << 33) | (b3 >> (64 - 33))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 46) | (b3 >> (64 - 46))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 12) | (b1 >> (64 - 12))) ^ b2;
+
+	b0 += b1;
+	b1 = ((b1 << 58) | (b1 >> (64 - 58))) ^ b0;
+
+	b2 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b2;
+
+	b0 += b3;
+	b3 = ((b3 << 32) | (b3 >> (64 - 32))) ^ b0;
+
+	b2 += b1;
+	b1 = ((b1 << 32) | (b1 >> (64 - 32))) ^ b2;
+
+	output[0] = b0 + k3;
+	output[1] = b1 + k4 + t0;
+	output[2] = b2 + k0 + t1;
+	output[3] = b3 + k1 + 18;
+}
+
+void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
+			   u64 *output)
+{
+	u64 b0 = input[0], b1 = input[1],
+	    b2 = input[2], b3 = input[3];
+	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
+	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
+	    k4 = key_ctx->key[4];
+	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
+	    t2 = key_ctx->tweak[2];
+
+	u64 tmp;
+
+	b0 -= k3;
+	b1 -= k4 + t0;
+	b2 -= k0 + t1;
+	b3 -= k1 + 18;
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k2;
+	b1 -= k3 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k4 + t0;
+	b3 -= k0 + 17;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k1;
+	b1 -= k2 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k3 + t2;
+	b3 -= k4 + 16;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k0;
+	b1 -= k1 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k2 + t1;
+	b3 -= k3 + 15;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k4;
+	b1 -= k0 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k1 + t0;
+	b3 -= k2 + 14;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k3;
+	b1 -= k4 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k0 + t2;
+	b3 -= k1 + 13;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k2;
+	b1 -= k3 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k4 + t1;
+	b3 -= k0 + 12;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k1;
+	b1 -= k2 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k3 + t0;
+	b3 -= k4 + 11;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k0;
+	b1 -= k1 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k2 + t2;
+	b3 -= k3 + 10;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k4;
+	b1 -= k0 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k1 + t1;
+	b3 -= k2 + 9;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k3;
+	b1 -= k4 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k0 + t0;
+	b3 -= k1 + 8;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k2;
+	b1 -= k3 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k4 + t2;
+	b3 -= k0 + 7;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k1;
+	b1 -= k2 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k3 + t1;
+	b3 -= k4 + 6;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k0;
+	b1 -= k1 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k2 + t0;
+	b3 -= k3 + 5;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k4;
+	b1 -= k0 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k1 + t2;
+	b3 -= k2 + 4;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k3;
+	b1 -= k4 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k0 + t1;
+	b3 -= k1 + 3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k2;
+	b1 -= k3 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k4 + t0;
+	b3 -= k0 + 2;
+
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 32) | (tmp << (64 - 32));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 32) | (tmp << (64 - 32));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 58) | (tmp << (64 - 58));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 12) | (tmp << (64 - 12));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b0 -= b1 + k1;
+	b1 -= k2 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b3 + k3 + t2;
+	b3 -= k4 + 1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 5) | (tmp << (64 - 5));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 37) | (tmp << (64 - 37));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 23) | (tmp << (64 - 23));
+	b0 -= b1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 40) | (tmp << (64 - 40));
+	b2 -= b3;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 52) | (tmp << (64 - 52));
+	b0 -= b3;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 57) | (tmp << (64 - 57));
+	b2 -= b1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 14) | (tmp << (64 - 14));
+	b0 -= b1 + k0;
+	b1 -= k1 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 16) | (tmp << (64 - 16));
+	b2 -= b3 + k2 + t1;
+	b3 -= k3;
+
+	output[0] = b0;
+	output[1] = b1;
+	output[2] = b2;
+	output[3] = b3;
+}
+
+void threefish_encrypt_512(struct threefish_key *key_ctx, u64 *input,
+			   u64 *output)
+{
+	u64 b0 = input[0], b1 = input[1],
+	    b2 = input[2], b3 = input[3],
+	    b4 = input[4], b5 = input[5],
+	    b6 = input[6], b7 = input[7];
+	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
+	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
+	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
+	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
+	    k8 = key_ctx->key[8];
+	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
+	    t2 = key_ctx->tweak[2];
+
+	b1 += k1;
+	b0 += b1 + k0;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k3;
+	b2 += b3 + k2;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k5 + t0;
+	b4 += b5 + k4;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k7;
+	b6 += b7 + k6 + t1;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k2;
+	b0 += b1 + k1;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k4;
+	b2 += b3 + k3;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k6 + t1;
+	b4 += b5 + k5;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k8 + 1;
+	b6 += b7 + k7 + t2;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k3;
+	b0 += b1 + k2;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k5;
+	b2 += b3 + k4;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k7 + t2;
+	b4 += b5 + k6;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k0 + 2;
+	b6 += b7 + k8 + t0;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k4;
+	b0 += b1 + k3;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k6;
+	b2 += b3 + k5;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k8 + t0;
+	b4 += b5 + k7;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k1 + 3;
+	b6 += b7 + k0 + t1;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k5;
+	b0 += b1 + k4;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k7;
+	b2 += b3 + k6;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k0 + t1;
+	b4 += b5 + k8;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k2 + 4;
+	b6 += b7 + k1 + t2;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k6;
+	b0 += b1 + k5;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k8;
+	b2 += b3 + k7;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k1 + t2;
+	b4 += b5 + k0;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k3 + 5;
+	b6 += b7 + k2 + t0;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k7;
+	b0 += b1 + k6;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k0;
+	b2 += b3 + k8;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k2 + t0;
+	b4 += b5 + k1;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k4 + 6;
+	b6 += b7 + k3 + t1;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k8;
+	b0 += b1 + k7;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k1;
+	b2 += b3 + k0;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k3 + t1;
+	b4 += b5 + k2;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k5 + 7;
+	b6 += b7 + k4 + t2;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k0;
+	b0 += b1 + k8;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k2;
+	b2 += b3 + k1;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k4 + t2;
+	b4 += b5 + k3;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k6 + 8;
+	b6 += b7 + k5 + t0;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k1;
+	b0 += b1 + k0;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k3;
+	b2 += b3 + k2;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k5 + t0;
+	b4 += b5 + k4;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k7 + 9;
+	b6 += b7 + k6 + t1;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k2;
+	b0 += b1 + k1;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k4;
+	b2 += b3 + k3;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k6 + t1;
+	b4 += b5 + k5;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k8 + 10;
+	b6 += b7 + k7 + t2;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k3;
+	b0 += b1 + k2;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k5;
+	b2 += b3 + k4;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k7 + t2;
+	b4 += b5 + k6;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k0 + 11;
+	b6 += b7 + k8 + t0;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k4;
+	b0 += b1 + k3;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k6;
+	b2 += b3 + k5;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k8 + t0;
+	b4 += b5 + k7;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k1 + 12;
+	b6 += b7 + k0 + t1;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k5;
+	b0 += b1 + k4;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k7;
+	b2 += b3 + k6;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k0 + t1;
+	b4 += b5 + k8;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k2 + 13;
+	b6 += b7 + k1 + t2;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k6;
+	b0 += b1 + k5;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k8;
+	b2 += b3 + k7;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k1 + t2;
+	b4 += b5 + k0;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k3 + 14;
+	b6 += b7 + k2 + t0;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k7;
+	b0 += b1 + k6;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k0;
+	b2 += b3 + k8;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k2 + t0;
+	b4 += b5 + k1;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k4 + 15;
+	b6 += b7 + k3 + t1;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	b1 += k8;
+	b0 += b1 + k7;
+	b1 = ((b1 << 46) | (b1 >> (64 - 46))) ^ b0;
+
+	b3 += k1;
+	b2 += b3 + k0;
+	b3 = ((b3 << 36) | (b3 >> (64 - 36))) ^ b2;
+
+	b5 += k3 + t1;
+	b4 += b5 + k2;
+	b5 = ((b5 << 19) | (b5 >> (64 - 19))) ^ b4;
+
+	b7 += k5 + 16;
+	b6 += b7 + k4 + t2;
+	b7 = ((b7 << 37) | (b7 >> (64 - 37))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 33) | (b1 >> (64 - 33))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 27) | (b7 >> (64 - 27))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 14) | (b5 >> (64 - 14))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 42) | (b3 >> (64 - 42))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 17) | (b1 >> (64 - 17))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 49) | (b3 >> (64 - 49))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 36) | (b5 >> (64 - 36))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 39) | (b7 >> (64 - 39))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 44) | (b1 >> (64 - 44))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 9) | (b7 >> (64 - 9))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 54) | (b5 >> (64 - 54))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 56) | (b3 >> (64 - 56))) ^ b4;
+
+	b1 += k0;
+	b0 += b1 + k8;
+	b1 = ((b1 << 39) | (b1 >> (64 - 39))) ^ b0;
+
+	b3 += k2;
+	b2 += b3 + k1;
+	b3 = ((b3 << 30) | (b3 >> (64 - 30))) ^ b2;
+
+	b5 += k4 + t2;
+	b4 += b5 + k3;
+	b5 = ((b5 << 34) | (b5 >> (64 - 34))) ^ b4;
+
+	b7 += k6 + 17;
+	b6 += b7 + k5 + t0;
+	b7 = ((b7 << 24) | (b7 >> (64 - 24))) ^ b6;
+
+	b2 += b1;
+	b1 = ((b1 << 13) | (b1 >> (64 - 13))) ^ b2;
+
+	b4 += b7;
+	b7 = ((b7 << 50) | (b7 >> (64 - 50))) ^ b4;
+
+	b6 += b5;
+	b5 = ((b5 << 10) | (b5 >> (64 - 10))) ^ b6;
+
+	b0 += b3;
+	b3 = ((b3 << 17) | (b3 >> (64 - 17))) ^ b0;
+
+	b4 += b1;
+	b1 = ((b1 << 25) | (b1 >> (64 - 25))) ^ b4;
+
+	b6 += b3;
+	b3 = ((b3 << 29) | (b3 >> (64 - 29))) ^ b6;
+
+	b0 += b5;
+	b5 = ((b5 << 39) | (b5 >> (64 - 39))) ^ b0;
+
+	b2 += b7;
+	b7 = ((b7 << 43) | (b7 >> (64 - 43))) ^ b2;
+
+	b6 += b1;
+	b1 = ((b1 << 8) | (b1 >> (64 - 8))) ^ b6;
+
+	b0 += b7;
+	b7 = ((b7 << 35) | (b7 >> (64 - 35))) ^ b0;
+
+	b2 += b5;
+	b5 = ((b5 << 56) | (b5 >> (64 - 56))) ^ b2;
+
+	b4 += b3;
+	b3 = ((b3 << 22) | (b3 >> (64 - 22))) ^ b4;
+
+	output[0] = b0 + k0;
+	output[1] = b1 + k1;
+	output[2] = b2 + k2;
+	output[3] = b3 + k3;
+	output[4] = b4 + k4;
+	output[5] = b5 + k5 + t0;
+	output[6] = b6 + k6 + t1;
+	output[7] = b7 + k7 + 18;
+}
+
+void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
+			   u64 *output)
+{
+	u64 b0 = input[0], b1 = input[1],
+	    b2 = input[2], b3 = input[3],
+	    b4 = input[4], b5 = input[5],
+	    b6 = input[6], b7 = input[7];
+	u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
+	    k2 = key_ctx->key[2], k3 = key_ctx->key[3],
+	    k4 = key_ctx->key[4], k5 = key_ctx->key[5],
+	    k6 = key_ctx->key[6], k7 = key_ctx->key[7],
+	    k8 = key_ctx->key[8];
+	u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
+	    t2 = key_ctx->tweak[2];
+
+	u64 tmp;
+
+	b0 -= k0;
+	b1 -= k1;
+	b2 -= k2;
+	b3 -= k3;
+	b4 -= k4;
+	b5 -= k5 + t0;
+	b6 -= k6 + t1;
+	b7 -= k7 + 18;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k5 + t0;
+	b7 -= k6 + 17;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k3;
+	b5 -= k4 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k1;
+	b3 -= k2;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k8;
+	b1 -= k0;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k4 + t2;
+	b7 -= k5 + 16;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k2;
+	b5 -= k3 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k0;
+	b3 -= k1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k7;
+	b1 -= k8;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k3 + t1;
+	b7 -= k4 + 15;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k1;
+	b5 -= k2 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k8;
+	b3 -= k0;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k6;
+	b1 -= k7;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k2 + t0;
+	b7 -= k3 + 14;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k0;
+	b5 -= k1 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k7;
+	b3 -= k8;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k5;
+	b1 -= k6;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k1 + t2;
+	b7 -= k2 + 13;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k8;
+	b5 -= k0 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k6;
+	b3 -= k7;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k4;
+	b1 -= k5;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k0 + t1;
+	b7 -= k1 + 12;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k7;
+	b5 -= k8 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k5;
+	b3 -= k6;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k3;
+	b1 -= k4;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k8 + t0;
+	b7 -= k0 + 11;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k6;
+	b5 -= k7 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k4;
+	b3 -= k5;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k2;
+	b1 -= k3;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k7 + t2;
+	b7 -= k8 + 10;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k5;
+	b5 -= k6 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k3;
+	b3 -= k4;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k1;
+	b1 -= k2;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k6 + t1;
+	b7 -= k7 + 9;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k4;
+	b5 -= k5 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k2;
+	b3 -= k3;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k0;
+	b1 -= k1;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k5 + t0;
+	b7 -= k6 + 8;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k3;
+	b5 -= k4 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k1;
+	b3 -= k2;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k8;
+	b1 -= k0;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k4 + t2;
+	b7 -= k5 + 7;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k2;
+	b5 -= k3 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k0;
+	b3 -= k1;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k7;
+	b1 -= k8;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k3 + t1;
+	b7 -= k4 + 6;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k1;
+	b5 -= k2 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k8;
+	b3 -= k0;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k6;
+	b1 -= k7;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k2 + t0;
+	b7 -= k3 + 5;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k0;
+	b5 -= k1 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k7;
+	b3 -= k8;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k5;
+	b1 -= k6;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k1 + t2;
+	b7 -= k2 + 4;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k8;
+	b5 -= k0 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k6;
+	b3 -= k7;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k4;
+	b1 -= k5;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k0 + t1;
+	b7 -= k1 + 3;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k7;
+	b5 -= k8 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k5;
+	b3 -= k6;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k3;
+	b1 -= k4;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k8 + t0;
+	b7 -= k0 + 2;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k6;
+	b5 -= k7 + t2;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k4;
+	b3 -= k5;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k2;
+	b1 -= k3;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 22) | (tmp << (64 - 22));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 56) | (tmp << (64 - 56));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 35) | (tmp << (64 - 35));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 8) | (tmp << (64 - 8));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 43) | (tmp << (64 - 43));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 29) | (tmp << (64 - 29));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 25) | (tmp << (64 - 25));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 17) | (tmp << (64 - 17));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 10) | (tmp << (64 - 10));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 50) | (tmp << (64 - 50));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 13) | (tmp << (64 - 13));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 24) | (tmp << (64 - 24));
+	b6 -= b7 + k7 + t2;
+	b7 -= k8 + 1;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 34) | (tmp << (64 - 34));
+	b4 -= b5 + k5;
+	b5 -= k6 + t1;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 30) | (tmp << (64 - 30));
+	b2 -= b3 + k3;
+	b3 -= k4;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 39) | (tmp << (64 - 39));
+	b0 -= b1 + k1;
+	b1 -= k2;
+
+	tmp = b3 ^ b4;
+	b3 = (tmp >> 56) | (tmp << (64 - 56));
+	b4 -= b3;
+
+	tmp = b5 ^ b2;
+	b5 = (tmp >> 54) | (tmp << (64 - 54));
+	b2 -= b5;
+
+	tmp = b7 ^ b0;
+	b7 = (tmp >> 9) | (tmp << (64 - 9));
+	b0 -= b7;
+
+	tmp = b1 ^ b6;
+	b1 = (tmp >> 44) | (tmp << (64 - 44));
+	b6 -= b1;
+
+	tmp = b7 ^ b2;
+	b7 = (tmp >> 39) | (tmp << (64 - 39));
+	b2 -= b7;
+
+	tmp = b5 ^ b0;
+	b5 = (tmp >> 36) | (tmp << (64 - 36));
+	b0 -= b5;
+
+	tmp = b3 ^ b6;
+	b3 = (tmp >> 49) | (tmp << (64 - 49));
+	b6 -= b3;
+
+	tmp = b1 ^ b4;
+	b1 = (tmp >> 17) | (tmp << (64 - 17));
+	b4 -= b1;
+
+	tmp = b3 ^ b0;
+	b3 = (tmp >> 42) | (tmp << (64 - 42));
+	b0 -= b3;
+
+	tmp = b5 ^ b6;
+	b5 = (tmp >> 14) | (tmp << (64 - 14));
+	b6 -= b5;
+
+	tmp = b7 ^ b4;
+	b7 = (tmp >> 27) | (tmp << (64 - 27));
+	b4 -= b7;
+
+	tmp = b1 ^ b2;
+	b1 = (tmp >> 33) | (tmp << (64 - 33));
+	b2 -= b1;
+
+	tmp = b7 ^ b6;
+	b7 = (tmp >> 37) | (tmp << (64 - 37));
+	b6 -= b7 + k6 + t1;
+	b7 -= k7;
+
+	tmp = b5 ^ b4;
+	b5 = (tmp >> 19) | (tmp << (64 - 19));
+	b4 -= b5 + k4;
+	b5 -= k5 + t0;
+
+	tmp = b3 ^ b2;
+	b3 = (tmp >> 36) | (tmp << (64 - 36));
+	b2 -= b3 + k2;
+	b3 -= k3;
+
+	tmp = b1 ^ b0;
+	b1 = (tmp >> 46) | (tmp << (64 - 46));
+	b0 -= b1 + k0;
+	b1 -= k1;
+
+	output[0] = b0;
+	output[1] = b1;
+	output[2] = b2;
+	output[3] = b3;
+
+	output[7] = b7;
+	output[6] = b6;
+	output[5] = b5;
+	output[4] = b4;
+}
 
 void threefish_encrypt_1024(struct threefish_key *key_ctx, u64 *input,
 			    u64 *output)