/*
 * Copyright (c) Paul Stankovski
 * Free for all non-commercial use unless this directive conflicts with
 * other applicable copyright statement(s), patent holders, laws or such.
 */
#include "black_box_trivium.h"
#include "assert_utils.h"
#include <stdio.h>
#ifdef _DEBUG
#include <mem.h>
#include "rdtsc.h"
#include "bitslice_utils.h"
#endif

/*
 * SHIFT(REG, steps): views the variable pair REG1:REG2 (REG1 is the high
 * word, REG2 the low word) as one double-width value and yields the low
 * 64 bits of that value shifted right by `steps`. REG is pasted with the
 * suffixes 1 and 2, so SHIFT(A, n) reads the variables A1 and A2.
 * `steps` has to be in 1..63: both (steps) and (64 - (steps)) are used as
 * shift counts, and a shift by the full word width is undefined in C.
 *
 * MUL(REG): bitwise AND of the register shifted right by 1 and by 2, i.e.
 * the AND of the two bit positions just above the register's last bit,
 * evaluated for a whole word of positions at once.
 */
#define SHIFT(REG, steps) ((REG##1 << (64 - (steps))) | (REG##2 >> (steps)))
#define MUL(REG) (SHIFT(REG, 1) & SHIFT(REG, 2))

/*
 * Tap words. Each Snnn expands to a 64-bit word aligned so that its least
 * significant bit is state bit Snnn (using the register layout described in
 * the header comment above trivium()). The last bit of each register
 * (S93, S177, S288) already sits at the lsb of the low word, so those taps
 * are the low words A2, B2 and C2 themselves; the other taps shift the
 * register right by the distance to its last bit.
 * These names are #undef'd again further down in this file.
 */
#define S66 SHIFT(A, 93 - 66)
#define S69 SHIFT(A, 93 - 69)
#define S93 A2

#define S162 SHIFT(B, 177 - 162)
#define S171 SHIFT(B, 177 - 171)
#define S177 B2

#define S243 SHIFT(C, 288 - 243)
#define S264 SHIFT(C, 288 - 264)
#define S288 C2

/*
 * R: one word-parallel update of the whole state (64 bit positions per
 * expansion, since every register is moved along by one 64-bit word).
 *
 *   1. t1 = S93 ^ S66, t2 = S177 ^ S162, t3 = S243 ^ S288.
 *   2. OUT(...) wraps the statement `*out++ op t1 ^ t2 ^ t3;`. Both OUT(x)
 *      and op are macros that must be defined at the point where R is
 *      expanded; the functions below redefine them to select "no output",
 *      "store" or "xor into buffer".
 *   3. The AND terms (MUL) and the cross-register taps S171, S264, S69 are
 *      folded into t1, t2, t3.
 *   4. t3 is shifted into register A, t1 into B and t2 into C. The old high
 *      words (29, 20 and 47 significant bits respectively) become the low
 *      bits of the new low words.
 *
 * Expects the variables A1, A2, B1, B2, C1, C2, t1, t2, t3 and out to be in
 * scope (see DECLARE_VARIABLES).
 */
#define R \
    t1  = S93;    t2  = S177;   t3  = S243; \
    t1 ^= S66;    t2 ^= S162;   t3 ^= S288; \
    OUT(*out++ op t1 ^ t2 ^ t3;) \
    t1 ^= MUL(A); t2 ^= MUL(B); t3 ^= MUL(C); \
    t1 ^= S171;   t2 ^= S264;   t3 ^= S69; \
    A2 = (t3 << 29) | A1; A1 = t3 >> (64 - 29); \
    B2 = (t1 << 20) | B1; B1 = t1 >> (64 - 20); \
    C2 = (t2 << 47) | C1; C1 = t2 >> (64 - 47);

/*
 * ROUND:  a single expansion of R (one 64-bit block).
 * ROUNDS: eighteen consecutive expansions of R, i.e. 18 * 64 = 1152 = 4 * 288
 *         bit positions. The functions below use it for the warm-up phase.
 */
#define ROUND R
#define ROUNDS R R R R R R R R R R R R R R R R R R

/*
 * TRIVIUM_LOAD64_LE(p): assembles a 64-bit word from the eight bytes
 * p[0]..p[7], p[0] being the least significant byte. Reading byte by byte
 * avoids dereferencing a byte pointer through a UINT64 lvalue (alignment and
 * strict-aliasing hazard) and yields the same value that a direct 64-bit
 * load produces on a little-endian machine.
 * Assumes the element type of p is an unsigned 8-bit type.
 */
#define TRIVIUM_LOAD64_LE(p) \
  ( (UINT64)(p)[0]        | ((UINT64)(p)[1] <<  8) | \
   ((UINT64)(p)[2] << 16) | ((UINT64)(p)[3] << 24) | \
   ((UINT64)(p)[4] << 32) | ((UINT64)(p)[5] << 40) | \
   ((UINT64)(p)[6] << 48) | ((UINT64)(p)[7] << 56))

/*
 * INIT_REGISTERS: loads the 10-byte `key` and 10-byte `iv` (both must be in
 * scope) into the register words.
 *
 *   A: the 80 key bits shifted left by 13, leaving the low 13 bits zero.
 *   B: the 80 IV bits shifted left by 4, leaving the low 4 bits zero.
 *   C: all zero except the three lowest bits of C2.
 *
 * t1 and t2 are used as scratch and hold no meaningful value afterwards.
 */
#define INIT_REGISTERS \
  t2 = TRIVIUM_LOAD64_LE(key); \
  t1 = (UINT64)key[8] | ((UINT64)key[9] << 8); \
  \
  A1 = (t1 << 13) | (t2 >> (64 - 13)); \
  A2 = t2 << 13; \
  \
  t2 = TRIVIUM_LOAD64_LE(iv); \
  t1 = (UINT64)iv[8] | ((UINT64)iv[9] << 8); \
  \
  B1 = (t1 << 4) | (t2 >> (64 - 4)); \
  B2 = t2 << 4; \
  \
  C1 = 0; \
  C2 = 0x0000000000000007;

/*
 * DECLARE_VARIABLES(buf): declares every local the round macros rely on.
 * Must be the first thing in the function body and requires a variable
 * named numOutputBytes to be in scope.
 *
 *   A1..C2          register words (high/low word per register)
 *   t1, t2, t3      scratch words used by R and INIT_REGISTERS
 *   out             write cursor over `buf`, reinterpreted as UINT64 blocks
 *   i               loop counter
 *   numOutputBlocks number of whole 8-byte blocks in numOutputBytes
 *
 * NOTE(review): `buf` is accessed through a UINT64 pointer, so callers
 * presumably need to pass a buffer that is suitably aligned for UINT64 --
 * confirm against the callers.
 */
#define DECLARE_VARIABLES(buf) \
  UINT64 A1, A2, B1, B2, C1, C2; /* state */ \
  UINT64 t1, t2, t3; /* temporary update variables */ \
  UINT64 *out = (UINT64*)buf; \
  int i, numOutputBlocks = numOutputBytes / 8;

/*
 * VERIFY_INPUT_VALUES: early-exit checks on numOutputBytes (must be in
 * scope). A request for zero bytes makes the enclosing function return 0
 * right away; a length that is not a multiple of 8 makes it return -1.
 * Any other length falls through to the code that follows.
 */
#define VERIFY_INPUT_VALUES \
  if (numOutputBytes == 0) { \
    return 0; \
  } else if ((numOutputBytes % 8u) != 0) { \
    return -1; \
  }


/*
 * Computes the five 0-based state indices involved in one third of a round
 * when the state bits stay in place and the tap positions move instead
 * (each round lowers every index by one, modulo 288).
 *
 * round    : round number, expected in [0, 4*288).
 * subRound : which of the three update equations, expected in [0, 3).
 * p        : receives the index of the bit being updated.
 * q        : receives the index of the bit xored into p first.
 * r        : receives the index of the tap taken from the neighbouring
 *            register.
 * s, t     : receive the indices of the two AND inputs.
 *
 * Both expectations are checked with ASSERT. If ASSERT does not stop
 * execution for an out-of-range subRound, the function returns without
 * writing to the output pointers instead of computing with indeterminate
 * tap values.
 */
void getRoundIndices(int round, int subRound, int *p, int *q, int *r, int *s, int *t) {
  /* 1-based tap positions at round 0, one row per subRound:
     { updated bit, first xor tap, cross-register tap, AND input, AND input } */
  static const int taps[3][5] = {
    {  93,  66, 171,  91,  92 },
    { 177, 162, 264, 175, 176 },
    { 288, 243,  69, 286, 287 }
  };
  const int *row;
  int offset;

  ASSERT(round >=0 && round < 4*288, "Invalid round!");
  ASSERT(subRound >=0 && subRound < 3, "Invalid subround!");

  if (subRound < 0 || subRound > 2) {
    return;
  }

  row = taps[subRound];
  /* The 4*288 bias keeps the dividend non-negative for every valid round;
     the -1 converts the 1-based tap positions to 0-based indices. */
  offset = 4*288 - round - 1;

  *p = (offset + row[0]) % 288;
  *q = (offset + row[1]) % 288;
  *r = (offset + row[2]) % 288;
  *s = (offset + row[3]) % 288;
  *t = (offset + row[4]) % 288;
  ASSERT(0 <= *p && *p < 288, "Invalid p index!\n");
  ASSERT(0 <= *q && *q < 288, "Invalid q index!\n");
  ASSERT(0 <= *r && *r < 288, "Invalid r index!\n");
  ASSERT(0 <= *s && *s < 288, "Invalid s index!\n");
  ASSERT(0 <= *t && *t < 288, "Invalid t index!\n");
}

/******************************************************************************
 *
 * Little endian implementation of Trivium
 *
 * A, B and C represent the three registers containing bits in ranges
 *
 * A:   S1 -  S93
 * B:  S94 - S177
 * C: S178 - S288
 *
 * A1 contains bits S1-S29 in the low end (bit S29 is the lsb of A1), and
 * A2 contains bits S30 - S93 (bit S30 @ msb, S93 @ lsb), and so on.
 *
 * Key and IV are loaded according to the updated specification:
 * key80 -> S1, ..., key1 -> S80, 0 -> S81, ..., 0 -> S93
 * iv80 -> S94, ..., iv1 -> S173, 0 -> S174, ..., 0 -> S177
 * 0 -> S178, ..., 0 -> S285, 1 -> S286, 1 -> S287, 1 -> S288
 *
 * Verified against a handful of test vectors
 *
 ******************************************************************************/
/*
 * Writes numOutputBytes bytes of keystream to buf, overwriting its contents.
 * The 18 warm-up blocks (ROUNDS) are run with output suppressed; after that
 * one 64-bit block is stored per ROUND.
 *
 * Returns 0 on success (including numOutputBytes == 0) and -1 when
 * numOutputBytes is not a multiple of 8.
 *
 * On exit the macro state is OUT(x) -> x and op -> `=`.
 */
int trivium(const BYTE *key, const BYTE *iv, BYTE *buf, unsigned int numOutputBytes) {
  DECLARE_VARIABLES(buf)
  VERIFY_INPUT_VALUES
  INIT_REGISTERS

  /* warm-up: OUT swallows the store statement */
#undef OUT
#define OUT(x)
#undef op
#define op =
  ROUNDS

  /* keystream: OUT passes the store statement through */
#undef OUT
#define OUT(x) x
  i = 0;
  while (i < numOutputBlocks) {
    ROUND
    i++;
  }
  return 0;
}

/*
 * Like trivium(), except that no warm-up blocks are discarded: the first
 * block stored in buf is produced directly from the freshly loaded state.
 *
 * Returns 0 on success (including numOutputBytes == 0) and -1 when
 * numOutputBytes is not a multiple of 8.
 *
 * The macro state (OUT(x) -> x, op -> `=`) is set explicitly here; it is the
 * same state the preceding function leaves behind.
 */
int trivium_withInitOutput(const BYTE *key, const BYTE *iv, BYTE *buf, unsigned int numOutputBytes) {
  DECLARE_VARIABLES(buf)
  VERIFY_INPUT_VALUES
  INIT_REGISTERS
#undef OUT
#define OUT(x) x
#undef op
#define op =
  i = 0;
  while (i < numOutputBlocks) {
    ROUND
    i++;
  }
  return 0;
}

/*
 * Xors numOutputBytes bytes of keystream into the existing contents of buf.
 * The 18 warm-up blocks (ROUNDS) are run with output suppressed.
 *
 * Returns 0 on success (including numOutputBytes == 0) and -1 when
 * numOutputBytes is not a multiple of 8.
 *
 * On exit the macro state is OUT(x) -> x and op -> `^=`.
 */
int trivium_xor(const BYTE *key, const BYTE *iv, BYTE *buf, unsigned int numOutputBytes) {
  DECLARE_VARIABLES(buf)
  VERIFY_INPUT_VALUES
  INIT_REGISTERS

  /* warm-up: OUT swallows the output statement, so op is not expanded */
#undef OUT
#define OUT(x)
  ROUNDS

  /* keystream: xor each block into the buffer */
#undef OUT
#define OUT(x) x
#undef op
#define op ^=
  i = 0;
  while (i < numOutputBlocks) {
    ROUND
    i++;
  }
  return 0;
}

/*
 * Like trivium_xor(), except that no warm-up blocks are discarded: the first
 * block xored into buf is produced directly from the freshly loaded state.
 *
 * Returns 0 on success (including numOutputBytes == 0) and -1 when
 * numOutputBytes is not a multiple of 8.
 *
 * The macro state (OUT(x) -> x, op -> `^=`) is set explicitly here; it is the
 * same state the preceding function leaves behind.
 */
int trivium_xor_withInitOutput(const BYTE *key, const BYTE *iv, BYTE *buf, unsigned int numOutputBytes) {
  DECLARE_VARIABLES(buf)
  VERIFY_INPUT_VALUES
  INIT_REGISTERS
#undef OUT
#define OUT(x) x
#undef op
#define op ^=
  i = 0;
  while (i < numOutputBlocks) {
    ROUND
    i++;
  }
  return 0;
}



/*
 * For every 64-bit block: outBuf[block] ^= inBuf[block] ^ keystream[block].
 * The result is xored into whatever outBuf already holds, so a caller that
 * wants plain ciphertext has to clear outBuf first. The 18 warm-up blocks
 * (ROUNDS) are run with output suppressed.
 *
 * The input is consumed as whole 64-bit words, one per output block. (It was
 * previously read through the byte pointer, which used a single input byte
 * per 8-byte output block.) Like outBuf, inBuf is accessed through a UINT64
 * pointer.
 *
 * Returns 0 on success (including numOutputBytes == 0), -1 when
 * numOutputBytes is not a multiple of 8 or when numInputBytes is smaller
 * than numOutputBytes.
 *
 * On exit the macro state is OUT(x) -> x and op -> `^= *in64++ ^`.
 */
int trivium_encrypt_xor(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  const UINT64 *in64 = (const UINT64*)inBuf; /* read cursor, one word per output block */
  DECLARE_VARIABLES(outBuf)
  VERIFY_INPUT_VALUES
  if (numInputBytes < numOutputBytes) return -1;
  INIT_REGISTERS
#undef OUT
#define OUT(x)
  ROUNDS
  for (i=0; i<numOutputBlocks; i++) {
#undef OUT
#define OUT(x) x
#undef op
#define op ^= *in64++ ^
    ROUND
  }
  return 0;
}

/*
 * Variant of trivium_encrypt_xor() that also emits the warm-up blocks.
 *
 *   - The first 18 output blocks (or fewer, if fewer were requested) get the
 *     warm-up keystream xored in, with no input mixed in.
 *   - Every following block is outBuf[block] ^= input word ^ keystream,
 *     consuming the input from its start, one 64-bit word per block.
 *
 * Changes relative to the earlier version of this function:
 *   - The warm-up phase is bounded by numOutputBlocks. It used to write 18
 *     blocks unconditionally, running past the end of outBuf for requests
 *     shorter than 144 bytes.
 *   - The input is read as 64-bit words instead of one byte per block.
 *   - The length check no longer relies on an unsigned addition that can
 *     wrap around.
 *
 * Returns 0 on success (including numOutputBytes == 0), -1 when
 * numOutputBytes is not a multiple of 8 or when the input is shorter than
 * numOutputBytes - 144 bytes.
 *
 * On exit the macro state is OUT(x) -> x and op -> `^= *in64++ ^`.
 */
int trivium_encrypt_xor_withInitOutput(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  const UINT64 *in64 = (const UINT64*)inBuf; /* read cursor, one word per post-warm-up block */
  int numInitBlocks;
  DECLARE_VARIABLES(outBuf)
  VERIFY_INPUT_VALUES
  if (numOutputBytes > 18 * 8 && numInputBytes < numOutputBytes - 18 * 8) return -1;
  INIT_REGISTERS
  numInitBlocks = numOutputBlocks < 18 ? numOutputBlocks : 18;

  /* warm-up blocks: keystream only */
#undef OUT
#define OUT(x) x
#undef op
#define op ^=
  for (i=0; i<numInitBlocks; i++) {
    ROUND
  }

  /* remaining blocks: input xor keystream */
  for (; i<numOutputBlocks; i++) {
#undef op
#define op ^= *in64++ ^
    ROUND
  }
  return 0;
}

/*
 * End of the word-parallel implementation: drop its helper macros.
 * The tap names S66..S288 in particular have to go, because the bitsliced
 * round macros further down use the same spellings as ordinary identifiers
 * (and C1/C2 are reused there as macro names).
 */
#undef S66
#undef S69
#undef S93
#undef S162
#undef S171
#undef S177
#undef S243
#undef S264
#undef S288
#undef op
#undef OUT
#undef R
#undef DECLARE_VARIABLES
#undef VERIFY_INPUT_VALUES
#undef INIT_REGISTERS


/*
 * PRINT_ROUND: prints the source text of one bitsliced round for the round
 * number held in the variable `i` (must be in scope).
 *
 * It begins with declarations, so it has to be expanded at the start of a
 * block. The three getRoundIndices() calls fetch the indices for the three
 * sub-rounds; indices are 0-based, hence the +1 when printing S-names.
 * The emitted line has the form
 *   Sp ^= Sq; (x3)  OUT(*out++ op ...;)  Sp ^= (Ss & St) ^ Sr; (x3)
 * and ends with a line-continuation backslash unless i + 1 is a multiple
 * of 72.
 */
#define PRINT_ROUND \
    int p[3], q[3], r[3], s[3], t[3]; \
    getRoundIndices(i, 0, &p[0], &q[0], &r[0], &s[0], &t[0]); \
    getRoundIndices(i, 1, &p[1], &q[1], &r[1], &s[1], &t[1]); \
    getRoundIndices(i, 2, &p[2], &q[2], &r[2], &s[2], &t[2]); \
    printf("  S%d ^= S%d; S%d ^= S%d; S%d ^= S%d;", p[0]+1, q[0]+1, p[1]+1, q[1]+1, p[2]+1, q[2]+1); \
    printf(" OUT(*out++ op S%d ^ S%d ^ S%d;)", p[0]+1, p[1]+1, p[2]+1); \
    printf(" S%d ^= (S%d & S%d) ^ S%d;", p[0]+1, s[0]+1, t[0]+1, r[0]+1); \
    printf(" S%d ^= (S%d & S%d) ^ S%d;", p[1]+1, s[1]+1, t[1]+1, r[1]+1); \
    printf(" S%d ^= (S%d & S%d) ^ S%d; %s\n", p[2]+1, s[2]+1, t[2]+1, r[2]+1, (i+1) % 72 != 0 ? "\\" : "");

#ifdef _DEBUG
/*
 * Debug-only helper: prints C source text for a bitsliced Trivium
 * implementation to stdout. Nothing is returned.
 *
 * Emitted, in order:
 *   - macros C1..C4 with 72 rounds each (rounds 0..287), via PRINT_ROUND;
 *   - OUT/op definitions and the FULL_CYCLE_* macros;
 *   - the function triviumBitsliced(): declarations of S1..S288, loading of
 *     the key (S80 down to S1) and the IV (S173 down to S94), zeroing of the
 *     remaining bits, all-ones for S286..S288, four full cycles and eight
 *     keystream output statements.
 *
 * NOTE(review): in the emitted text the three FULL_CYCLE_* macros have the
 * same replacement list (C1 C2 C3 C4). Since a macro body is only expanded
 * where the macro is used, they will all see whatever OUT/op definition is
 * current at that point -- confirm this is what the consumer of the
 * generated code expects. The emitted function also uses FULL_CYCLE_OUT for
 * the initialization rounds; left as is.
 */
void generateBitslicedTriviumCode(void) {
  int i;

  /* round macros, 72 rounds apiece */
  printf("#define C1 \\\n");       for (i=  0; i< 72; i++) { PRINT_ROUND }
  printf("\n#define C2 \\\n");     for (i= 72; i<144; i++) { PRINT_ROUND }
  printf("\n#define C3 \\\n");     for (i=144; i<216; i++) { PRINT_ROUND }
  printf("\n#define C4 \\\n");     for (i=216; i<288; i++) { PRINT_ROUND }
  printf("\n\n#define OUT(x)\n");
  printf("#define op =\n");
  printf("#define FULL_CYCLE_NO_OUT C1 C2 C3 C4\n\n");
  printf("#undef OUT\n");
  printf("#define OUT(x) x\n");
  printf("#define FULL_CYCLE_OUT C1 C2 C3 C4\n\n");
  printf("#undef op\n");
  printf("#define op ^=\n");
  printf("#define FULL_CYCLE_XOR C1 C2 C3 C4\n\n");

  /* function header and the S1..S288 declaration list, ten names per line */
  printf("void triviumBitsliced(const UINT64 *key, const UINT64 *iv, UINT64 *out) {\n  UINT64\n  ");
  for (i=1; i<288; i++) {
    printf("%sS%d, ", (i < 10 ? "  " : (i < 100 ? " " : "")), i);
    if (i % 10 == 0)
      printf("\n    ");
  }
  printf("S288;\n\n  /* A */\n  ");
  for (i=80; i>0; i--)
    printf("%sS%d = *key%s;%s", i < 10 ? " " : "", i, i > 1 ? "++" : "", (80-i+1) % 10 == 0 ? "\n  " : " ");
  for (i=81; i<94; i++)
    printf("S%d = 0; ", i);
  printf("\n\n  /* B */\n  ");
  for (i=173; i>93; i--)
    printf("%sS%d = *iv%s;%s", i < 100 ? " " : "", i, i > 94 ? "++" : "", (173-i+1) % 10 == 0 ? "\n  " : " ");
  for (i=174; i<178; i++)
    printf("S%d = 0; ", i);

  printf("\n\n  /* C */%73s", "");
  for (i=178; i<286; i++) printf("S%d = 0;%s", i, (i+1) % 10 == 0 ? "\n  " : " ");
  /* the format has no conversion, so no argument is passed */
  printf("\n  S288 = S287 = S286 = 0xFFFFFFFFFFFFFFFF;\n\n");

  printf("  /* initialization rounds */\n");
  printf("  FULL_CYCLE_OUT\n");
  printf("  FULL_CYCLE_OUT\n");
  printf("  FULL_CYCLE_OUT\n");
  printf("  FULL_CYCLE_OUT\n\n");

  printf("  /* keystream rounds */\n");
  printf("  *out++ op S93 ^ S177 ^ S243 ^ S66 ^ S162 ^ S288;\n");
  printf("  *out++ op S92 ^ S176 ^ S242 ^ S65 ^ S161 ^ S287;\n");
  printf("  *out++ op S91 ^ S175 ^ S241 ^ S64 ^ S160 ^ S286;\n");
  printf("  *out++ op S90 ^ S174 ^ S240 ^ S63 ^ S159 ^ S285;\n");
  printf("  *out++ op S89 ^ S173 ^ S239 ^ S62 ^ S158 ^ S284;\n");
  printf("  *out++ op S88 ^ S172 ^ S238 ^ S61 ^ S157 ^ S283;\n");
  printf("  *out++ op S87 ^ S171 ^ S237 ^ S60 ^ S156 ^ S282;\n");
  printf("  *out   op S86 ^ S170 ^ S236 ^ S59 ^ S155 ^ S281;\n");
  printf("}\n");
}
#endif

/*
 * C1: rounds 0..71 of the bitsliced round schedule, one round per line.
 * The text has the shape printed by generateBitslicedTriviumCode() for
 * i = 0..71 and appears to be pasted from its output.
 *
 * Each line performs, for three target bits (one per register):
 *   - an xor with the register's feed-forward tap,
 *   - the optional output statement wrapped in OUT(...), using `op`,
 *   - an xor with (AND of the two preceding bits) and a tap from the
 *     neighbouring register.
 * Instead of shifting the state, every S-index drops by one from line to
 * line, wrapping from S1 to S288.
 *
 * At the expansion site S1..S288 must be assignable identifiers (the tap
 * macros of the same names were #undef'd above), `out` must be a writable
 * pointer whenever OUT passes its argument through, and OUT(x) and op must
 * be defined.
 */
#define C1 \
  S93 ^= S66; S177 ^= S162; S288 ^= S243; OUT(*out++ op S93 ^ S177 ^ S288;) S93 ^= (S91 & S92) ^ S171; S177 ^= (S175 & S176) ^ S264; S288 ^= (S286 & S287) ^ S69; \
  S92 ^= S65; S176 ^= S161; S287 ^= S242; OUT(*out++ op S92 ^ S176 ^ S287;) S92 ^= (S90 & S91) ^ S170; S176 ^= (S174 & S175) ^ S263; S287 ^= (S285 & S286) ^ S68; \
  S91 ^= S64; S175 ^= S160; S286 ^= S241; OUT(*out++ op S91 ^ S175 ^ S286;) S91 ^= (S89 & S90) ^ S169; S175 ^= (S173 & S174) ^ S262; S286 ^= (S284 & S285) ^ S67; \
  S90 ^= S63; S174 ^= S159; S285 ^= S240; OUT(*out++ op S90 ^ S174 ^ S285;) S90 ^= (S88 & S89) ^ S168; S174 ^= (S172 & S173) ^ S261; S285 ^= (S283 & S284) ^ S66; \
  S89 ^= S62; S173 ^= S158; S284 ^= S239; OUT(*out++ op S89 ^ S173 ^ S284;) S89 ^= (S87 & S88) ^ S167; S173 ^= (S171 & S172) ^ S260; S284 ^= (S282 & S283) ^ S65; \
  S88 ^= S61; S172 ^= S157; S283 ^= S238; OUT(*out++ op S88 ^ S172 ^ S283;) S88 ^= (S86 & S87) ^ S166; S172 ^= (S170 & S171) ^ S259; S283 ^= (S281 & S282) ^ S64; \
  S87 ^= S60; S171 ^= S156; S282 ^= S237; OUT(*out++ op S87 ^ S171 ^ S282;) S87 ^= (S85 & S86) ^ S165; S171 ^= (S169 & S170) ^ S258; S282 ^= (S280 & S281) ^ S63; \
  S86 ^= S59; S170 ^= S155; S281 ^= S236; OUT(*out++ op S86 ^ S170 ^ S281;) S86 ^= (S84 & S85) ^ S164; S170 ^= (S168 & S169) ^ S257; S281 ^= (S279 & S280) ^ S62; \
  S85 ^= S58; S169 ^= S154; S280 ^= S235; OUT(*out++ op S85 ^ S169 ^ S280;) S85 ^= (S83 & S84) ^ S163; S169 ^= (S167 & S168) ^ S256; S280 ^= (S278 & S279) ^ S61; \
  S84 ^= S57; S168 ^= S153; S279 ^= S234; OUT(*out++ op S84 ^ S168 ^ S279;) S84 ^= (S82 & S83) ^ S162; S168 ^= (S166 & S167) ^ S255; S279 ^= (S277 & S278) ^ S60; \
  S83 ^= S56; S167 ^= S152; S278 ^= S233; OUT(*out++ op S83 ^ S167 ^ S278;) S83 ^= (S81 & S82) ^ S161; S167 ^= (S165 & S166) ^ S254; S278 ^= (S276 & S277) ^ S59; \
  S82 ^= S55; S166 ^= S151; S277 ^= S232; OUT(*out++ op S82 ^ S166 ^ S277;) S82 ^= (S80 & S81) ^ S160; S166 ^= (S164 & S165) ^ S253; S277 ^= (S275 & S276) ^ S58; \
  S81 ^= S54; S165 ^= S150; S276 ^= S231; OUT(*out++ op S81 ^ S165 ^ S276;) S81 ^= (S79 & S80) ^ S159; S165 ^= (S163 & S164) ^ S252; S276 ^= (S274 & S275) ^ S57; \
  S80 ^= S53; S164 ^= S149; S275 ^= S230; OUT(*out++ op S80 ^ S164 ^ S275;) S80 ^= (S78 & S79) ^ S158; S164 ^= (S162 & S163) ^ S251; S275 ^= (S273 & S274) ^ S56; \
  S79 ^= S52; S163 ^= S148; S274 ^= S229; OUT(*out++ op S79 ^ S163 ^ S274;) S79 ^= (S77 & S78) ^ S157; S163 ^= (S161 & S162) ^ S250; S274 ^= (S272 & S273) ^ S55; \
  S78 ^= S51; S162 ^= S147; S273 ^= S228; OUT(*out++ op S78 ^ S162 ^ S273;) S78 ^= (S76 & S77) ^ S156; S162 ^= (S160 & S161) ^ S249; S273 ^= (S271 & S272) ^ S54; \
  S77 ^= S50; S161 ^= S146; S272 ^= S227; OUT(*out++ op S77 ^ S161 ^ S272;) S77 ^= (S75 & S76) ^ S155; S161 ^= (S159 & S160) ^ S248; S272 ^= (S270 & S271) ^ S53; \
  S76 ^= S49; S160 ^= S145; S271 ^= S226; OUT(*out++ op S76 ^ S160 ^ S271;) S76 ^= (S74 & S75) ^ S154; S160 ^= (S158 & S159) ^ S247; S271 ^= (S269 & S270) ^ S52; \
  S75 ^= S48; S159 ^= S144; S270 ^= S225; OUT(*out++ op S75 ^ S159 ^ S270;) S75 ^= (S73 & S74) ^ S153; S159 ^= (S157 & S158) ^ S246; S270 ^= (S268 & S269) ^ S51; \
  S74 ^= S47; S158 ^= S143; S269 ^= S224; OUT(*out++ op S74 ^ S158 ^ S269;) S74 ^= (S72 & S73) ^ S152; S158 ^= (S156 & S157) ^ S245; S269 ^= (S267 & S268) ^ S50; \
  S73 ^= S46; S157 ^= S142; S268 ^= S223; OUT(*out++ op S73 ^ S157 ^ S268;) S73 ^= (S71 & S72) ^ S151; S157 ^= (S155 & S156) ^ S244; S268 ^= (S266 & S267) ^ S49; \
  S72 ^= S45; S156 ^= S141; S267 ^= S222; OUT(*out++ op S72 ^ S156 ^ S267;) S72 ^= (S70 & S71) ^ S150; S156 ^= (S154 & S155) ^ S243; S267 ^= (S265 & S266) ^ S48; \
  S71 ^= S44; S155 ^= S140; S266 ^= S221; OUT(*out++ op S71 ^ S155 ^ S266;) S71 ^= (S69 & S70) ^ S149; S155 ^= (S153 & S154) ^ S242; S266 ^= (S264 & S265) ^ S47; \
  S70 ^= S43; S154 ^= S139; S265 ^= S220; OUT(*out++ op S70 ^ S154 ^ S265;) S70 ^= (S68 & S69) ^ S148; S154 ^= (S152 & S153) ^ S241; S265 ^= (S263 & S264) ^ S46; \
  S69 ^= S42; S153 ^= S138; S264 ^= S219; OUT(*out++ op S69 ^ S153 ^ S264;) S69 ^= (S67 & S68) ^ S147; S153 ^= (S151 & S152) ^ S240; S264 ^= (S262 & S263) ^ S45; \
  S68 ^= S41; S152 ^= S137; S263 ^= S218; OUT(*out++ op S68 ^ S152 ^ S263;) S68 ^= (S66 & S67) ^ S146; S152 ^= (S150 & S151) ^ S239; S263 ^= (S261 & S262) ^ S44; \
  S67 ^= S40; S151 ^= S136; S262 ^= S217; OUT(*out++ op S67 ^ S151 ^ S262;) S67 ^= (S65 & S66) ^ S145; S151 ^= (S149 & S150) ^ S238; S262 ^= (S260 & S261) ^ S43; \
  S66 ^= S39; S150 ^= S135; S261 ^= S216; OUT(*out++ op S66 ^ S150 ^ S261;) S66 ^= (S64 & S65) ^ S144; S150 ^= (S148 & S149) ^ S237; S261 ^= (S259 & S260) ^ S42; \
  S65 ^= S38; S149 ^= S134; S260 ^= S215; OUT(*out++ op S65 ^ S149 ^ S260;) S65 ^= (S63 & S64) ^ S143; S149 ^= (S147 & S148) ^ S236; S260 ^= (S258 & S259) ^ S41; \
  S64 ^= S37; S148 ^= S133; S259 ^= S214; OUT(*out++ op S64 ^ S148 ^ S259;) S64 ^= (S62 & S63) ^ S142; S148 ^= (S146 & S147) ^ S235; S259 ^= (S257 & S258) ^ S40; \
  S63 ^= S36; S147 ^= S132; S258 ^= S213; OUT(*out++ op S63 ^ S147 ^ S258;) S63 ^= (S61 & S62) ^ S141; S147 ^= (S145 & S146) ^ S234; S258 ^= (S256 & S257) ^ S39; \
  S62 ^= S35; S146 ^= S131; S257 ^= S212; OUT(*out++ op S62 ^ S146 ^ S257;) S62 ^= (S60 & S61) ^ S140; S146 ^= (S144 & S145) ^ S233; S257 ^= (S255 & S256) ^ S38; \
  S61 ^= S34; S145 ^= S130; S256 ^= S211; OUT(*out++ op S61 ^ S145 ^ S256;) S61 ^= (S59 & S60) ^ S139; S145 ^= (S143 & S144) ^ S232; S256 ^= (S254 & S255) ^ S37; \
  S60 ^= S33; S144 ^= S129; S255 ^= S210; OUT(*out++ op S60 ^ S144 ^ S255;) S60 ^= (S58 & S59) ^ S138; S144 ^= (S142 & S143) ^ S231; S255 ^= (S253 & S254) ^ S36; \
  S59 ^= S32; S143 ^= S128; S254 ^= S209; OUT(*out++ op S59 ^ S143 ^ S254;) S59 ^= (S57 & S58) ^ S137; S143 ^= (S141 & S142) ^ S230; S254 ^= (S252 & S253) ^ S35; \
  S58 ^= S31; S142 ^= S127; S253 ^= S208; OUT(*out++ op S58 ^ S142 ^ S253;) S58 ^= (S56 & S57) ^ S136; S142 ^= (S140 & S141) ^ S229; S253 ^= (S251 & S252) ^ S34; \
  S57 ^= S30; S141 ^= S126; S252 ^= S207; OUT(*out++ op S57 ^ S141 ^ S252;) S57 ^= (S55 & S56) ^ S135; S141 ^= (S139 & S140) ^ S228; S252 ^= (S250 & S251) ^ S33; \
  S56 ^= S29; S140 ^= S125; S251 ^= S206; OUT(*out++ op S56 ^ S140 ^ S251;) S56 ^= (S54 & S55) ^ S134; S140 ^= (S138 & S139) ^ S227; S251 ^= (S249 & S250) ^ S32; \
  S55 ^= S28; S139 ^= S124; S250 ^= S205; OUT(*out++ op S55 ^ S139 ^ S250;) S55 ^= (S53 & S54) ^ S133; S139 ^= (S137 & S138) ^ S226; S250 ^= (S248 & S249) ^ S31; \
  S54 ^= S27; S138 ^= S123; S249 ^= S204; OUT(*out++ op S54 ^ S138 ^ S249;) S54 ^= (S52 & S53) ^ S132; S138 ^= (S136 & S137) ^ S225; S249 ^= (S247 & S248) ^ S30; \
  S53 ^= S26; S137 ^= S122; S248 ^= S203; OUT(*out++ op S53 ^ S137 ^ S248;) S53 ^= (S51 & S52) ^ S131; S137 ^= (S135 & S136) ^ S224; S248 ^= (S246 & S247) ^ S29; \
  S52 ^= S25; S136 ^= S121; S247 ^= S202; OUT(*out++ op S52 ^ S136 ^ S247;) S52 ^= (S50 & S51) ^ S130; S136 ^= (S134 & S135) ^ S223; S247 ^= (S245 & S246) ^ S28; \
  S51 ^= S24; S135 ^= S120; S246 ^= S201; OUT(*out++ op S51 ^ S135 ^ S246;) S51 ^= (S49 & S50) ^ S129; S135 ^= (S133 & S134) ^ S222; S246 ^= (S244 & S245) ^ S27; \
  S50 ^= S23; S134 ^= S119; S245 ^= S200; OUT(*out++ op S50 ^ S134 ^ S245;) S50 ^= (S48 & S49) ^ S128; S134 ^= (S132 & S133) ^ S221; S245 ^= (S243 & S244) ^ S26; \
  S49 ^= S22; S133 ^= S118; S244 ^= S199; OUT(*out++ op S49 ^ S133 ^ S244;) S49 ^= (S47 & S48) ^ S127; S133 ^= (S131 & S132) ^ S220; S244 ^= (S242 & S243) ^ S25; \
  S48 ^= S21; S132 ^= S117; S243 ^= S198; OUT(*out++ op S48 ^ S132 ^ S243;) S48 ^= (S46 & S47) ^ S126; S132 ^= (S130 & S131) ^ S219; S243 ^= (S241 & S242) ^ S24; \
  S47 ^= S20; S131 ^= S116; S242 ^= S197; OUT(*out++ op S47 ^ S131 ^ S242;) S47 ^= (S45 & S46) ^ S125; S131 ^= (S129 & S130) ^ S218; S242 ^= (S240 & S241) ^ S23; \
  S46 ^= S19; S130 ^= S115; S241 ^= S196; OUT(*out++ op S46 ^ S130 ^ S241;) S46 ^= (S44 & S45) ^ S124; S130 ^= (S128 & S129) ^ S217; S241 ^= (S239 & S240) ^ S22; \
  S45 ^= S18; S129 ^= S114; S240 ^= S195; OUT(*out++ op S45 ^ S129 ^ S240;) S45 ^= (S43 & S44) ^ S123; S129 ^= (S127 & S128) ^ S216; S240 ^= (S238 & S239) ^ S21; \
  S44 ^= S17; S128 ^= S113; S239 ^= S194; OUT(*out++ op S44 ^ S128 ^ S239;) S44 ^= (S42 & S43) ^ S122; S128 ^= (S126 & S127) ^ S215; S239 ^= (S237 & S238) ^ S20; \
  S43 ^= S16; S127 ^= S112; S238 ^= S193; OUT(*out++ op S43 ^ S127 ^ S238;) S43 ^= (S41 & S42) ^ S121; S127 ^= (S125 & S126) ^ S214; S238 ^= (S236 & S237) ^ S19; \
  S42 ^= S15; S126 ^= S111; S237 ^= S192; OUT(*out++ op S42 ^ S126 ^ S237;) S42 ^= (S40 & S41) ^ S120; S126 ^= (S124 & S125) ^ S213; S237 ^= (S235 & S236) ^ S18; \
  S41 ^= S14; S125 ^= S110; S236 ^= S191; OUT(*out++ op S41 ^ S125 ^ S236;) S41 ^= (S39 & S40) ^ S119; S125 ^= (S123 & S124) ^ S212; S236 ^= (S234 & S235) ^ S17; \
  S40 ^= S13; S124 ^= S109; S235 ^= S190; OUT(*out++ op S40 ^ S124 ^ S235;) S40 ^= (S38 & S39) ^ S118; S124 ^= (S122 & S123) ^ S211; S235 ^= (S233 & S234) ^ S16; \
  S39 ^= S12; S123 ^= S108; S234 ^= S189; OUT(*out++ op S39 ^ S123 ^ S234;) S39 ^= (S37 & S38) ^ S117; S123 ^= (S121 & S122) ^ S210; S234 ^= (S232 & S233) ^ S15; \
  S38 ^= S11; S122 ^= S107; S233 ^= S188; OUT(*out++ op S38 ^ S122 ^ S233;) S38 ^= (S36 & S37) ^ S116; S122 ^= (S120 & S121) ^ S209; S233 ^= (S231 & S232) ^ S14; \
  S37 ^= S10; S121 ^= S106; S232 ^= S187; OUT(*out++ op S37 ^ S121 ^ S232;) S37 ^= (S35 & S36) ^ S115; S121 ^= (S119 & S120) ^ S208; S232 ^= (S230 & S231) ^ S13; \
  S36 ^= S9; S120 ^= S105; S231 ^= S186; OUT(*out++ op S36 ^ S120 ^ S231;) S36 ^= (S34 & S35) ^ S114; S120 ^= (S118 & S119) ^ S207; S231 ^= (S229 & S230) ^ S12; \
  S35 ^= S8; S119 ^= S104; S230 ^= S185; OUT(*out++ op S35 ^ S119 ^ S230;) S35 ^= (S33 & S34) ^ S113; S119 ^= (S117 & S118) ^ S206; S230 ^= (S228 & S229) ^ S11; \
  S34 ^= S7; S118 ^= S103; S229 ^= S184; OUT(*out++ op S34 ^ S118 ^ S229;) S34 ^= (S32 & S33) ^ S112; S118 ^= (S116 & S117) ^ S205; S229 ^= (S227 & S228) ^ S10; \
  S33 ^= S6; S117 ^= S102; S228 ^= S183; OUT(*out++ op S33 ^ S117 ^ S228;) S33 ^= (S31 & S32) ^ S111; S117 ^= (S115 & S116) ^ S204; S228 ^= (S226 & S227) ^ S9; \
  S32 ^= S5; S116 ^= S101; S227 ^= S182; OUT(*out++ op S32 ^ S116 ^ S227;) S32 ^= (S30 & S31) ^ S110; S116 ^= (S114 & S115) ^ S203; S227 ^= (S225 & S226) ^ S8; \
  S31 ^= S4; S115 ^= S100; S226 ^= S181; OUT(*out++ op S31 ^ S115 ^ S226;) S31 ^= (S29 & S30) ^ S109; S115 ^= (S113 & S114) ^ S202; S226 ^= (S224 & S225) ^ S7; \
  S30 ^= S3; S114 ^= S99; S225 ^= S180; OUT(*out++ op S30 ^ S114 ^ S225;) S30 ^= (S28 & S29) ^ S108; S114 ^= (S112 & S113) ^ S201; S225 ^= (S223 & S224) ^ S6; \
  S29 ^= S2; S113 ^= S98; S224 ^= S179; OUT(*out++ op S29 ^ S113 ^ S224;) S29 ^= (S27 & S28) ^ S107; S113 ^= (S111 & S112) ^ S200; S224 ^= (S222 & S223) ^ S5; \
  S28 ^= S1; S112 ^= S97; S223 ^= S178; OUT(*out++ op S28 ^ S112 ^ S223;) S28 ^= (S26 & S27) ^ S106; S112 ^= (S110 & S111) ^ S199; S223 ^= (S221 & S222) ^ S4; \
  S27 ^= S288; S111 ^= S96; S222 ^= S177; OUT(*out++ op S27 ^ S111 ^ S222;) S27 ^= (S25 & S26) ^ S105; S111 ^= (S109 & S110) ^ S198; S222 ^= (S220 & S221) ^ S3; \
  S26 ^= S287; S110 ^= S95; S221 ^= S176; OUT(*out++ op S26 ^ S110 ^ S221;) S26 ^= (S24 & S25) ^ S104; S110 ^= (S108 & S109) ^ S197; S221 ^= (S219 & S220) ^ S2; \
  S25 ^= S286; S109 ^= S94; S220 ^= S175; OUT(*out++ op S25 ^ S109 ^ S220;) S25 ^= (S23 & S24) ^ S103; S109 ^= (S107 & S108) ^ S196; S220 ^= (S218 & S219) ^ S1; \
  S24 ^= S285; S108 ^= S93; S219 ^= S174; OUT(*out++ op S24 ^ S108 ^ S219;) S24 ^= (S22 & S23) ^ S102; S108 ^= (S106 & S107) ^ S195; S219 ^= (S217 & S218) ^ S288; \
  S23 ^= S284; S107 ^= S92; S218 ^= S173; OUT(*out++ op S23 ^ S107 ^ S218;) S23 ^= (S21 & S22) ^ S101; S107 ^= (S105 & S106) ^ S194; S218 ^= (S216 & S217) ^ S287; \
  S22 ^= S283; S106 ^= S91; S217 ^= S172; OUT(*out++ op S22 ^ S106 ^ S217;) S22 ^= (S20 & S21) ^ S100; S106 ^= (S104 & S105) ^ S193; S217 ^= (S215 & S216) ^ S286;

/*
 * C2: rounds 72..143 of the bitsliced round schedule, one round per line,
 * continuing the index sequence where the preceding round macro stops
 * (first targets here are S21, S105 and S216). The text has the shape
 * printed by generateBitslicedTriviumCode() for i = 72..143.
 *
 * Each line xors the feed-forward tap into its three target bits, optionally
 * emits output through OUT(...) / op, then xors in the AND term and the tap
 * from the neighbouring register. S-indices wrap from S1 to S288.
 *
 * At the expansion site S1..S288 must be assignable identifiers, `out` a
 * writable pointer whenever OUT passes its argument through, and OUT(x) and
 * op must be defined.
 */
#define C2 \
  S21 ^= S282; S105 ^= S90; S216 ^= S171; OUT(*out++ op S21 ^ S105 ^ S216;) S21 ^= (S19 & S20) ^ S99; S105 ^= (S103 & S104) ^ S192; S216 ^= (S214 & S215) ^ S285; \
  S20 ^= S281; S104 ^= S89; S215 ^= S170; OUT(*out++ op S20 ^ S104 ^ S215;) S20 ^= (S18 & S19) ^ S98; S104 ^= (S102 & S103) ^ S191; S215 ^= (S213 & S214) ^ S284; \
  S19 ^= S280; S103 ^= S88; S214 ^= S169; OUT(*out++ op S19 ^ S103 ^ S214;) S19 ^= (S17 & S18) ^ S97; S103 ^= (S101 & S102) ^ S190; S214 ^= (S212 & S213) ^ S283; \
  S18 ^= S279; S102 ^= S87; S213 ^= S168; OUT(*out++ op S18 ^ S102 ^ S213;) S18 ^= (S16 & S17) ^ S96; S102 ^= (S100 & S101) ^ S189; S213 ^= (S211 & S212) ^ S282; \
  S17 ^= S278; S101 ^= S86; S212 ^= S167; OUT(*out++ op S17 ^ S101 ^ S212;) S17 ^= (S15 & S16) ^ S95; S101 ^= (S99 & S100) ^ S188; S212 ^= (S210 & S211) ^ S281; \
  S16 ^= S277; S100 ^= S85; S211 ^= S166; OUT(*out++ op S16 ^ S100 ^ S211;) S16 ^= (S14 & S15) ^ S94; S100 ^= (S98 & S99) ^ S187; S211 ^= (S209 & S210) ^ S280; \
  S15 ^= S276; S99 ^= S84; S210 ^= S165; OUT(*out++ op S15 ^ S99 ^ S210;) S15 ^= (S13 & S14) ^ S93; S99 ^= (S97 & S98) ^ S186; S210 ^= (S208 & S209) ^ S279; \
  S14 ^= S275; S98 ^= S83; S209 ^= S164; OUT(*out++ op S14 ^ S98 ^ S209;) S14 ^= (S12 & S13) ^ S92; S98 ^= (S96 & S97) ^ S185; S209 ^= (S207 & S208) ^ S278; \
  S13 ^= S274; S97 ^= S82; S208 ^= S163; OUT(*out++ op S13 ^ S97 ^ S208;) S13 ^= (S11 & S12) ^ S91; S97 ^= (S95 & S96) ^ S184; S208 ^= (S206 & S207) ^ S277; \
  S12 ^= S273; S96 ^= S81; S207 ^= S162; OUT(*out++ op S12 ^ S96 ^ S207;) S12 ^= (S10 & S11) ^ S90; S96 ^= (S94 & S95) ^ S183; S207 ^= (S205 & S206) ^ S276; \
  S11 ^= S272; S95 ^= S80; S206 ^= S161; OUT(*out++ op S11 ^ S95 ^ S206;) S11 ^= (S9 & S10) ^ S89; S95 ^= (S93 & S94) ^ S182; S206 ^= (S204 & S205) ^ S275; \
  S10 ^= S271; S94 ^= S79; S205 ^= S160; OUT(*out++ op S10 ^ S94 ^ S205;) S10 ^= (S8 & S9) ^ S88; S94 ^= (S92 & S93) ^ S181; S205 ^= (S203 & S204) ^ S274; \
  S9 ^= S270; S93 ^= S78; S204 ^= S159; OUT(*out++ op S9 ^ S93 ^ S204;) S9 ^= (S7 & S8) ^ S87; S93 ^= (S91 & S92) ^ S180; S204 ^= (S202 & S203) ^ S273; \
  S8 ^= S269; S92 ^= S77; S203 ^= S158; OUT(*out++ op S8 ^ S92 ^ S203;) S8 ^= (S6 & S7) ^ S86; S92 ^= (S90 & S91) ^ S179; S203 ^= (S201 & S202) ^ S272; \
  S7 ^= S268; S91 ^= S76; S202 ^= S157; OUT(*out++ op S7 ^ S91 ^ S202;) S7 ^= (S5 & S6) ^ S85; S91 ^= (S89 & S90) ^ S178; S202 ^= (S200 & S201) ^ S271; \
  S6 ^= S267; S90 ^= S75; S201 ^= S156; OUT(*out++ op S6 ^ S90 ^ S201;) S6 ^= (S4 & S5) ^ S84; S90 ^= (S88 & S89) ^ S177; S201 ^= (S199 & S200) ^ S270; \
  S5 ^= S266; S89 ^= S74; S200 ^= S155; OUT(*out++ op S5 ^ S89 ^ S200;) S5 ^= (S3 & S4) ^ S83; S89 ^= (S87 & S88) ^ S176; S200 ^= (S198 & S199) ^ S269; \
  S4 ^= S265; S88 ^= S73; S199 ^= S154; OUT(*out++ op S4 ^ S88 ^ S199;) S4 ^= (S2 & S3) ^ S82; S88 ^= (S86 & S87) ^ S175; S199 ^= (S197 & S198) ^ S268; \
  S3 ^= S264; S87 ^= S72; S198 ^= S153; OUT(*out++ op S3 ^ S87 ^ S198;) S3 ^= (S1 & S2) ^ S81; S87 ^= (S85 & S86) ^ S174; S198 ^= (S196 & S197) ^ S267; \
  S2 ^= S263; S86 ^= S71; S197 ^= S152; OUT(*out++ op S2 ^ S86 ^ S197;) S2 ^= (S288 & S1) ^ S80; S86 ^= (S84 & S85) ^ S173; S197 ^= (S195 & S196) ^ S266; \
  S1 ^= S262; S85 ^= S70; S196 ^= S151; OUT(*out++ op S1 ^ S85 ^ S196;) S1 ^= (S287 & S288) ^ S79; S85 ^= (S83 & S84) ^ S172; S196 ^= (S194 & S195) ^ S265; \
  S288 ^= S261; S84 ^= S69; S195 ^= S150; OUT(*out++ op S288 ^ S84 ^ S195;) S288 ^= (S286 & S287) ^ S78; S84 ^= (S82 & S83) ^ S171; S195 ^= (S193 & S194) ^ S264; \
  S287 ^= S260; S83 ^= S68; S194 ^= S149; OUT(*out++ op S287 ^ S83 ^ S194;) S287 ^= (S285 & S286) ^ S77; S83 ^= (S81 & S82) ^ S170; S194 ^= (S192 & S193) ^ S263; \
  S286 ^= S259; S82 ^= S67; S193 ^= S148; OUT(*out++ op S286 ^ S82 ^ S193;) S286 ^= (S284 & S285) ^ S76; S82 ^= (S80 & S81) ^ S169; S193 ^= (S191 & S192) ^ S262; \
  S285 ^= S258; S81 ^= S66; S192 ^= S147; OUT(*out++ op S285 ^ S81 ^ S192;) S285 ^= (S283 & S284) ^ S75; S81 ^= (S79 & S80) ^ S168; S192 ^= (S190 & S191) ^ S261; \
  S284 ^= S257; S80 ^= S65; S191 ^= S146; OUT(*out++ op S284 ^ S80 ^ S191;) S284 ^= (S282 & S283) ^ S74; S80 ^= (S78 & S79) ^ S167; S191 ^= (S189 & S190) ^ S260; \
  S283 ^= S256; S79 ^= S64; S190 ^= S145; OUT(*out++ op S283 ^ S79 ^ S190;) S283 ^= (S281 & S282) ^ S73; S79 ^= (S77 & S78) ^ S166; S190 ^= (S188 & S189) ^ S259; \
  S282 ^= S255; S78 ^= S63; S189 ^= S144; OUT(*out++ op S282 ^ S78 ^ S189;) S282 ^= (S280 & S281) ^ S72; S78 ^= (S76 & S77) ^ S165; S189 ^= (S187 & S188) ^ S258; \
  S281 ^= S254; S77 ^= S62; S188 ^= S143; OUT(*out++ op S281 ^ S77 ^ S188;) S281 ^= (S279 & S280) ^ S71; S77 ^= (S75 & S76) ^ S164; S188 ^= (S186 & S187) ^ S257; \
  S280 ^= S253; S76 ^= S61; S187 ^= S142; OUT(*out++ op S280 ^ S76 ^ S187;) S280 ^= (S278 & S279) ^ S70; S76 ^= (S74 & S75) ^ S163; S187 ^= (S185 & S186) ^ S256; \
  S279 ^= S252; S75 ^= S60; S186 ^= S141; OUT(*out++ op S279 ^ S75 ^ S186;) S279 ^= (S277 & S278) ^ S69; S75 ^= (S73 & S74) ^ S162; S186 ^= (S184 & S185) ^ S255; \
  S278 ^= S251; S74 ^= S59; S185 ^= S140; OUT(*out++ op S278 ^ S74 ^ S185;) S278 ^= (S276 & S277) ^ S68; S74 ^= (S72 & S73) ^ S161; S185 ^= (S183 & S184) ^ S254; \
  S277 ^= S250; S73 ^= S58; S184 ^= S139; OUT(*out++ op S277 ^ S73 ^ S184;) S277 ^= (S275 & S276) ^ S67; S73 ^= (S71 & S72) ^ S160; S184 ^= (S182 & S183) ^ S253; \
  S276 ^= S249; S72 ^= S57; S183 ^= S138; OUT(*out++ op S276 ^ S72 ^ S183;) S276 ^= (S274 & S275) ^ S66; S72 ^= (S70 & S71) ^ S159; S183 ^= (S181 & S182) ^ S252; \
  S275 ^= S248; S71 ^= S56; S182 ^= S137; OUT(*out++ op S275 ^ S71 ^ S182;) S275 ^= (S273 & S274) ^ S65; S71 ^= (S69 & S70) ^ S158; S182 ^= (S180 & S181) ^ S251; \
  S274 ^= S247; S70 ^= S55; S181 ^= S136; OUT(*out++ op S274 ^ S70 ^ S181;) S274 ^= (S272 & S273) ^ S64; S70 ^= (S68 & S69) ^ S157; S181 ^= (S179 & S180) ^ S250; \
  S273 ^= S246; S69 ^= S54; S180 ^= S135; OUT(*out++ op S273 ^ S69 ^ S180;) S273 ^= (S271 & S272) ^ S63; S69 ^= (S67 & S68) ^ S156; S180 ^= (S178 & S179) ^ S249; \
  S272 ^= S245; S68 ^= S53; S179 ^= S134; OUT(*out++ op S272 ^ S68 ^ S179;) S272 ^= (S270 & S271) ^ S62; S68 ^= (S66 & S67) ^ S155; S179 ^= (S177 & S178) ^ S248; \
  S271 ^= S244; S67 ^= S52; S178 ^= S133; OUT(*out++ op S271 ^ S67 ^ S178;) S271 ^= (S269 & S270) ^ S61; S67 ^= (S65 & S66) ^ S154; S178 ^= (S176 & S177) ^ S247; \
  S270 ^= S243; S66 ^= S51; S177 ^= S132; OUT(*out++ op S270 ^ S66 ^ S177;) S270 ^= (S268 & S269) ^ S60; S66 ^= (S64 & S65) ^ S153; S177 ^= (S175 & S176) ^ S246; \
  S269 ^= S242; S65 ^= S50; S176 ^= S131; OUT(*out++ op S269 ^ S65 ^ S176;) S269 ^= (S267 & S268) ^ S59; S65 ^= (S63 & S64) ^ S152; S176 ^= (S174 & S175) ^ S245; \
  S268 ^= S241; S64 ^= S49; S175 ^= S130; OUT(*out++ op S268 ^ S64 ^ S175;) S268 ^= (S266 & S267) ^ S58; S64 ^= (S62 & S63) ^ S151; S175 ^= (S173 & S174) ^ S244; \
  S267 ^= S240; S63 ^= S48; S174 ^= S129; OUT(*out++ op S267 ^ S63 ^ S174;) S267 ^= (S265 & S266) ^ S57; S63 ^= (S61 & S62) ^ S150; S174 ^= (S172 & S173) ^ S243; \
  S266 ^= S239; S62 ^= S47; S173 ^= S128; OUT(*out++ op S266 ^ S62 ^ S173;) S266 ^= (S264 & S265) ^ S56; S62 ^= (S60 & S61) ^ S149; S173 ^= (S171 & S172) ^ S242; \
  S265 ^= S238; S61 ^= S46; S172 ^= S127; OUT(*out++ op S265 ^ S61 ^ S172;) S265 ^= (S263 & S264) ^ S55; S61 ^= (S59 & S60) ^ S148; S172 ^= (S170 & S171) ^ S241; \
  S264 ^= S237; S60 ^= S45; S171 ^= S126; OUT(*out++ op S264 ^ S60 ^ S171;) S264 ^= (S262 & S263) ^ S54; S60 ^= (S58 & S59) ^ S147; S171 ^= (S169 & S170) ^ S240; \
  S263 ^= S236; S59 ^= S44; S170 ^= S125; OUT(*out++ op S263 ^ S59 ^ S170;) S263 ^= (S261 & S262) ^ S53; S59 ^= (S57 & S58) ^ S146; S170 ^= (S168 & S169) ^ S239; \
  S262 ^= S235; S58 ^= S43; S169 ^= S124; OUT(*out++ op S262 ^ S58 ^ S169;) S262 ^= (S260 & S261) ^ S52; S58 ^= (S56 & S57) ^ S145; S169 ^= (S167 & S168) ^ S238; \
  S261 ^= S234; S57 ^= S42; S168 ^= S123; OUT(*out++ op S261 ^ S57 ^ S168;) S261 ^= (S259 & S260) ^ S51; S57 ^= (S55 & S56) ^ S144; S168 ^= (S166 & S167) ^ S237; \
  S260 ^= S233; S56 ^= S41; S167 ^= S122; OUT(*out++ op S260 ^ S56 ^ S167;) S260 ^= (S258 & S259) ^ S50; S56 ^= (S54 & S55) ^ S143; S167 ^= (S165 & S166) ^ S236; \
  S259 ^= S232; S55 ^= S40; S166 ^= S121; OUT(*out++ op S259 ^ S55 ^ S166;) S259 ^= (S257 & S258) ^ S49; S55 ^= (S53 & S54) ^ S142; S166 ^= (S164 & S165) ^ S235; \
  S258 ^= S231; S54 ^= S39; S165 ^= S120; OUT(*out++ op S258 ^ S54 ^ S165;) S258 ^= (S256 & S257) ^ S48; S54 ^= (S52 & S53) ^ S141; S165 ^= (S163 & S164) ^ S234; \
  S257 ^= S230; S53 ^= S38; S164 ^= S119; OUT(*out++ op S257 ^ S53 ^ S164;) S257 ^= (S255 & S256) ^ S47; S53 ^= (S51 & S52) ^ S140; S164 ^= (S162 & S163) ^ S233; \
  S256 ^= S229; S52 ^= S37; S163 ^= S118; OUT(*out++ op S256 ^ S52 ^ S163;) S256 ^= (S254 & S255) ^ S46; S52 ^= (S50 & S51) ^ S139; S163 ^= (S161 & S162) ^ S232; \
  S255 ^= S228; S51 ^= S36; S162 ^= S117; OUT(*out++ op S255 ^ S51 ^ S162;) S255 ^= (S253 & S254) ^ S45; S51 ^= (S49 & S50) ^ S138; S162 ^= (S160 & S161) ^ S231; \
  S254 ^= S227; S50 ^= S35; S161 ^= S116; OUT(*out++ op S254 ^ S50 ^ S161;) S254 ^= (S252 & S253) ^ S44; S50 ^= (S48 & S49) ^ S137; S161 ^= (S159 & S160) ^ S230; \
  S253 ^= S226; S49 ^= S34; S160 ^= S115; OUT(*out++ op S253 ^ S49 ^ S160;) S253 ^= (S251 & S252) ^ S43; S49 ^= (S47 & S48) ^ S136; S160 ^= (S158 & S159) ^ S229; \
  S252 ^= S225; S48 ^= S33; S159 ^= S114; OUT(*out++ op S252 ^ S48 ^ S159;) S252 ^= (S250 & S251) ^ S42; S48 ^= (S46 & S47) ^ S135; S159 ^= (S157 & S158) ^ S228; \
  S251 ^= S224; S47 ^= S32; S158 ^= S113; OUT(*out++ op S251 ^ S47 ^ S158;) S251 ^= (S249 & S250) ^ S41; S47 ^= (S45 & S46) ^ S134; S158 ^= (S156 & S157) ^ S227; \
  S250 ^= S223; S46 ^= S31; S157 ^= S112; OUT(*out++ op S250 ^ S46 ^ S157;) S250 ^= (S248 & S249) ^ S40; S46 ^= (S44 & S45) ^ S133; S157 ^= (S155 & S156) ^ S226; \
  S249 ^= S222; S45 ^= S30; S156 ^= S111; OUT(*out++ op S249 ^ S45 ^ S156;) S249 ^= (S247 & S248) ^ S39; S45 ^= (S43 & S44) ^ S132; S156 ^= (S154 & S155) ^ S225; \
  S248 ^= S221; S44 ^= S29; S155 ^= S110; OUT(*out++ op S248 ^ S44 ^ S155;) S248 ^= (S246 & S247) ^ S38; S44 ^= (S42 & S43) ^ S131; S155 ^= (S153 & S154) ^ S224; \
  S247 ^= S220; S43 ^= S28; S154 ^= S109; OUT(*out++ op S247 ^ S43 ^ S154;) S247 ^= (S245 & S246) ^ S37; S43 ^= (S41 & S42) ^ S130; S154 ^= (S152 & S153) ^ S223; \
  S246 ^= S219; S42 ^= S27; S153 ^= S108; OUT(*out++ op S246 ^ S42 ^ S153;) S246 ^= (S244 & S245) ^ S36; S42 ^= (S40 & S41) ^ S129; S153 ^= (S151 & S152) ^ S222; \
  S245 ^= S218; S41 ^= S26; S152 ^= S107; OUT(*out++ op S245 ^ S41 ^ S152;) S245 ^= (S243 & S244) ^ S35; S41 ^= (S39 & S40) ^ S128; S152 ^= (S150 & S151) ^ S221; \
  S244 ^= S217; S40 ^= S25; S151 ^= S106; OUT(*out++ op S244 ^ S40 ^ S151;) S244 ^= (S242 & S243) ^ S34; S40 ^= (S38 & S39) ^ S127; S151 ^= (S149 & S150) ^ S220; \
  S243 ^= S216; S39 ^= S24; S150 ^= S105; OUT(*out++ op S243 ^ S39 ^ S150;) S243 ^= (S241 & S242) ^ S33; S39 ^= (S37 & S38) ^ S126; S150 ^= (S148 & S149) ^ S219; \
  S242 ^= S215; S38 ^= S23; S149 ^= S104; OUT(*out++ op S242 ^ S38 ^ S149;) S242 ^= (S240 & S241) ^ S32; S38 ^= (S36 & S37) ^ S125; S149 ^= (S147 & S148) ^ S218; \
  S241 ^= S214; S37 ^= S22; S148 ^= S103; OUT(*out++ op S241 ^ S37 ^ S148;) S241 ^= (S239 & S240) ^ S31; S37 ^= (S35 & S36) ^ S124; S148 ^= (S146 & S147) ^ S217; \
  S240 ^= S213; S36 ^= S21; S147 ^= S102; OUT(*out++ op S240 ^ S36 ^ S147;) S240 ^= (S238 & S239) ^ S30; S36 ^= (S34 & S35) ^ S123; S147 ^= (S145 & S146) ^ S216; \
  S239 ^= S212; S35 ^= S20; S146 ^= S101; OUT(*out++ op S239 ^ S35 ^ S146;) S239 ^= (S237 & S238) ^ S29; S35 ^= (S33 & S34) ^ S122; S146 ^= (S144 & S145) ^ S215; \
  S238 ^= S211; S34 ^= S19; S145 ^= S100; OUT(*out++ op S238 ^ S34 ^ S145;) S238 ^= (S236 & S237) ^ S28; S34 ^= (S32 & S33) ^ S121; S145 ^= (S143 & S144) ^ S214;

/*
 * C3: 72 unrolled rounds, one per line; the third of the four pieces that
 * FULL_CYCLE strings together (C1 C2 C3 C4).
 *
 * No state is shifted. Each round XORs into three of the S1..S288 words in
 * place, and the variable indices drop by one from line to line (wrapping
 * from S1 to S288), so the renaming plays the role of the shift.
 *
 * Every round has the same shape:
 *   1. XOR one tap word into each of the three words being updated,
 *   2. OUT(*out++ op <xor of the three words>;) - whether this emits an
 *      output word, and how it is combined with *out, depends on how the
 *      expanding function has defined OUT and op,
 *   3. XOR the AND of the two (cyclically) preceding-index words, plus one
 *      further tap word, into each of the three words.
 *
 * Requires S1..S288, out, OUT and op to be in scope at the expansion site.
 */
#define C3 \
  S237 ^= S210; S33 ^= S18; S144 ^= S99; OUT(*out++ op S237 ^ S33 ^ S144;) S237 ^= (S235 & S236) ^ S27; S33 ^= (S31 & S32) ^ S120; S144 ^= (S142 & S143) ^ S213; \
  S236 ^= S209; S32 ^= S17; S143 ^= S98; OUT(*out++ op S236 ^ S32 ^ S143;) S236 ^= (S234 & S235) ^ S26; S32 ^= (S30 & S31) ^ S119; S143 ^= (S141 & S142) ^ S212; \
  S235 ^= S208; S31 ^= S16; S142 ^= S97; OUT(*out++ op S235 ^ S31 ^ S142;) S235 ^= (S233 & S234) ^ S25; S31 ^= (S29 & S30) ^ S118; S142 ^= (S140 & S141) ^ S211; \
  S234 ^= S207; S30 ^= S15; S141 ^= S96; OUT(*out++ op S234 ^ S30 ^ S141;) S234 ^= (S232 & S233) ^ S24; S30 ^= (S28 & S29) ^ S117; S141 ^= (S139 & S140) ^ S210; \
  S233 ^= S206; S29 ^= S14; S140 ^= S95; OUT(*out++ op S233 ^ S29 ^ S140;) S233 ^= (S231 & S232) ^ S23; S29 ^= (S27 & S28) ^ S116; S140 ^= (S138 & S139) ^ S209; \
  S232 ^= S205; S28 ^= S13; S139 ^= S94; OUT(*out++ op S232 ^ S28 ^ S139;) S232 ^= (S230 & S231) ^ S22; S28 ^= (S26 & S27) ^ S115; S139 ^= (S137 & S138) ^ S208; \
  S231 ^= S204; S27 ^= S12; S138 ^= S93; OUT(*out++ op S231 ^ S27 ^ S138;) S231 ^= (S229 & S230) ^ S21; S27 ^= (S25 & S26) ^ S114; S138 ^= (S136 & S137) ^ S207; \
  S230 ^= S203; S26 ^= S11; S137 ^= S92; OUT(*out++ op S230 ^ S26 ^ S137;) S230 ^= (S228 & S229) ^ S20; S26 ^= (S24 & S25) ^ S113; S137 ^= (S135 & S136) ^ S206; \
  S229 ^= S202; S25 ^= S10; S136 ^= S91; OUT(*out++ op S229 ^ S25 ^ S136;) S229 ^= (S227 & S228) ^ S19; S25 ^= (S23 & S24) ^ S112; S136 ^= (S134 & S135) ^ S205; \
  S228 ^= S201; S24 ^= S9; S135 ^= S90; OUT(*out++ op S228 ^ S24 ^ S135;) S228 ^= (S226 & S227) ^ S18; S24 ^= (S22 & S23) ^ S111; S135 ^= (S133 & S134) ^ S204; \
  S227 ^= S200; S23 ^= S8; S134 ^= S89; OUT(*out++ op S227 ^ S23 ^ S134;) S227 ^= (S225 & S226) ^ S17; S23 ^= (S21 & S22) ^ S110; S134 ^= (S132 & S133) ^ S203; \
  S226 ^= S199; S22 ^= S7; S133 ^= S88; OUT(*out++ op S226 ^ S22 ^ S133;) S226 ^= (S224 & S225) ^ S16; S22 ^= (S20 & S21) ^ S109; S133 ^= (S131 & S132) ^ S202; \
  S225 ^= S198; S21 ^= S6; S132 ^= S87; OUT(*out++ op S225 ^ S21 ^ S132;) S225 ^= (S223 & S224) ^ S15; S21 ^= (S19 & S20) ^ S108; S132 ^= (S130 & S131) ^ S201; \
  S224 ^= S197; S20 ^= S5; S131 ^= S86; OUT(*out++ op S224 ^ S20 ^ S131;) S224 ^= (S222 & S223) ^ S14; S20 ^= (S18 & S19) ^ S107; S131 ^= (S129 & S130) ^ S200; \
  S223 ^= S196; S19 ^= S4; S130 ^= S85; OUT(*out++ op S223 ^ S19 ^ S130;) S223 ^= (S221 & S222) ^ S13; S19 ^= (S17 & S18) ^ S106; S130 ^= (S128 & S129) ^ S199; \
  S222 ^= S195; S18 ^= S3; S129 ^= S84; OUT(*out++ op S222 ^ S18 ^ S129;) S222 ^= (S220 & S221) ^ S12; S18 ^= (S16 & S17) ^ S105; S129 ^= (S127 & S128) ^ S198; \
  S221 ^= S194; S17 ^= S2; S128 ^= S83; OUT(*out++ op S221 ^ S17 ^ S128;) S221 ^= (S219 & S220) ^ S11; S17 ^= (S15 & S16) ^ S104; S128 ^= (S126 & S127) ^ S197; \
  S220 ^= S193; S16 ^= S1; S127 ^= S82; OUT(*out++ op S220 ^ S16 ^ S127;) S220 ^= (S218 & S219) ^ S10; S16 ^= (S14 & S15) ^ S103; S127 ^= (S125 & S126) ^ S196; \
  S219 ^= S192; S15 ^= S288; S126 ^= S81; OUT(*out++ op S219 ^ S15 ^ S126;) S219 ^= (S217 & S218) ^ S9; S15 ^= (S13 & S14) ^ S102; S126 ^= (S124 & S125) ^ S195; \
  S218 ^= S191; S14 ^= S287; S125 ^= S80; OUT(*out++ op S218 ^ S14 ^ S125;) S218 ^= (S216 & S217) ^ S8; S14 ^= (S12 & S13) ^ S101; S125 ^= (S123 & S124) ^ S194; \
  S217 ^= S190; S13 ^= S286; S124 ^= S79; OUT(*out++ op S217 ^ S13 ^ S124;) S217 ^= (S215 & S216) ^ S7; S13 ^= (S11 & S12) ^ S100; S124 ^= (S122 & S123) ^ S193; \
  S216 ^= S189; S12 ^= S285; S123 ^= S78; OUT(*out++ op S216 ^ S12 ^ S123;) S216 ^= (S214 & S215) ^ S6; S12 ^= (S10 & S11) ^ S99; S123 ^= (S121 & S122) ^ S192; \
  S215 ^= S188; S11 ^= S284; S122 ^= S77; OUT(*out++ op S215 ^ S11 ^ S122;) S215 ^= (S213 & S214) ^ S5; S11 ^= (S9 & S10) ^ S98; S122 ^= (S120 & S121) ^ S191; \
  S214 ^= S187; S10 ^= S283; S121 ^= S76; OUT(*out++ op S214 ^ S10 ^ S121;) S214 ^= (S212 & S213) ^ S4; S10 ^= (S8 & S9) ^ S97; S121 ^= (S119 & S120) ^ S190; \
  S213 ^= S186; S9 ^= S282; S120 ^= S75; OUT(*out++ op S213 ^ S9 ^ S120;) S213 ^= (S211 & S212) ^ S3; S9 ^= (S7 & S8) ^ S96; S120 ^= (S118 & S119) ^ S189; \
  S212 ^= S185; S8 ^= S281; S119 ^= S74; OUT(*out++ op S212 ^ S8 ^ S119;) S212 ^= (S210 & S211) ^ S2; S8 ^= (S6 & S7) ^ S95; S119 ^= (S117 & S118) ^ S188; \
  S211 ^= S184; S7 ^= S280; S118 ^= S73; OUT(*out++ op S211 ^ S7 ^ S118;) S211 ^= (S209 & S210) ^ S1; S7 ^= (S5 & S6) ^ S94; S118 ^= (S116 & S117) ^ S187; \
  S210 ^= S183; S6 ^= S279; S117 ^= S72; OUT(*out++ op S210 ^ S6 ^ S117;) S210 ^= (S208 & S209) ^ S288; S6 ^= (S4 & S5) ^ S93; S117 ^= (S115 & S116) ^ S186; \
  S209 ^= S182; S5 ^= S278; S116 ^= S71; OUT(*out++ op S209 ^ S5 ^ S116;) S209 ^= (S207 & S208) ^ S287; S5 ^= (S3 & S4) ^ S92; S116 ^= (S114 & S115) ^ S185; \
  S208 ^= S181; S4 ^= S277; S115 ^= S70; OUT(*out++ op S208 ^ S4 ^ S115;) S208 ^= (S206 & S207) ^ S286; S4 ^= (S2 & S3) ^ S91; S115 ^= (S113 & S114) ^ S184; \
  S207 ^= S180; S3 ^= S276; S114 ^= S69; OUT(*out++ op S207 ^ S3 ^ S114;) S207 ^= (S205 & S206) ^ S285; S3 ^= (S1 & S2) ^ S90; S114 ^= (S112 & S113) ^ S183; \
  S206 ^= S179; S2 ^= S275; S113 ^= S68; OUT(*out++ op S206 ^ S2 ^ S113;) S206 ^= (S204 & S205) ^ S284; S2 ^= (S288 & S1) ^ S89; S113 ^= (S111 & S112) ^ S182; \
  S205 ^= S178; S1 ^= S274; S112 ^= S67; OUT(*out++ op S205 ^ S1 ^ S112;) S205 ^= (S203 & S204) ^ S283; S1 ^= (S287 & S288) ^ S88; S112 ^= (S110 & S111) ^ S181; \
  S204 ^= S177; S288 ^= S273; S111 ^= S66; OUT(*out++ op S204 ^ S288 ^ S111;) S204 ^= (S202 & S203) ^ S282; S288 ^= (S286 & S287) ^ S87; S111 ^= (S109 & S110) ^ S180; \
  S203 ^= S176; S287 ^= S272; S110 ^= S65; OUT(*out++ op S203 ^ S287 ^ S110;) S203 ^= (S201 & S202) ^ S281; S287 ^= (S285 & S286) ^ S86; S110 ^= (S108 & S109) ^ S179; \
  S202 ^= S175; S286 ^= S271; S109 ^= S64; OUT(*out++ op S202 ^ S286 ^ S109;) S202 ^= (S200 & S201) ^ S280; S286 ^= (S284 & S285) ^ S85; S109 ^= (S107 & S108) ^ S178; \
  S201 ^= S174; S285 ^= S270; S108 ^= S63; OUT(*out++ op S201 ^ S285 ^ S108;) S201 ^= (S199 & S200) ^ S279; S285 ^= (S283 & S284) ^ S84; S108 ^= (S106 & S107) ^ S177; \
  S200 ^= S173; S284 ^= S269; S107 ^= S62; OUT(*out++ op S200 ^ S284 ^ S107;) S200 ^= (S198 & S199) ^ S278; S284 ^= (S282 & S283) ^ S83; S107 ^= (S105 & S106) ^ S176; \
  S199 ^= S172; S283 ^= S268; S106 ^= S61; OUT(*out++ op S199 ^ S283 ^ S106;) S199 ^= (S197 & S198) ^ S277; S283 ^= (S281 & S282) ^ S82; S106 ^= (S104 & S105) ^ S175; \
  S198 ^= S171; S282 ^= S267; S105 ^= S60; OUT(*out++ op S198 ^ S282 ^ S105;) S198 ^= (S196 & S197) ^ S276; S282 ^= (S280 & S281) ^ S81; S105 ^= (S103 & S104) ^ S174; \
  S197 ^= S170; S281 ^= S266; S104 ^= S59; OUT(*out++ op S197 ^ S281 ^ S104;) S197 ^= (S195 & S196) ^ S275; S281 ^= (S279 & S280) ^ S80; S104 ^= (S102 & S103) ^ S173; \
  S196 ^= S169; S280 ^= S265; S103 ^= S58; OUT(*out++ op S196 ^ S280 ^ S103;) S196 ^= (S194 & S195) ^ S274; S280 ^= (S278 & S279) ^ S79; S103 ^= (S101 & S102) ^ S172; \
  S195 ^= S168; S279 ^= S264; S102 ^= S57; OUT(*out++ op S195 ^ S279 ^ S102;) S195 ^= (S193 & S194) ^ S273; S279 ^= (S277 & S278) ^ S78; S102 ^= (S100 & S101) ^ S171; \
  S194 ^= S167; S278 ^= S263; S101 ^= S56; OUT(*out++ op S194 ^ S278 ^ S101;) S194 ^= (S192 & S193) ^ S272; S278 ^= (S276 & S277) ^ S77; S101 ^= (S99 & S100) ^ S170; \
  S193 ^= S166; S277 ^= S262; S100 ^= S55; OUT(*out++ op S193 ^ S277 ^ S100;) S193 ^= (S191 & S192) ^ S271; S277 ^= (S275 & S276) ^ S76; S100 ^= (S98 & S99) ^ S169; \
  S192 ^= S165; S276 ^= S261; S99 ^= S54; OUT(*out++ op S192 ^ S276 ^ S99;) S192 ^= (S190 & S191) ^ S270; S276 ^= (S274 & S275) ^ S75; S99 ^= (S97 & S98) ^ S168; \
  S191 ^= S164; S275 ^= S260; S98 ^= S53; OUT(*out++ op S191 ^ S275 ^ S98;) S191 ^= (S189 & S190) ^ S269; S275 ^= (S273 & S274) ^ S74; S98 ^= (S96 & S97) ^ S167; \
  S190 ^= S163; S274 ^= S259; S97 ^= S52; OUT(*out++ op S190 ^ S274 ^ S97;) S190 ^= (S188 & S189) ^ S268; S274 ^= (S272 & S273) ^ S73; S97 ^= (S95 & S96) ^ S166; \
  S189 ^= S162; S273 ^= S258; S96 ^= S51; OUT(*out++ op S189 ^ S273 ^ S96;) S189 ^= (S187 & S188) ^ S267; S273 ^= (S271 & S272) ^ S72; S96 ^= (S94 & S95) ^ S165; \
  S188 ^= S161; S272 ^= S257; S95 ^= S50; OUT(*out++ op S188 ^ S272 ^ S95;) S188 ^= (S186 & S187) ^ S266; S272 ^= (S270 & S271) ^ S71; S95 ^= (S93 & S94) ^ S164; \
  S187 ^= S160; S271 ^= S256; S94 ^= S49; OUT(*out++ op S187 ^ S271 ^ S94;) S187 ^= (S185 & S186) ^ S265; S271 ^= (S269 & S270) ^ S70; S94 ^= (S92 & S93) ^ S163; \
  S186 ^= S159; S270 ^= S255; S93 ^= S48; OUT(*out++ op S186 ^ S270 ^ S93;) S186 ^= (S184 & S185) ^ S264; S270 ^= (S268 & S269) ^ S69; S93 ^= (S91 & S92) ^ S162; \
  S185 ^= S158; S269 ^= S254; S92 ^= S47; OUT(*out++ op S185 ^ S269 ^ S92;) S185 ^= (S183 & S184) ^ S263; S269 ^= (S267 & S268) ^ S68; S92 ^= (S90 & S91) ^ S161; \
  S184 ^= S157; S268 ^= S253; S91 ^= S46; OUT(*out++ op S184 ^ S268 ^ S91;) S184 ^= (S182 & S183) ^ S262; S268 ^= (S266 & S267) ^ S67; S91 ^= (S89 & S90) ^ S160; \
  S183 ^= S156; S267 ^= S252; S90 ^= S45; OUT(*out++ op S183 ^ S267 ^ S90;) S183 ^= (S181 & S182) ^ S261; S267 ^= (S265 & S266) ^ S66; S90 ^= (S88 & S89) ^ S159; \
  S182 ^= S155; S266 ^= S251; S89 ^= S44; OUT(*out++ op S182 ^ S266 ^ S89;) S182 ^= (S180 & S181) ^ S260; S266 ^= (S264 & S265) ^ S65; S89 ^= (S87 & S88) ^ S158; \
  S181 ^= S154; S265 ^= S250; S88 ^= S43; OUT(*out++ op S181 ^ S265 ^ S88;) S181 ^= (S179 & S180) ^ S259; S265 ^= (S263 & S264) ^ S64; S88 ^= (S86 & S87) ^ S157; \
  S180 ^= S153; S264 ^= S249; S87 ^= S42; OUT(*out++ op S180 ^ S264 ^ S87;) S180 ^= (S178 & S179) ^ S258; S264 ^= (S262 & S263) ^ S63; S87 ^= (S85 & S86) ^ S156; \
  S179 ^= S152; S263 ^= S248; S86 ^= S41; OUT(*out++ op S179 ^ S263 ^ S86;) S179 ^= (S177 & S178) ^ S257; S263 ^= (S261 & S262) ^ S62; S86 ^= (S84 & S85) ^ S155; \
  S178 ^= S151; S262 ^= S247; S85 ^= S40; OUT(*out++ op S178 ^ S262 ^ S85;) S178 ^= (S176 & S177) ^ S256; S262 ^= (S260 & S261) ^ S61; S85 ^= (S83 & S84) ^ S154; \
  S177 ^= S150; S261 ^= S246; S84 ^= S39; OUT(*out++ op S177 ^ S261 ^ S84;) S177 ^= (S175 & S176) ^ S255; S261 ^= (S259 & S260) ^ S60; S84 ^= (S82 & S83) ^ S153; \
  S176 ^= S149; S260 ^= S245; S83 ^= S38; OUT(*out++ op S176 ^ S260 ^ S83;) S176 ^= (S174 & S175) ^ S254; S260 ^= (S258 & S259) ^ S59; S83 ^= (S81 & S82) ^ S152; \
  S175 ^= S148; S259 ^= S244; S82 ^= S37; OUT(*out++ op S175 ^ S259 ^ S82;) S175 ^= (S173 & S174) ^ S253; S259 ^= (S257 & S258) ^ S58; S82 ^= (S80 & S81) ^ S151; \
  S174 ^= S147; S258 ^= S243; S81 ^= S36; OUT(*out++ op S174 ^ S258 ^ S81;) S174 ^= (S172 & S173) ^ S252; S258 ^= (S256 & S257) ^ S57; S81 ^= (S79 & S80) ^ S150; \
  S173 ^= S146; S257 ^= S242; S80 ^= S35; OUT(*out++ op S173 ^ S257 ^ S80;) S173 ^= (S171 & S172) ^ S251; S257 ^= (S255 & S256) ^ S56; S80 ^= (S78 & S79) ^ S149; \
  S172 ^= S145; S256 ^= S241; S79 ^= S34; OUT(*out++ op S172 ^ S256 ^ S79;) S172 ^= (S170 & S171) ^ S250; S256 ^= (S254 & S255) ^ S55; S79 ^= (S77 & S78) ^ S148; \
  S171 ^= S144; S255 ^= S240; S78 ^= S33; OUT(*out++ op S171 ^ S255 ^ S78;) S171 ^= (S169 & S170) ^ S249; S255 ^= (S253 & S254) ^ S54; S78 ^= (S76 & S77) ^ S147; \
  S170 ^= S143; S254 ^= S239; S77 ^= S32; OUT(*out++ op S170 ^ S254 ^ S77;) S170 ^= (S168 & S169) ^ S248; S254 ^= (S252 & S253) ^ S53; S77 ^= (S75 & S76) ^ S146; \
  S169 ^= S142; S253 ^= S238; S76 ^= S31; OUT(*out++ op S169 ^ S253 ^ S76;) S169 ^= (S167 & S168) ^ S247; S253 ^= (S251 & S252) ^ S52; S76 ^= (S74 & S75) ^ S145; \
  S168 ^= S141; S252 ^= S237; S75 ^= S30; OUT(*out++ op S168 ^ S252 ^ S75;) S168 ^= (S166 & S167) ^ S246; S252 ^= (S250 & S251) ^ S51; S75 ^= (S73 & S74) ^ S144; \
  S167 ^= S140; S251 ^= S236; S74 ^= S29; OUT(*out++ op S167 ^ S251 ^ S74;) S167 ^= (S165 & S166) ^ S245; S251 ^= (S249 & S250) ^ S50; S74 ^= (S72 & S73) ^ S143; \
  S166 ^= S139; S250 ^= S235; S73 ^= S28; OUT(*out++ op S166 ^ S250 ^ S73;) S166 ^= (S164 & S165) ^ S244; S250 ^= (S248 & S249) ^ S49; S73 ^= (S71 & S72) ^ S142;

/*
 * C4: 72 unrolled rounds, one per line; the last of the four pieces that
 * FULL_CYCLE strings together (C1 C2 C3 C4).
 *
 * As in the other pieces, no state is shifted: each round XORs into three of
 * the S1..S288 words in place and the indices drop by one per line (wrapping
 * from S1 to S288). The final line updates S94, S178 and S1.
 *
 * Every round has the same shape:
 *   1. XOR one tap word into each of the three words being updated,
 *   2. OUT(*out++ op <xor of the three words>;) - whether this emits an
 *      output word, and how it is combined with *out, depends on how the
 *      expanding function has defined OUT and op,
 *   3. XOR the AND of the two (cyclically) preceding-index words, plus one
 *      further tap word, into each of the three words.
 *
 * Requires S1..S288, out, OUT and op to be in scope at the expansion site.
 */
#define C4 \
  S165 ^= S138; S249 ^= S234; S72 ^= S27; OUT(*out++ op S165 ^ S249 ^ S72;) S165 ^= (S163 & S164) ^ S243; S249 ^= (S247 & S248) ^ S48; S72 ^= (S70 & S71) ^ S141; \
  S164 ^= S137; S248 ^= S233; S71 ^= S26; OUT(*out++ op S164 ^ S248 ^ S71;) S164 ^= (S162 & S163) ^ S242; S248 ^= (S246 & S247) ^ S47; S71 ^= (S69 & S70) ^ S140; \
  S163 ^= S136; S247 ^= S232; S70 ^= S25; OUT(*out++ op S163 ^ S247 ^ S70;) S163 ^= (S161 & S162) ^ S241; S247 ^= (S245 & S246) ^ S46; S70 ^= (S68 & S69) ^ S139; \
  S162 ^= S135; S246 ^= S231; S69 ^= S24; OUT(*out++ op S162 ^ S246 ^ S69;) S162 ^= (S160 & S161) ^ S240; S246 ^= (S244 & S245) ^ S45; S69 ^= (S67 & S68) ^ S138; \
  S161 ^= S134; S245 ^= S230; S68 ^= S23; OUT(*out++ op S161 ^ S245 ^ S68;) S161 ^= (S159 & S160) ^ S239; S245 ^= (S243 & S244) ^ S44; S68 ^= (S66 & S67) ^ S137; \
  S160 ^= S133; S244 ^= S229; S67 ^= S22; OUT(*out++ op S160 ^ S244 ^ S67;) S160 ^= (S158 & S159) ^ S238; S244 ^= (S242 & S243) ^ S43; S67 ^= (S65 & S66) ^ S136; \
  S159 ^= S132; S243 ^= S228; S66 ^= S21; OUT(*out++ op S159 ^ S243 ^ S66;) S159 ^= (S157 & S158) ^ S237; S243 ^= (S241 & S242) ^ S42; S66 ^= (S64 & S65) ^ S135; \
  S158 ^= S131; S242 ^= S227; S65 ^= S20; OUT(*out++ op S158 ^ S242 ^ S65;) S158 ^= (S156 & S157) ^ S236; S242 ^= (S240 & S241) ^ S41; S65 ^= (S63 & S64) ^ S134; \
  S157 ^= S130; S241 ^= S226; S64 ^= S19; OUT(*out++ op S157 ^ S241 ^ S64;) S157 ^= (S155 & S156) ^ S235; S241 ^= (S239 & S240) ^ S40; S64 ^= (S62 & S63) ^ S133; \
  S156 ^= S129; S240 ^= S225; S63 ^= S18; OUT(*out++ op S156 ^ S240 ^ S63;) S156 ^= (S154 & S155) ^ S234; S240 ^= (S238 & S239) ^ S39; S63 ^= (S61 & S62) ^ S132; \
  S155 ^= S128; S239 ^= S224; S62 ^= S17; OUT(*out++ op S155 ^ S239 ^ S62;) S155 ^= (S153 & S154) ^ S233; S239 ^= (S237 & S238) ^ S38; S62 ^= (S60 & S61) ^ S131; \
  S154 ^= S127; S238 ^= S223; S61 ^= S16; OUT(*out++ op S154 ^ S238 ^ S61;) S154 ^= (S152 & S153) ^ S232; S238 ^= (S236 & S237) ^ S37; S61 ^= (S59 & S60) ^ S130; \
  S153 ^= S126; S237 ^= S222; S60 ^= S15; OUT(*out++ op S153 ^ S237 ^ S60;) S153 ^= (S151 & S152) ^ S231; S237 ^= (S235 & S236) ^ S36; S60 ^= (S58 & S59) ^ S129; \
  S152 ^= S125; S236 ^= S221; S59 ^= S14; OUT(*out++ op S152 ^ S236 ^ S59;) S152 ^= (S150 & S151) ^ S230; S236 ^= (S234 & S235) ^ S35; S59 ^= (S57 & S58) ^ S128; \
  S151 ^= S124; S235 ^= S220; S58 ^= S13; OUT(*out++ op S151 ^ S235 ^ S58;) S151 ^= (S149 & S150) ^ S229; S235 ^= (S233 & S234) ^ S34; S58 ^= (S56 & S57) ^ S127; \
  S150 ^= S123; S234 ^= S219; S57 ^= S12; OUT(*out++ op S150 ^ S234 ^ S57;) S150 ^= (S148 & S149) ^ S228; S234 ^= (S232 & S233) ^ S33; S57 ^= (S55 & S56) ^ S126; \
  S149 ^= S122; S233 ^= S218; S56 ^= S11; OUT(*out++ op S149 ^ S233 ^ S56;) S149 ^= (S147 & S148) ^ S227; S233 ^= (S231 & S232) ^ S32; S56 ^= (S54 & S55) ^ S125; \
  S148 ^= S121; S232 ^= S217; S55 ^= S10; OUT(*out++ op S148 ^ S232 ^ S55;) S148 ^= (S146 & S147) ^ S226; S232 ^= (S230 & S231) ^ S31; S55 ^= (S53 & S54) ^ S124; \
  S147 ^= S120; S231 ^= S216; S54 ^= S9; OUT(*out++ op S147 ^ S231 ^ S54;) S147 ^= (S145 & S146) ^ S225; S231 ^= (S229 & S230) ^ S30; S54 ^= (S52 & S53) ^ S123; \
  S146 ^= S119; S230 ^= S215; S53 ^= S8; OUT(*out++ op S146 ^ S230 ^ S53;) S146 ^= (S144 & S145) ^ S224; S230 ^= (S228 & S229) ^ S29; S53 ^= (S51 & S52) ^ S122; \
  S145 ^= S118; S229 ^= S214; S52 ^= S7; OUT(*out++ op S145 ^ S229 ^ S52;) S145 ^= (S143 & S144) ^ S223; S229 ^= (S227 & S228) ^ S28; S52 ^= (S50 & S51) ^ S121; \
  S144 ^= S117; S228 ^= S213; S51 ^= S6; OUT(*out++ op S144 ^ S228 ^ S51;) S144 ^= (S142 & S143) ^ S222; S228 ^= (S226 & S227) ^ S27; S51 ^= (S49 & S50) ^ S120; \
  S143 ^= S116; S227 ^= S212; S50 ^= S5; OUT(*out++ op S143 ^ S227 ^ S50;) S143 ^= (S141 & S142) ^ S221; S227 ^= (S225 & S226) ^ S26; S50 ^= (S48 & S49) ^ S119; \
  S142 ^= S115; S226 ^= S211; S49 ^= S4; OUT(*out++ op S142 ^ S226 ^ S49;) S142 ^= (S140 & S141) ^ S220; S226 ^= (S224 & S225) ^ S25; S49 ^= (S47 & S48) ^ S118; \
  S141 ^= S114; S225 ^= S210; S48 ^= S3; OUT(*out++ op S141 ^ S225 ^ S48;) S141 ^= (S139 & S140) ^ S219; S225 ^= (S223 & S224) ^ S24; S48 ^= (S46 & S47) ^ S117; \
  S140 ^= S113; S224 ^= S209; S47 ^= S2; OUT(*out++ op S140 ^ S224 ^ S47;) S140 ^= (S138 & S139) ^ S218; S224 ^= (S222 & S223) ^ S23; S47 ^= (S45 & S46) ^ S116; \
  S139 ^= S112; S223 ^= S208; S46 ^= S1; OUT(*out++ op S139 ^ S223 ^ S46;) S139 ^= (S137 & S138) ^ S217; S223 ^= (S221 & S222) ^ S22; S46 ^= (S44 & S45) ^ S115; \
  S138 ^= S111; S222 ^= S207; S45 ^= S288; OUT(*out++ op S138 ^ S222 ^ S45;) S138 ^= (S136 & S137) ^ S216; S222 ^= (S220 & S221) ^ S21; S45 ^= (S43 & S44) ^ S114; \
  S137 ^= S110; S221 ^= S206; S44 ^= S287; OUT(*out++ op S137 ^ S221 ^ S44;) S137 ^= (S135 & S136) ^ S215; S221 ^= (S219 & S220) ^ S20; S44 ^= (S42 & S43) ^ S113; \
  S136 ^= S109; S220 ^= S205; S43 ^= S286; OUT(*out++ op S136 ^ S220 ^ S43;) S136 ^= (S134 & S135) ^ S214; S220 ^= (S218 & S219) ^ S19; S43 ^= (S41 & S42) ^ S112; \
  S135 ^= S108; S219 ^= S204; S42 ^= S285; OUT(*out++ op S135 ^ S219 ^ S42;) S135 ^= (S133 & S134) ^ S213; S219 ^= (S217 & S218) ^ S18; S42 ^= (S40 & S41) ^ S111; \
  S134 ^= S107; S218 ^= S203; S41 ^= S284; OUT(*out++ op S134 ^ S218 ^ S41;) S134 ^= (S132 & S133) ^ S212; S218 ^= (S216 & S217) ^ S17; S41 ^= (S39 & S40) ^ S110; \
  S133 ^= S106; S217 ^= S202; S40 ^= S283; OUT(*out++ op S133 ^ S217 ^ S40;) S133 ^= (S131 & S132) ^ S211; S217 ^= (S215 & S216) ^ S16; S40 ^= (S38 & S39) ^ S109; \
  S132 ^= S105; S216 ^= S201; S39 ^= S282; OUT(*out++ op S132 ^ S216 ^ S39;) S132 ^= (S130 & S131) ^ S210; S216 ^= (S214 & S215) ^ S15; S39 ^= (S37 & S38) ^ S108; \
  S131 ^= S104; S215 ^= S200; S38 ^= S281; OUT(*out++ op S131 ^ S215 ^ S38;) S131 ^= (S129 & S130) ^ S209; S215 ^= (S213 & S214) ^ S14; S38 ^= (S36 & S37) ^ S107; \
  S130 ^= S103; S214 ^= S199; S37 ^= S280; OUT(*out++ op S130 ^ S214 ^ S37;) S130 ^= (S128 & S129) ^ S208; S214 ^= (S212 & S213) ^ S13; S37 ^= (S35 & S36) ^ S106; \
  S129 ^= S102; S213 ^= S198; S36 ^= S279; OUT(*out++ op S129 ^ S213 ^ S36;) S129 ^= (S127 & S128) ^ S207; S213 ^= (S211 & S212) ^ S12; S36 ^= (S34 & S35) ^ S105; \
  S128 ^= S101; S212 ^= S197; S35 ^= S278; OUT(*out++ op S128 ^ S212 ^ S35;) S128 ^= (S126 & S127) ^ S206; S212 ^= (S210 & S211) ^ S11; S35 ^= (S33 & S34) ^ S104; \
  S127 ^= S100; S211 ^= S196; S34 ^= S277; OUT(*out++ op S127 ^ S211 ^ S34;) S127 ^= (S125 & S126) ^ S205; S211 ^= (S209 & S210) ^ S10; S34 ^= (S32 & S33) ^ S103; \
  S126 ^= S99; S210 ^= S195; S33 ^= S276; OUT(*out++ op S126 ^ S210 ^ S33;) S126 ^= (S124 & S125) ^ S204; S210 ^= (S208 & S209) ^ S9; S33 ^= (S31 & S32) ^ S102; \
  S125 ^= S98; S209 ^= S194; S32 ^= S275; OUT(*out++ op S125 ^ S209 ^ S32;) S125 ^= (S123 & S124) ^ S203; S209 ^= (S207 & S208) ^ S8; S32 ^= (S30 & S31) ^ S101; \
  S124 ^= S97; S208 ^= S193; S31 ^= S274; OUT(*out++ op S124 ^ S208 ^ S31;) S124 ^= (S122 & S123) ^ S202; S208 ^= (S206 & S207) ^ S7; S31 ^= (S29 & S30) ^ S100; \
  S123 ^= S96; S207 ^= S192; S30 ^= S273; OUT(*out++ op S123 ^ S207 ^ S30;) S123 ^= (S121 & S122) ^ S201; S207 ^= (S205 & S206) ^ S6; S30 ^= (S28 & S29) ^ S99; \
  S122 ^= S95; S206 ^= S191; S29 ^= S272; OUT(*out++ op S122 ^ S206 ^ S29;) S122 ^= (S120 & S121) ^ S200; S206 ^= (S204 & S205) ^ S5; S29 ^= (S27 & S28) ^ S98; \
  S121 ^= S94; S205 ^= S190; S28 ^= S271; OUT(*out++ op S121 ^ S205 ^ S28;) S121 ^= (S119 & S120) ^ S199; S205 ^= (S203 & S204) ^ S4; S28 ^= (S26 & S27) ^ S97; \
  S120 ^= S93; S204 ^= S189; S27 ^= S270; OUT(*out++ op S120 ^ S204 ^ S27;) S120 ^= (S118 & S119) ^ S198; S204 ^= (S202 & S203) ^ S3; S27 ^= (S25 & S26) ^ S96; \
  S119 ^= S92; S203 ^= S188; S26 ^= S269; OUT(*out++ op S119 ^ S203 ^ S26;) S119 ^= (S117 & S118) ^ S197; S203 ^= (S201 & S202) ^ S2; S26 ^= (S24 & S25) ^ S95; \
  S118 ^= S91; S202 ^= S187; S25 ^= S268; OUT(*out++ op S118 ^ S202 ^ S25;) S118 ^= (S116 & S117) ^ S196; S202 ^= (S200 & S201) ^ S1; S25 ^= (S23 & S24) ^ S94; \
  S117 ^= S90; S201 ^= S186; S24 ^= S267; OUT(*out++ op S117 ^ S201 ^ S24;) S117 ^= (S115 & S116) ^ S195; S201 ^= (S199 & S200) ^ S288; S24 ^= (S22 & S23) ^ S93; \
  S116 ^= S89; S200 ^= S185; S23 ^= S266; OUT(*out++ op S116 ^ S200 ^ S23;) S116 ^= (S114 & S115) ^ S194; S200 ^= (S198 & S199) ^ S287; S23 ^= (S21 & S22) ^ S92; \
  S115 ^= S88; S199 ^= S184; S22 ^= S265; OUT(*out++ op S115 ^ S199 ^ S22;) S115 ^= (S113 & S114) ^ S193; S199 ^= (S197 & S198) ^ S286; S22 ^= (S20 & S21) ^ S91; \
  S114 ^= S87; S198 ^= S183; S21 ^= S264; OUT(*out++ op S114 ^ S198 ^ S21;) S114 ^= (S112 & S113) ^ S192; S198 ^= (S196 & S197) ^ S285; S21 ^= (S19 & S20) ^ S90; \
  S113 ^= S86; S197 ^= S182; S20 ^= S263; OUT(*out++ op S113 ^ S197 ^ S20;) S113 ^= (S111 & S112) ^ S191; S197 ^= (S195 & S196) ^ S284; S20 ^= (S18 & S19) ^ S89; \
  S112 ^= S85; S196 ^= S181; S19 ^= S262; OUT(*out++ op S112 ^ S196 ^ S19;) S112 ^= (S110 & S111) ^ S190; S196 ^= (S194 & S195) ^ S283; S19 ^= (S17 & S18) ^ S88; \
  S111 ^= S84; S195 ^= S180; S18 ^= S261; OUT(*out++ op S111 ^ S195 ^ S18;) S111 ^= (S109 & S110) ^ S189; S195 ^= (S193 & S194) ^ S282; S18 ^= (S16 & S17) ^ S87; \
  S110 ^= S83; S194 ^= S179; S17 ^= S260; OUT(*out++ op S110 ^ S194 ^ S17;) S110 ^= (S108 & S109) ^ S188; S194 ^= (S192 & S193) ^ S281; S17 ^= (S15 & S16) ^ S86; \
  S109 ^= S82; S193 ^= S178; S16 ^= S259; OUT(*out++ op S109 ^ S193 ^ S16;) S109 ^= (S107 & S108) ^ S187; S193 ^= (S191 & S192) ^ S280; S16 ^= (S14 & S15) ^ S85; \
  S108 ^= S81; S192 ^= S177; S15 ^= S258; OUT(*out++ op S108 ^ S192 ^ S15;) S108 ^= (S106 & S107) ^ S186; S192 ^= (S190 & S191) ^ S279; S15 ^= (S13 & S14) ^ S84; \
  S107 ^= S80; S191 ^= S176; S14 ^= S257; OUT(*out++ op S107 ^ S191 ^ S14;) S107 ^= (S105 & S106) ^ S185; S191 ^= (S189 & S190) ^ S278; S14 ^= (S12 & S13) ^ S83; \
  S106 ^= S79; S190 ^= S175; S13 ^= S256; OUT(*out++ op S106 ^ S190 ^ S13;) S106 ^= (S104 & S105) ^ S184; S190 ^= (S188 & S189) ^ S277; S13 ^= (S11 & S12) ^ S82; \
  S105 ^= S78; S189 ^= S174; S12 ^= S255; OUT(*out++ op S105 ^ S189 ^ S12;) S105 ^= (S103 & S104) ^ S183; S189 ^= (S187 & S188) ^ S276; S12 ^= (S10 & S11) ^ S81; \
  S104 ^= S77; S188 ^= S173; S11 ^= S254; OUT(*out++ op S104 ^ S188 ^ S11;) S104 ^= (S102 & S103) ^ S182; S188 ^= (S186 & S187) ^ S275; S11 ^= (S9 & S10) ^ S80; \
  S103 ^= S76; S187 ^= S172; S10 ^= S253; OUT(*out++ op S103 ^ S187 ^ S10;) S103 ^= (S101 & S102) ^ S181; S187 ^= (S185 & S186) ^ S274; S10 ^= (S8 & S9) ^ S79; \
  S102 ^= S75; S186 ^= S171; S9 ^= S252; OUT(*out++ op S102 ^ S186 ^ S9;) S102 ^= (S100 & S101) ^ S180; S186 ^= (S184 & S185) ^ S273; S9 ^= (S7 & S8) ^ S78; \
  S101 ^= S74; S185 ^= S170; S8 ^= S251; OUT(*out++ op S101 ^ S185 ^ S8;) S101 ^= (S99 & S100) ^ S179; S185 ^= (S183 & S184) ^ S272; S8 ^= (S6 & S7) ^ S77; \
  S100 ^= S73; S184 ^= S169; S7 ^= S250; OUT(*out++ op S100 ^ S184 ^ S7;) S100 ^= (S98 & S99) ^ S178; S184 ^= (S182 & S183) ^ S271; S7 ^= (S5 & S6) ^ S76; \
  S99 ^= S72; S183 ^= S168; S6 ^= S249; OUT(*out++ op S99 ^ S183 ^ S6;) S99 ^= (S97 & S98) ^ S177; S183 ^= (S181 & S182) ^ S270; S6 ^= (S4 & S5) ^ S75; \
  S98 ^= S71; S182 ^= S167; S5 ^= S248; OUT(*out++ op S98 ^ S182 ^ S5;) S98 ^= (S96 & S97) ^ S176; S182 ^= (S180 & S181) ^ S269; S5 ^= (S3 & S4) ^ S74; \
  S97 ^= S70; S181 ^= S166; S4 ^= S247; OUT(*out++ op S97 ^ S181 ^ S4;) S97 ^= (S95 & S96) ^ S175; S181 ^= (S179 & S180) ^ S268; S4 ^= (S2 & S3) ^ S73; \
  S96 ^= S69; S180 ^= S165; S3 ^= S246; OUT(*out++ op S96 ^ S180 ^ S3;) S96 ^= (S94 & S95) ^ S174; S180 ^= (S178 & S179) ^ S267; S3 ^= (S1 & S2) ^ S72; \
  S95 ^= S68; S179 ^= S164; S2 ^= S245; OUT(*out++ op S95 ^ S179 ^ S2;) S95 ^= (S93 & S94) ^ S173; S179 ^= (S177 & S178) ^ S266; S2 ^= (S288 & S1) ^ S71; \
  S94 ^= S67; S178 ^= S163; S1 ^= S244; OUT(*out++ op S94 ^ S178 ^ S1;) S94 ^= (S92 & S93) ^ S172; S178 ^= (S176 & S177) ^ S265; S1 ^= (S287 & S288) ^ S70;

/*
 * One full pass over the renamed state: the four unrolled pieces back to back.
 * C3 and C4 hold 72 rounds each; assuming C1 and C2 do as well, one FULL_CYCLE
 * is 288 rounds (matching the numBits / 288 cycle count used by the callers)
 * and, with OUT enabled, advances `out` by 288 words.
 */
#define FULL_CYCLE C1 C2 C3 C4

/*
 * Declares the locals every bitsliced routine needs:
 *   - S1..S288: the 288 state words (one UINT64 per state position),
 *   - numFullCycles: how many 288-round FULL_CYCLE passes are needed to
 *     cover numBits (integer division; VERIFY_INPUT_VALUES rejects values
 *     that are not a multiple of 288).
 *
 * numBits is parenthesized so that an expression argument (e.g. a + b) is
 * divided as a whole. The expansion ends with a ';', so it is invoked
 * without a trailing semicolon and must appear where declarations are allowed.
 */
#define DECLARE_VARIABLES(numBits) \
  UINT64 \
    S1,   S2,   S3,   S4,   S5,   S6,   S7,   S8,   S9,  S10, \
     S11,  S12,  S13,  S14,  S15,  S16,  S17,  S18,  S19,  S20, \
     S21,  S22,  S23,  S24,  S25,  S26,  S27,  S28,  S29,  S30, \
     S31,  S32,  S33,  S34,  S35,  S36,  S37,  S38,  S39,  S40, \
     S41,  S42,  S43,  S44,  S45,  S46,  S47,  S48,  S49,  S50, \
     S51,  S52,  S53,  S54,  S55,  S56,  S57,  S58,  S59,  S60, \
     S61,  S62,  S63,  S64,  S65,  S66,  S67,  S68,  S69,  S70, \
     S71,  S72,  S73,  S74,  S75,  S76,  S77,  S78,  S79,  S80, \
     S81,  S82,  S83,  S84,  S85,  S86,  S87,  S88,  S89,  S90, \
     S91,  S92,  S93,  S94,  S95,  S96,  S97,  S98,  S99, S100, \
    S101, S102, S103, S104, S105, S106, S107, S108, S109, S110, \
    S111, S112, S113, S114, S115, S116, S117, S118, S119, S120, \
    S121, S122, S123, S124, S125, S126, S127, S128, S129, S130, \
    S131, S132, S133, S134, S135, S136, S137, S138, S139, S140, \
    S141, S142, S143, S144, S145, S146, S147, S148, S149, S150, \
    S151, S152, S153, S154, S155, S156, S157, S158, S159, S160, \
    S161, S162, S163, S164, S165, S166, S167, S168, S169, S170, \
    S171, S172, S173, S174, S175, S176, S177, S178, S179, S180, \
    S181, S182, S183, S184, S185, S186, S187, S188, S189, S190, \
    S191, S192, S193, S194, S195, S196, S197, S198, S199, S200, \
    S201, S202, S203, S204, S205, S206, S207, S208, S209, S210, \
    S211, S212, S213, S214, S215, S216, S217, S218, S219, S220, \
    S221, S222, S223, S224, S225, S226, S227, S228, S229, S230, \
    S231, S232, S233, S234, S235, S236, S237, S238, S239, S240, \
    S241, S242, S243, S244, S245, S246, S247, S248, S249, S250, \
    S251, S252, S253, S254, S255, S256, S257, S258, S259, S260, \
    S261, S262, S263, S264, S265, S266, S267, S268, S269, S270, \
    S271, S272, S273, S274, S275, S276, S277, S278, S279, S280, \
    S281, S282, S283, S284, S285, S286, S287, S288; \
  int numFullCycles = (numBits) / 288;


/*
 * Early-exit guard for the bitsliced routines; expands to statements that
 * `return` from the enclosing function:
 *   - numBits == 0               -> return 0 (nothing to do),
 *   - numBits not a multiple of 288 -> return -1.
 *
 * numBits is parenthesized so that an expression argument is tested as a
 * whole (a + b would otherwise parse as a + (b % 288)). The expansion is a
 * pair of bare `if` statements and is invoked without a trailing semicolon;
 * do not use it as the body of an unbraced if/else.
 */
#define VERIFY_INPUT_VALUES(numBits) \
  if ((numBits) == 0) return 0; \
  if ((numBits) % 288 != 0) return -1;

/*
 * Loads the initial 288-word state from the caller's `key` and `iv` pointers.
 *
 *   A: S80..S1    <- key[0]..key[79] (key[0] lands in S80), S81..S93   <- 0
 *   B: S173..S94  <- iv[0]..iv[79]   (iv[0] lands in S173), S174..S177 <- 0
 *   C: S178..S285 <- 0, S286..S288 <- all ones
 *
 * Side effect: `key` and `iv` are each advanced by 79 elements (the last load
 * of each group does not increment), so both must be modifiable pointers to
 * at least 80 readable UINT64 words. S1..S288 must already be declared.
 */
#define INIT_REGISTERS \
  /* A: key words, loaded from S80 down to S1, then 13 zero words */ \
  S80 = *key++; S79 = *key++; S78 = *key++; S77 = *key++; S76 = *key++; S75 = *key++; S74 = *key++; S73 = *key++; S72 = *key++; S71 = *key++; \
  S70 = *key++; S69 = *key++; S68 = *key++; S67 = *key++; S66 = *key++; S65 = *key++; S64 = *key++; S63 = *key++; S62 = *key++; S61 = *key++; \
  S60 = *key++; S59 = *key++; S58 = *key++; S57 = *key++; S56 = *key++; S55 = *key++; S54 = *key++; S53 = *key++; S52 = *key++; S51 = *key++; \
  S50 = *key++; S49 = *key++; S48 = *key++; S47 = *key++; S46 = *key++; S45 = *key++; S44 = *key++; S43 = *key++; S42 = *key++; S41 = *key++; \
  S40 = *key++; S39 = *key++; S38 = *key++; S37 = *key++; S36 = *key++; S35 = *key++; S34 = *key++; S33 = *key++; S32 = *key++; S31 = *key++; \
  S30 = *key++; S29 = *key++; S28 = *key++; S27 = *key++; S26 = *key++; S25 = *key++; S24 = *key++; S23 = *key++; S22 = *key++; S21 = *key++; \
  S20 = *key++; S19 = *key++; S18 = *key++; S17 = *key++; S16 = *key++; S15 = *key++; S14 = *key++; S13 = *key++; S12 = *key++; S11 = *key++; \
  S10 = *key++;  S9 = *key++;  S8 = *key++;  S7 = *key++;  S6 = *key++;  S5 = *key++;  S4 = *key++;  S3 = *key++;  S2 = *key++;  S1 = *key; \
  S81 = 0; S82 = 0; S83 = 0; S84 = 0; S85 = 0; S86 = 0; S87 = 0; S88 = 0; S89 = 0; S90 = 0; S91 = 0; S92 = 0; S93 = 0; \
  \
  /* B: iv words, loaded from S173 down to S94, then 4 zero words */ \
  S173 = *iv++; S172 = *iv++; S171 = *iv++; S170 = *iv++; S169 = *iv++; S168 = *iv++; S167 = *iv++; S166 = *iv++; S165 = *iv++; S164 = *iv++; \
  S163 = *iv++; S162 = *iv++; S161 = *iv++; S160 = *iv++; S159 = *iv++; S158 = *iv++; S157 = *iv++; S156 = *iv++; S155 = *iv++; S154 = *iv++; \
  S153 = *iv++; S152 = *iv++; S151 = *iv++; S150 = *iv++; S149 = *iv++; S148 = *iv++; S147 = *iv++; S146 = *iv++; S145 = *iv++; S144 = *iv++; \
  S143 = *iv++; S142 = *iv++; S141 = *iv++; S140 = *iv++; S139 = *iv++; S138 = *iv++; S137 = *iv++; S136 = *iv++; S135 = *iv++; S134 = *iv++; \
  S133 = *iv++; S132 = *iv++; S131 = *iv++; S130 = *iv++; S129 = *iv++; S128 = *iv++; S127 = *iv++; S126 = *iv++; S125 = *iv++; S124 = *iv++; \
  S123 = *iv++; S122 = *iv++; S121 = *iv++; S120 = *iv++; S119 = *iv++; S118 = *iv++; S117 = *iv++; S116 = *iv++; S115 = *iv++; S114 = *iv++; \
  S113 = *iv++; S112 = *iv++; S111 = *iv++; S110 = *iv++; S109 = *iv++; S108 = *iv++; S107 = *iv++; S106 = *iv++; S105 = *iv++; S104 = *iv++; \
  S103 = *iv++; S102 = *iv++; S101 = *iv++; S100 = *iv++;  S99 = *iv++;  S98 = *iv++;  S97 = *iv++;  S96 = *iv++;  S95 = *iv++;  S94 = *iv; \
  S174 = 0; S175 = 0; S176 = 0; S177 = 0; \
  \
  /* C */                                                                         S178 = 0; S179 = 0; \
  S180 = 0; S181 = 0; S182 = 0; S183 = 0; S184 = 0; S185 = 0; S186 = 0; S187 = 0; S188 = 0; S189 = 0; \
  S190 = 0; S191 = 0; S192 = 0; S193 = 0; S194 = 0; S195 = 0; S196 = 0; S197 = 0; S198 = 0; S199 = 0; \
  S200 = 0; S201 = 0; S202 = 0; S203 = 0; S204 = 0; S205 = 0; S206 = 0; S207 = 0; S208 = 0; S209 = 0; \
  S210 = 0; S211 = 0; S212 = 0; S213 = 0; S214 = 0; S215 = 0; S216 = 0; S217 = 0; S218 = 0; S219 = 0; \
  S220 = 0; S221 = 0; S222 = 0; S223 = 0; S224 = 0; S225 = 0; S226 = 0; S227 = 0; S228 = 0; S229 = 0; \
  S230 = 0; S231 = 0; S232 = 0; S233 = 0; S234 = 0; S235 = 0; S236 = 0; S237 = 0; S238 = 0; S239 = 0; \
  S240 = 0; S241 = 0; S242 = 0; S243 = 0; S244 = 0; S245 = 0; S246 = 0; S247 = 0; S248 = 0; S249 = 0; \
  S250 = 0; S251 = 0; S252 = 0; S253 = 0; S254 = 0; S255 = 0; S256 = 0; S257 = 0; S258 = 0; S259 = 0; \
  S260 = 0; S261 = 0; S262 = 0; S263 = 0; S264 = 0; S265 = 0; S266 = 0; S267 = 0; S268 = 0; S269 = 0; \
  S270 = 0; S271 = 0; S272 = 0; S273 = 0; S274 = 0; S275 = 0; S276 = 0; S277 = 0; S278 = 0; S279 = 0; \
  S280 = 0; S281 = 0; S282 = 0; S283 = 0; S284 = 0; S285 = 0; \
  S288 = S287 = S286 = 0xFFFFFFFFFFFFFFFF;


/*
 * Bitsliced keystream generation: runs 4 full cycles with output suppressed,
 * then writes numOutputBits keystream words to `out` (plain assignment).
 *
 * key, iv       - 80 UINT64 words each (read by INIT_REGISTERS)
 * out           - receives one UINT64 word per output bit position, i.e.
 *                 numOutputBits words in total
 * numOutputBits - must be a multiple of 288
 *
 * Returns 0 on success (including numOutputBits == 0, where nothing is
 * written) and -1 if numOutputBits is not a multiple of 288.
 *
 * OUT and op are #undef'd before being defined so this function does not
 * depend on (or clash with) whatever definitions earlier code left behind.
 */
int triviumBitsliced(const UINT64 *key, const UINT64 *iv, UINT64 *out, unsigned int numOutputBits) {
  DECLARE_VARIABLES(numOutputBits)
  VERIFY_INPUT_VALUES(numOutputBits)
  INIT_REGISTERS
  /* initialization rounds: OUT expands to nothing, so `out` is untouched */
#undef OUT
#define OUT(x)
  FULL_CYCLE
  FULL_CYCLE
  FULL_CYCLE
  FULL_CYCLE
  /* keystream rounds: each round stores one word with `*out++ = ...` */
#undef OUT
#define OUT(x) x
#undef op
#define op =
  while (numFullCycles-- > 0) {
    FULL_CYCLE
  }
  return 0;
}

/*
 * Same as triviumBitsliced, but nothing is suppressed: output words are
 * written (plain assignment) from the very first round after the state is
 * loaded, so the first cycles written are the initialization rounds.
 *
 * key, iv       - 80 UINT64 words each
 * out           - receives numOutputBits UINT64 words
 * numOutputBits - must be a multiple of 288
 *
 * Returns 0 on success (including numOutputBits == 0) and -1 if
 * numOutputBits is not a multiple of 288.
 *
 * OUT and op are set explicitly here; previously this function silently
 * relied on the definitions left active by the function defined before it.
 */
int triviumBitsliced_withInitOutput(const UINT64 *key, const UINT64 *iv, UINT64 *out, unsigned int numOutputBits) {
  DECLARE_VARIABLES(numOutputBits)
  VERIFY_INPUT_VALUES(numOutputBits)
  INIT_REGISTERS
#undef OUT
#define OUT(x) x
#undef op
#define op =
  while (numFullCycles-- > 0) {
    FULL_CYCLE
  }
  return 0;
}

/*
 * Bitsliced keystream generation that XORs into the destination: after 4
 * silent full cycles, each round performs `*out++ ^= <keystream word>`.
 * The caller's existing contents of `out` are therefore combined with the
 * keystream rather than overwritten.
 *
 * Returns 0 on success (including numOutputBits == 0) and -1 if
 * numOutputBits is not a multiple of 288.
 */
int triviumBitsliced_xor(const UINT64 *key, const UINT64 *iv, UINT64 *out, unsigned int numOutputBits) {
  DECLARE_VARIABLES(numOutputBits)
  VERIFY_INPUT_VALUES(numOutputBits)
  INIT_REGISTERS

  /* Warm-up: four full cycles with the output statement compiled out. */
#undef OUT
#define OUT(x)
  FULL_CYCLE FULL_CYCLE
  FULL_CYCLE FULL_CYCLE

  /* Production: one XOR-accumulated output word per round. */
#undef OUT
#define OUT(x) x
#undef op
#define op ^=
  for (; numFullCycles > 0; --numFullCycles) {
    FULL_CYCLE
  }
  return 0;
}

/*
 * Same as triviumBitsliced_xor, but nothing is suppressed: every round from
 * the first one after the state is loaded performs `*out++ ^= <word>`, so
 * the first cycles XORed into `out` are the initialization rounds.
 *
 * key, iv       - 80 UINT64 words each
 * out           - numOutputBits UINT64 words, read-modify-written
 * numOutputBits - must be a multiple of 288
 *
 * Returns 0 on success (including numOutputBits == 0) and -1 if
 * numOutputBits is not a multiple of 288.
 *
 * OUT and op are set explicitly here; previously this function silently
 * relied on the definitions left active by the function defined before it.
 */
int triviumBitsliced_xor_withInitOutput(const UINT64 *key, const UINT64 *iv, UINT64 *out, unsigned int numOutputBits) {
  DECLARE_VARIABLES(numOutputBits)
  VERIFY_INPUT_VALUES(numOutputBits)
  INIT_REGISTERS
#undef OUT
#define OUT(x) x
#undef op
#define op ^=
  while (numFullCycles-- > 0) {
    FULL_CYCLE
  }
  return 0;
}

/*
 * Bitsliced encryption that XORs into the destination: after 4 silent full
 * cycles, each round performs `*out++ ^= *in++ ^ <keystream word>`, consuming
 * one input word per output word.
 *
 * Checks run in this order: numOutputBits == 0 returns 0; numOutputBits not
 * a multiple of 288 returns -1; numInputBits smaller than numOutputBits
 * returns -1. Otherwise the function returns 0.
 */
int triviumBitsliced_encrypt_xor(const UINT64 *key, const UINT64 *iv, const UINT64 *in, unsigned int numInputBits, UINT64 *out, unsigned int numOutputBits) {
  DECLARE_VARIABLES(numOutputBits)
  VERIFY_INPUT_VALUES(numOutputBits)
  if (numOutputBits > numInputBits) {
    return -1; /* not enough input to cover the requested output */
  }
  INIT_REGISTERS

  /* Warm-up: four full cycles with the output statement compiled out. */
#undef OUT
#define OUT(x)
  FULL_CYCLE FULL_CYCLE
  FULL_CYCLE FULL_CYCLE

  /* Production: input and keystream are folded into *out together. */
#undef OUT
#define OUT(x) x
#undef op
#define op ^= *in++ ^
  for (; numFullCycles > 0; --numFullCycles) {
    FULL_CYCLE
  }
  return 0;
}

/*
 * Bitsliced encryption with the initialization rounds exposed.
 *
 * The first 4 full cycles (4 * 288 words) XOR the raw round output into
 * `out` without consuming any input; every later cycle performs
 * `*out++ ^= *in++ ^ <keystream word>`.
 *
 * key, iv       - 80 UINT64 words each
 * in            - input words; only needed for output beyond the first
 *                 4 * 288 words
 * numInputBits  - number of words available at `in`
 * out           - numOutputBits UINT64 words, read-modify-written
 * numOutputBits - must be a multiple of 288
 *
 * Returns 0 on success (including numOutputBits == 0) and -1 if
 * numOutputBits is not a multiple of 288 or `in` is too short.
 *
 * Fixes relative to the previous version:
 *   - the four init-output cycles used to run unconditionally, writing
 *     4 * 288 words even when fewer were requested (buffer overrun for
 *     numOutputBits < 1152); now at most numFullCycles cycles are written;
 *   - the input-length check no longer computes numInputBits + 1152, which
 *     could wrap around for very large numInputBits.
 */
int triviumBitsliced_encrypt_xor_withInitOutput(const UINT64 *key, const UINT64 *iv, const UINT64 *in, unsigned int numInputBits, UINT64 *out, unsigned int numOutputBits) {
  DECLARE_VARIABLES(numOutputBits)
  int numInitCycles; /* init-round cycles to emit: min(numFullCycles, 4) */
  VERIFY_INPUT_VALUES(numOutputBits)
  if (numOutputBits > 4 * 288 && numInputBits < numOutputBits - 4 * 288) return -1; /* too few input bits */
  INIT_REGISTERS
  numInitCycles = (numFullCycles < 4) ? numFullCycles : 4;
  numFullCycles -= numInitCycles;
  /* initialization rounds: output is XORed into out, no input consumed */
#undef OUT
#define OUT(x) x
#undef op
#define op ^=
  while (numInitCycles-- > 0) {
    FULL_CYCLE
  }
  /* remaining rounds: input and keystream are XORed into out together */
#undef op
#define op ^= *in++ ^
  while (numFullCycles-- > 0) {
    FULL_CYCLE
  }
  return 0;
}




/******************************************************************************
 * Black box API
 ******************************************************************************/
/*
 * Byte-oriented black-box entry point. Forwards all arguments unchanged to
 * trivium_encrypt_xor_withInitOutput when withInitRoundOutput is non-zero,
 * otherwise to trivium_encrypt_xor, and returns that function's result.
 */
int blackBoxTriviumEncryption(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes, int withInitRoundOutput) {
  return withInitRoundOutput
    ? trivium_encrypt_xor_withInitOutput(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes)
    : trivium_encrypt_xor(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes);
}

/*
 * Bitsliced black-box entry point. Forwards all arguments unchanged to
 * triviumBitsliced_encrypt_xor_withInitOutput when withInitRoundOutput is
 * non-zero, otherwise to triviumBitsliced_encrypt_xor, and returns that
 * function's result.
 */
int blackBoxTriviumEncryptionBitsliced(const UINT64 *key, const UINT64 *iv, const UINT64 *inBuf, unsigned int numInputBits, UINT64 *outBuf, unsigned int numOutputBits, int withInitRoundOutput) {
  return withInitRoundOutput
    ? triviumBitsliced_encrypt_xor_withInitOutput(key, iv, inBuf, numInputBits, outBuf, numOutputBits)
    : triviumBitsliced_encrypt_xor(key, iv, inBuf, numInputBits, outBuf, numOutputBits);
}

/******************************************************************************
 * Basic cipher information
 ******************************************************************************/
/*
 * Reports the fixed parameters of the byte-oriented implementation. Each
 * argument is optional: a null pointer is skipped, a non-null pointer
 * receives the corresponding value (key 10, iv 10, suppressed 144, block 8).
 */
void getBlackBoxTriviumInfo(int *keySizeInBytes, int *ivSizeInBytes, int *suppressedBytes, int *implicitBlockSizeInBytes) {
  enum {
    triviumKeyBytes = 10,
    triviumIvBytes = 10,
    triviumSuppressedBytes = 144,
    triviumBlockBytes = 8
  };

  if (keySizeInBytes != 0) {
    *keySizeInBytes = triviumKeyBytes;
  }
  if (ivSizeInBytes != 0) {
    *ivSizeInBytes = triviumIvBytes;
  }
  if (suppressedBytes != 0) {
    *suppressedBytes = triviumSuppressedBytes;
  }
  if (implicitBlockSizeInBytes != 0) {
    *implicitBlockSizeInBytes = triviumBlockBytes;
  }
}

/*
 * Reports the fixed parameters of the bitsliced implementation. Each
 * argument is optional: a null pointer is skipped, a non-null pointer
 * receives the corresponding value (key 80, iv 80, suppressed 4 * 288,
 * block 288).
 */
void getBlackBoxBitslicedTriviumInfo(int *keySizeInBits, int *ivSizeInBits, int *suppressedBits, int *implicitBlockSizeInBits) {
  enum {
    bitslicedKeyBits = 80,
    bitslicedIvBits = 80,
    bitslicedBlockBits = 288,
    bitslicedSuppressedBits = 288 * 4
  };

  if (keySizeInBits != 0) {
    *keySizeInBits = bitslicedKeyBits;
  }
  if (ivSizeInBits != 0) {
    *ivSizeInBits = bitslicedIvBits;
  }
  if (suppressedBits != 0) {
    *suppressedBits = bitslicedSuppressedBits;
  }
  if (implicitBlockSizeInBits != 0) {
    *implicitBlockSizeInBits = bitslicedBlockBits;
  }
}
