/*
 * Adapted HC-256 reference code.
 *
 * Copyright (c) Paul Stankovski
 * Free for all non-commercial use unless this directive conflicts with
 * other applicable copyright statement(s), patent holders, laws or such.
 */
#include "black_box_hc256.h"
#include "tmalloc.h"

/* Implicit block size in bytes, as reported by getBlackBoxHC256Info(). */
#define BLOCK_LEN 1
/* Number of 16-step initialisation rounds run by HC256_ivsetup_withInitOutput(). */
#define NUM_INIT_ROUNDS 256
/* Bytes written per initialisation round: 16 words of sizeof(UINT32) bytes each. */
#define NUM_HIDDEN_BYTES_PER_INIT_ROUND (16 * sizeof(UINT32))
/* Total number of bytes written over all initialisation rounds (has type size_t). */
#define NUM_HIDDEN_BYTES (NUM_INIT_ROUNDS * NUM_HIDDEN_BYTES_PER_INIT_ROUND)

/* Complete state of one HC-256 instance. */
typedef struct {
  UINT32 T[2048];       /* both tables in one array: P[i] = T[i]; Q[i] = T[1024+i] */
  UINT32 X[16];         /* rotating copy of the words most recently written to P */
  UINT32 Y[16];         /* rotating copy of the words most recently written to Q */
  UINT32 counter2048;   /* counter2048 = i mod 2048 at the i-th step; advanced 16 at a time */
  UINT32 key[8];        /* key words, filled in by HC256_keysetup() */
  UINT32 iv[8];         /* IV words, filled in by the IV setup functions */
  UINT32 keysize;       /* key size in bits */
  UINT32 ivsize;        /* iv size in bits */
} HC256_info;

/*
 * Rotate the unsigned 32-bit value x right by n bits.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (or any multiple of 32)
 * no longer produces a shift by the full type width, which is undefined
 * behaviour in C.  For n in 1..31 the result is exactly what the unmasked
 * form produced.  x and n are each evaluated twice and must therefore be
 * free of side effects.
 */
#define ROTR32(x, n) (((x) >> ((n) & 31)) | ((x) << ((32 - (n)) & 31)))

/*
 * Conversion hook applied to every key and IV word as it is loaded.
 * It is the identity here, so no byte swapping is performed.
 * NOTE(review): presumably this assumes a little-endian host - confirm
 * before building for a big-endian target.
 */
#define UINT32TO32_LITTLE(x) (x)

/* =====================================================================

 *     The following defines the keystream generation function
 *======================================================================*/

/*
 * h1 function:
 *   (y) = Q[x0] + Q[256 + x1] + Q[512 + x2] + Q[768 + x3]
 * where x0 is the least and x3 the most significant byte of (x), and
 * Q[i] is stored at (ctx)->T[1024 + i].
 *
 * ctx is a pointer to the cipher state; every use is parenthesised so that
 * any pointer expression (for example &state) may be passed.  (x) is
 * evaluated four times and must be free of side effects; (y) is an lvalue
 * that is assigned exactly once.  The do/while(0) wrapper turns the
 * expansion into a single statement, so the macro is safe in an unbraced
 * if/else.
 */
#define HC256_h1(ctx, x, y) do {                    \
     UINT32 B0,B1,B2,B3;                            \
     UINT32 t0,t1,t2;                               \
     B0 = (BYTE) (x);                               \
     t0 = ((ctx)->T[1024 + B0]);                    \
     B1 = (BYTE) ((x) >> 8);                        \
     t1 = t0 + ((ctx)->T[1024 + 256 + B1]);         \
     B2 = (BYTE) ((x) >> 16);                       \
     t2 = t1 + ((ctx)->T[1024 + 512 + B2]);         \
     B3 = (BYTE) ((x) >> 24);                       \
     (y) = t2 + ((ctx)->T[1024 + 768 + B3]);        \
} while (0)

/*
 * h2 function:
 *   (y) = P[x0] + P[256 + x1] + P[512 + x2] + P[768 + x3]
 * where x0 is the least and x3 the most significant byte of (x), and
 * P[i] is stored at (ctx)->T[i].
 *
 * ctx is a pointer to the cipher state; every use is parenthesised so that
 * any pointer expression (for example &state) may be passed.  (x) is
 * evaluated four times and must be free of side effects; (y) is an lvalue
 * that is assigned exactly once.  The do/while(0) wrapper turns the
 * expansion into a single statement, so the macro is safe in an unbraced
 * if/else.
 */
#define HC256_h2(ctx, x, y) do {                    \
     UINT32 B0,B1,B2,B3;                            \
     UINT32 t0,t1,t2;                               \
     B0 = (BYTE) (x);                               \
     t0 = ((ctx)->T[B0]);                           \
     B1 = (BYTE) ((x) >> 8);                        \
     t1 = t0 + ((ctx)->T[256 + B1]);                \
     B2 = (BYTE) ((x) >> 16);                       \
     t2 = t1 + ((ctx)->T[512 + B2]);                \
     B3 = (BYTE) ((x) >> 24);                       \
     (y) = t2 + ((ctx)->T[768 + B3]);               \
} while (0)


/*
 * One step of HC-256 on table P: update one word of P and produce one
 * 32-bit keystream word.
 *
 *   u    - index in (ctx)->T of the P word that is updated in place;
 *   v    - index in (ctx)->T of the P word that feeds the rotation and the
 *          Q lookup (the callers in this file pass the successor of u);
 *   a    - slot of X that receives a copy of the updated word;
 *   b, c - slots of X that are read by the update;
 *   d    - slot of X that is passed through h1 to mask the output;
 *   m    - lvalue receiving the keystream word h1(X[d]) ^ T[u].
 *
 * ctx is parenthesised at every use, and the do/while(0) wrapper makes the
 * expansion a single statement.  u, v and c are evaluated several times and
 * must be free of side effects; (m) is evaluated exactly once, so an
 * expression with a side effect (such as a post-incremented output pointer)
 * is safe there.
 */
#define HC256_step_P(ctx,u,v,a,b,c,d,m) do {                                       \
     UINT32 tem0,tem1,tem2,tem3;                                                   \
     tem0 = ROTR32(((ctx)->T[(v)]),23);                                            \
     tem1 = ROTR32(((ctx)->X[(c)]),10);                                            \
     tem2 = (((ctx)->T[(v)]) ^ ((ctx)->X[(c)])) & 0x3ff;                           \
     ((ctx)->T[(u)]) += ((ctx)->X[(b)])+(tem0^tem1)+((ctx)->T[1024+tem2]);         \
     ((ctx)->X[(a)]) = ((ctx)->T[(u)]);                                            \
     HC256_h1((ctx),((ctx)->X[(d)]),tem3);                                         \
     (m) = tem3 ^ ((ctx)->T[(u)]);                                                 \
} while (0)


/*
 * One step of HC-256 on table Q: update one word of Q and produce one
 * 32-bit keystream word.
 *
 *   u    - index in (ctx)->T of the Q word that is updated in place (the
 *          callers in this file pass 1024 + offset);
 *   v    - index in (ctx)->T of the Q word that feeds the rotation and the
 *          P lookup (the callers in this file pass the successor of u);
 *   a    - slot of Y that receives a copy of the updated word;
 *   b, c - slots of Y that are read by the update;
 *   d    - slot of Y that is passed through h2 to mask the output;
 *   m    - lvalue receiving the keystream word h2(Y[d]) ^ T[u].
 *
 * ctx is parenthesised at every use, and the do/while(0) wrapper makes the
 * expansion a single statement.  u, v and c are evaluated several times and
 * must be free of side effects; (m) is evaluated exactly once, so an
 * expression with a side effect (such as a post-incremented output pointer)
 * is safe there.
 */
#define HC256_step_Q(ctx,u,v,a,b,c,d,m) do {                                       \
     UINT32 tem0,tem1,tem2,tem3;                                                   \
     tem0 = ROTR32(((ctx)->T[(v)]),23);                                            \
     tem1 = ROTR32(((ctx)->Y[(c)]),10);                                            \
     tem2 = (((ctx)->T[(v)]) ^ ((ctx)->Y[(c)])) & 0x3ff;                           \
     ((ctx)->T[(u)]) += ((ctx)->Y[(b)])+(tem0^tem1)+((ctx)->T[tem2]);              \
     ((ctx)->Y[(a)]) = ((ctx)->T[(u)]);                                            \
     HC256_h2((ctx),((ctx)->Y[(d)]),tem3);                                         \
     (m) = tem3 ^ ((ctx)->T[(u)]);                                                 \
} while (0)

/*
 * Run 16 steps of HC-256 and store the 16 resulting 32-bit keystream words
 * (512 bits) in keystream[0..15].
 *
 * While counter2048 is below 1024 the steps update table P, otherwise table
 * Q (stored 1024 words further into T).  counter2048 is advanced by 16
 * modulo 2048.  Step k (k = 0..15) updates word base+k of the active table
 * and uses slots k, k+6, k+13 and k+4 (all modulo 16) of the matching
 * 16-word window.  The word following the last one of the batch is taken
 * modulo 1024, so it wraps to the start of the table.
 */
void HC256_generate_keystream(HC256_info* ctx, UINT32* keystream)
{
   const UINT32 base = ctx->counter2048 & 0x3ff;      /* offset inside the active table */
   const UINT32 wrap = (base + 16) & 0x3ff;           /* successor of the batch's last word */
   const int in_P = (ctx->counter2048 < 1024);
   UINT32 step;

   ctx->counter2048 = (ctx->counter2048 + 16) & 0x7ff;

   if (in_P)
   {
      for (step = 0; step < 16; step++)
      {
         const UINT32 next = (step == 15) ? wrap : base + step + 1;

         HC256_step_P(ctx, base + step, next,
                      step, (step + 6) & 15, (step + 13) & 15, (step + 4) & 15,
                      keystream[step]);
      }
   }
   else
   {
      for (step = 0; step < 16; step++)
      {
         const UINT32 next = (step == 15) ? wrap : base + step + 1;

         HC256_step_Q(ctx, 1024 + base + step, 1024 + next,
                      step, (step + 6) & 15, (step + 13) & 15, (step + 4) & 15,
                      keystream[step]);
      }
   }
}


/*======================================================*/
/*   The following defines the initialization functions */
/*======================================================*/

/*
 * Mixing functions used when the key and IV are expanded into table T.
 * Each combines two right rotations with one plain right shift of x;
 * x is evaluated three times and must be free of side effects.
 */
#define HC256_f1(x)  (ROTR32((x),7) ^ ROTR32((x),18) ^ ((x) >> 3))
#define HC256_f2(x)  (ROTR32((x),17) ^ ROTR32((x),19) ^ ((x) >> 10))

/*
 * Update one word of table P without producing any output.
 *
 *   u    - index in (ctx)->T of the P word that is updated in place;
 *   v    - index in (ctx)->T of the P word that feeds the rotation and the
 *          Q lookup (the callers in this file pass the successor of u);
 *   a    - slot of X that receives a copy of the updated word;
 *   b, c - slots of X that are read by the update.
 *
 * ctx is parenthesised at every use, and the do/while(0) wrapper makes the
 * expansion a single statement.  The arguments are evaluated more than once
 * and must be free of side effects.
 */
#define HC256_update_P(ctx,u,v,a,b,c) do {                                         \
     UINT32 tem0,tem1,tem2;                                                        \
     tem0 = ROTR32(((ctx)->T[(v)]),23);                                            \
     tem1 = ROTR32(((ctx)->X[(c)]),10);                                            \
     tem2 = (((ctx)->T[(v)]) ^ ((ctx)->X[(c)])) & 0x3ff;                           \
     ((ctx)->T[(u)]) += ((ctx)->X[(b)])+(tem0^tem1)+((ctx)->T[1024+tem2]);         \
     ((ctx)->X[(a)]) = ((ctx)->T[(u)]);                                            \
} while (0)

/*
 * Update one word of table Q without producing any output.
 *
 *   u    - index in (ctx)->T of the Q word that is updated in place (the
 *          callers in this file pass 1024 + offset);
 *   v    - index in (ctx)->T of the Q word that feeds the rotation and the
 *          P lookup (the callers in this file pass the successor of u);
 *   a    - slot of Y that receives a copy of the updated word;
 *   b, c - slots of Y that are read by the update.
 *
 * ctx is parenthesised at every use, and the do/while(0) wrapper makes the
 * expansion a single statement.  The arguments are evaluated more than once
 * and must be free of side effects.
 */
#define HC256_update_Q(ctx,u,v,a,b,c) do {                                         \
     UINT32 tem0,tem1,tem2;                                                        \
     tem0 = ROTR32(((ctx)->T[(v)]),23);                                            \
     tem1 = ROTR32(((ctx)->Y[(c)]),10);                                            \
     tem2 = (((ctx)->T[(v)]) ^ ((ctx)->Y[(c)])) & 0x3ff;                           \
     ((ctx)->T[(u)]) += ((ctx)->Y[(b)])+(tem0^tem1)+((ctx)->T[tem2]);              \
     ((ctx)->Y[(a)]) = ((ctx)->T[(u)]);                                            \
} while (0)

/*
 * Run 16 steps of HC-256 without producing keystream; used to mix the
 * tables during IV setup.
 *
 * While counter2048 is below 1024 the steps update table P, otherwise table
 * Q (stored 1024 words further into T).  counter2048 is advanced by 16
 * modulo 2048.  Step k (k = 0..15) updates word base+k of the active table
 * and uses slots k, k+6 and k+13 (all modulo 16) of the matching 16-word
 * window.  The word following the last one of the batch is taken modulo
 * 1024, so it wraps to the start of the table.
 */
void HC256_setup_update(HC256_info* ctx)  /*each time 16 steps*/
{
   const UINT32 base = ctx->counter2048 & 0x3ff;      /* offset inside the active table */
   const UINT32 wrap = (base + 16) & 0x3ff;           /* successor of the batch's last word */
   const int in_P = (ctx->counter2048 < 1024);
   UINT32 step;

   ctx->counter2048 = (ctx->counter2048 + 16) & 0x7ff;

   if (in_P)
   {
      for (step = 0; step < 16; step++)
      {
         const UINT32 next = (step == 15) ? wrap : base + step + 1;

         HC256_update_P(ctx, base + step, next,
                        step, (step + 6) & 15, (step + 13) & 15);
      }
   }
   else
   {
      for (step = 0; step < 16; step++)
      {
         const UINT32 next = (step == 15) ? wrap : base + step + 1;

         HC256_update_Q(ctx, 1024 + base + step, 1024 + next,
                        step, (step + 6) & 15, (step + 13) & 15);
      }
   }
}

/*
 * Update one word of table P during initialisation and also emit the word
 * a regular keystream step would have produced, h1(X[d]) ^ T[u], into (m).
 *
 * This is the same operation as HC256_step_P (the former body was a
 * token-for-token copy of it), so it simply forwards to that macro.  The
 * argument rules of HC256_step_P apply; in particular (m) is evaluated
 * exactly once.
 */
#define HC256_update_P_withInitOutput(ctx,u,v,a,b,c,d,m) \
     HC256_step_P(ctx,u,v,a,b,c,d,m)

/*
 * Update one word of table Q during initialisation and also emit the word
 * a regular keystream step would have produced, h2(Y[d]) ^ T[u], into (m).
 *
 * This is the same operation as HC256_step_Q (the former body was a
 * token-for-token copy of it), so it simply forwards to that macro.  The
 * argument rules of HC256_step_Q apply; in particular (m) is evaluated
 * exactly once.
 */
#define HC256_update_Q_withInitOutput(ctx,u,v,a,b,c,d,m) \
     HC256_step_Q(ctx,u,v,a,b,c,d,m)

/*
 * Run 16 initialisation steps of HC-256 and write the 16 words they would
 * have emitted as keystream to out (16 * sizeof(UINT32) bytes, in host byte
 * order).
 *
 * The table selection, counter update and slot schedule are the same as in
 * HC256_setup_update(); the only difference is that every step also stores
 * one output word.
 *
 * NOTE(review): out is accessed through a UINT32 pointer, so the caller's
 * buffer presumably has to be suitably aligned for UINT32 - confirm.
 */
void HC256_setup_update_withInitOutput(HC256_info* ctx, BYTE *out)  /*each time 16 steps*/
{
   const UINT32 base = ctx->counter2048 & 0x3ff;      /* offset inside the active table */
   const UINT32 wrap = (base + 16) & 0x3ff;           /* successor of the batch's last word */
   const int in_P = (ctx->counter2048 < 1024);
   UINT32 *out32 = (UINT32*)out;
   UINT32 step;

   ctx->counter2048 = (ctx->counter2048 + 16) & 0x7ff;

   if (in_P)
   {
      for (step = 0; step < 16; step++)
      {
         const UINT32 next = (step == 15) ? wrap : base + step + 1;

         HC256_update_P_withInitOutput(ctx, base + step, next,
                                       step, (step + 6) & 15, (step + 13) & 15, (step + 4) & 15,
                                       out32[step]);
      }
   }
   else
   {
      for (step = 0; step < 16; step++)
      {
         const UINT32 next = (step == 15) ? wrap : base + step + 1;

         HC256_update_Q_withInitOutput(ctx, 1024 + base + step, 1024 + next,
                                       step, (step + 6) & 15, (step + 13) & 15, (step + 4) & 15,
                                       out32[step]);
      }
   }
}

/* for the 256-bit key:  key[0]...key[31]
*  key[0] is the least significant byte of ctx->key[0] (K_0);
*  key[3] is the most significant byte of ctx->key[0]  (K_0);
*  ...
*  key[28] is the least significant byte of ctx->key[7] (K_7)
*  key[31] is the most significant byte of ctx->key[7]  (K_7)
*
*  for the 256-bit iv:  iv[0]...iv[31]
*  iv[0] is the least significant byte of ctx->iv[0] (IV_0);
*  iv[3] is the most significant byte of ctx->iv[0]  (IV_0);
*  ...
*  iv[28] is the least significant byte of ctx->iv[7] (IV_7)
*  iv[31] is the most significant byte of ctx->iv[7]  (IV_7)
*/

/*
 * Load the key into the context and remember the key and IV sizes.
 *
 *   ctx     - cipher state to initialise;
 *   key     - keysize / 8 bytes of key material, read as host-order UINT32
 *             words (NOTE(review): the buffer is accessed through a UINT32
 *             pointer, so it presumably has to be suitably aligned);
 *   keysize - key size in bits, documented as 128 or 256;
 *   ivsize  - IV size in bits, stored for the later IV setup call.
 *
 * A key shorter than 8 words is extended by repeating the word four
 * positions earlier, which duplicates a 128-bit key into both halves.
 * Sizes outside the documented set are handled defensively instead of
 * indexing outside ctx->key: at most 8 words are loaded, and words that
 * have no predecessor four positions earlier are set to zero.
 */
void HC256_keysetup(
  HC256_info* ctx,
  const BYTE* key,
  UINT32 keysize,                /* Key size in bits (128+128*i) */
  UINT32 ivsize)                 /* IV size in bits  (128+128*i)*/
{
  UINT32 i;
  UINT32 nwords = keysize >> 5;  /* 32 bits per key word */

  if (nwords > 8) nwords = 8;    /* never write past ctx->key[7] */

  ctx->keysize = keysize;
  ctx->ivsize = ivsize;

  for (i = 0; i < nwords; i++)
    ctx->key[i] = UINT32TO32_LITTLE (((const UINT32*)key)[i]);

  /* i - 4 would wrap around for i < 4, so such words are zeroed instead */
  for ( ; i < 8 ; i++)
    ctx->key[i] = (i >= 4) ? ctx->key[i-4] : 0;
}


/*
 * Load the IV, expand key and IV into the tables and run the
 * initialisation rounds.  HC256_keysetup() must have been called on ctx
 * first, because ctx->key and ctx->ivsize are read here.
 *
 *   ctx - cipher state holding the key;
 *   iv  - ctx->ivsize / 8 bytes of IV, read as host-order UINT32 words
 *         (NOTE(review): accessed through a UINT32 pointer, so the buffer
 *         presumably has to be suitably aligned).
 *
 * An IV shorter than 8 words is extended by repeating the word four
 * positions earlier.  Sizes outside the documented 128/256 bits are handled
 * defensively instead of indexing outside ctx->iv: at most 8 words are
 * loaded, and words without a predecessor four positions earlier are zeroed.
 */
void HC256_ivsetup(HC256_info* ctx, const BYTE* iv)
{
    UINT32 i;
    UINT32 nwords = ctx->ivsize >> 5;    /* 32 bits per IV word */

    if (nwords > 8) nwords = 8;          /* never write past ctx->iv[7] */

    /* initialize the iv */
    for (i = 0; i < nwords; i++)
        ctx->iv[i] = UINT32TO32_LITTLE(((const UINT32*)iv)[i]);

    /* i - 4 would wrap around for i < 4, so such words are zeroed instead */
    for (; i < 8; i++)
        ctx->iv[i] = (i >= 4) ? ctx->iv[i-4] : 0;

    /*
     * Expand the key and IV into the table T (that is, into P and Q).
     * The expansion sequence is longer than T, so it is computed in place:
     * the first 528 words are produced, the last 16 of them are moved to
     * the front, and the recurrence then continues with the word index
     * offset by 512.
     */
    for (i = 0; i < 8;  i++)   ctx->T[i] = ctx->key[i];
    for (i = 8; i < 16; i++)   ctx->T[i] = ctx->iv[i-8];

    for (i = 16; i < 528; i++)
        ctx->T[i] = HC256_f2(ctx->T[i-2]) + ctx->T[i-7] + HC256_f1(ctx->T[i-15]) + ctx->T[i-16]+i;

    for (i = 0; i < 16;  i++)  ctx->T[i] = ctx->T[512+i];

    for (i = 16; i < 2048; i++)
        ctx->T[i] = HC256_f2(ctx->T[i-2]) + ctx->T[i-7] + HC256_f1(ctx->T[i-15]) + ctx->T[i-16]+512+i;

    /* initialize counter2048, X and Y (the last 16 words of P and of Q) */
    ctx->counter2048 = 0;
    for (i = 0; i < 16; i++) ctx->X[i] = ctx->T[1008+i];
    for (i = 0; i < 16; i++) ctx->Y[i] = ctx->T[1024+1008+i];

    /* run the cipher 4096 steps (256 rounds of 16) before generating the output */
    for (i = 0; i < 256; i++)  HC256_setup_update(ctx);
}

/*
 * Same as the plain IV setup, but the words produced by the
 * initialisation rounds are written to out instead of being discarded.
 * HC256_keysetup() must have been called on ctx first, because ctx->key
 * and ctx->ivsize are read here.
 *
 *   ctx - cipher state holding the key;
 *   iv  - ctx->ivsize / 8 bytes of IV, read as host-order UINT32 words
 *         (NOTE(review): accessed through a UINT32 pointer, so the buffer
 *         presumably has to be suitably aligned);
 *   out - receives NUM_INIT_ROUNDS * NUM_HIDDEN_BYTES_PER_INIT_ROUND bytes.
 *
 * An IV shorter than 8 words is extended by repeating the word four
 * positions earlier.  Sizes outside the documented 128/256 bits are handled
 * defensively instead of indexing outside ctx->iv: at most 8 words are
 * loaded, and words without a predecessor four positions earlier are zeroed.
 */
void HC256_ivsetup_withInitOutput(HC256_info* ctx, const BYTE* iv, BYTE *out)
{
    UINT32 i;
    UINT32 nwords = ctx->ivsize >> 5;    /* 32 bits per IV word */

    if (nwords > 8) nwords = 8;          /* never write past ctx->iv[7] */

    /* initialize the iv */
    for (i = 0; i < nwords; i++)
        ctx->iv[i] = UINT32TO32_LITTLE(((const UINT32*)iv)[i]);

    /* i - 4 would wrap around for i < 4, so such words are zeroed instead */
    for (; i < 8; i++)
        ctx->iv[i] = (i >= 4) ? ctx->iv[i-4] : 0;

    /*
     * Expand the key and IV into the table T (that is, into P and Q).
     * The expansion sequence is longer than T, so it is computed in place:
     * the first 528 words are produced, the last 16 of them are moved to
     * the front, and the recurrence then continues with the word index
     * offset by 512.
     */
    for (i = 0; i < 8;  i++)   ctx->T[i] = ctx->key[i];
    for (i = 8; i < 16; i++)   ctx->T[i] = ctx->iv[i-8];

    for (i = 16; i < 528; i++)
        ctx->T[i] = HC256_f2(ctx->T[i-2]) + ctx->T[i-7] + HC256_f1(ctx->T[i-15]) + ctx->T[i-16]+i;

    for (i = 0; i < 16;  i++)  ctx->T[i] = ctx->T[512+i];

    for (i = 16; i < 2048; i++)
        ctx->T[i] = HC256_f2(ctx->T[i-2]) + ctx->T[i-7] + HC256_f1(ctx->T[i-15]) + ctx->T[i-16]+512+i;

    /* initialize counter2048, X and Y (the last 16 words of P and of Q) */
    ctx->counter2048 = 0;
    for (i = 0; i < 16; i++) ctx->X[i] = ctx->T[1008+i];
    for (i = 0; i < 16; i++) ctx->Y[i] = ctx->T[1024+1008+i];

    /* run the initialisation rounds, keeping the output of every round */
    for (i = 0; i < NUM_INIT_ROUNDS; i++)
        HC256_setup_update_withInitOutput(ctx, out + i * NUM_HIDDEN_BYTES_PER_INIT_ROUND);
}

/*========================================================
 *  The following defines the encryption of the data stream
 *  (same as the benchmark implementation of SNOW-2.0)
 *========================================================
 */

/*
 * Encrypt or decrypt msglen bytes: output = input XOR keystream.
 *
 *   ctx    - cipher state after key and IV setup; it is advanced by 16
 *            steps for every keystream block generated;
 *   input  - msglen bytes of source data;
 *   output - msglen bytes of destination buffer;
 *   msglen - message length in bytes.
 *
 * The keystream is produced 64 bytes at a time.  A trailing partial block
 * still consumes a full 64-byte keystream block and the unused part is
 * discarded, so a message split over several calls only matches a single
 * call if every call but the last has a length that is a multiple of 64.
 *
 * NOTE(review): tmemcpy and tmemxor come from tmalloc.h; they are presumably
 * a plain copy and an in-place XOR of the second buffer into the first -
 * confirm.
 */
void HC256_process_bytes(
  HC256_info* ctx,
  const BYTE* input,
  BYTE* output,
  UINT32 msglen)                /* Message length in bytes. */
{
  UINT32 keystream[16];

  /* whole 64-byte blocks */
  while (msglen >= 64)
  {
      HC256_generate_keystream(ctx, keystream);
      tmemcpy(output, (BYTE*)input, 64);
      tmemxor(output, keystream, 64);

      msglen -= 64;
      input += 64;
      output += 64;
  }

  /* trailing partial block, if any */
  if (msglen > 0)
  {
      HC256_generate_keystream(ctx, keystream);
      tmemcpy(output, (BYTE*)input, msglen);
      tmemxor(output, keystream, msglen);
  }
}



/******************************************************************************
 * Black box variants
 ******************************************************************************/
/*
 * One-shot HC-256 with a 256-bit key and a 256-bit IV: writes
 * inBuf XOR keystream to the first numOutputBytes bytes of outBuf.
 *
 * Returns 0 on success (also when numOutputBytes is 0, in which case
 * nothing is done) and -1 when fewer input bytes than output bytes were
 * supplied.
 */
int HC256_xor(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  HC256_info state;

  if (numOutputBytes != 0) {
    if (numInputBytes < numOutputBytes) {
      return -1;
    }

    HC256_keysetup(&state, key, 256, 256);
    HC256_ivsetup(&state, iv);
    HC256_process_bytes(&state, inBuf, outBuf, numOutputBytes);
  }

  return 0;
}

/*
 * One-shot HC-256 (256-bit key, 256-bit IV) that also exposes the words
 * produced during the initialisation rounds.
 *
 * The first NUM_HIDDEN_BYTES bytes of outBuf are combined with the
 * initialisation-round output; the remaining numOutputBytes -
 * NUM_HIDDEN_BYTES bytes are inBuf XOR the regular keystream.
 *
 * Returns 0 on success (also when numOutputBytes is 0, in which case
 * nothing is done) and -1 when numOutputBytes is smaller than
 * NUM_HIDDEN_BYTES or when inBuf holds fewer bytes than the part of the
 * output that follows the hidden bytes.
 *
 * The input-length check is done by subtraction after the lower bound has
 * been verified, so it cannot wrap around on platforms where size_t is no
 * wider than unsigned int.
 *
 * NOTE(review): the initialisation bytes are XORed into outBuf without
 * outBuf being written first, so the result presumably depends on the
 * caller having initialised (for example zero-filled) that region - confirm
 * against the callers and the contract of tmemxor.
 */
int HC256_xor_withInitOutput(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  HC256_info B;
  BYTE initBytes[NUM_HIDDEN_BYTES];
  unsigned int numStreamBytes;   /* output bytes that follow the hidden bytes */

  if (numOutputBytes == 0) return 0;
  if (numOutputBytes < NUM_HIDDEN_BYTES) return -1;

  numStreamBytes = (unsigned int)(numOutputBytes - NUM_HIDDEN_BYTES);
  if (numStreamBytes > numInputBytes) return -1;

  HC256_keysetup(&B, key, 256, 256);
  tmemset(initBytes, 0, NUM_HIDDEN_BYTES);
  HC256_ivsetup_withInitOutput(&B, iv, initBytes);

  /* numOutputBytes >= NUM_HIDDEN_BYTES here, so the whole block is used */
  tmemxor(outBuf, initBytes, NUM_HIDDEN_BYTES);

  if (numStreamBytes == 0) return 0;
  HC256_process_bytes(&B, inBuf, outBuf + NUM_HIDDEN_BYTES, numStreamBytes);
  return 0;
}

/******************************************************************************
 * Black box API
 ******************************************************************************/
/*
 * Black box entry point: dispatches to HC256_xor_withInitOutput() when
 * withInitRoundOutput is non-zero and to HC256_xor() otherwise, and returns
 * whatever the selected function returns.
 */
int blackBoxHC256Encryption(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes, int withInitRoundOutput) {
  return withInitRoundOutput
      ? HC256_xor_withInitOutput(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes)
      : HC256_xor(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes);
}

/******************************************************************************
 * Basic cipher information
 ******************************************************************************/
/*
 * Report the basic parameters of this cipher.  Every argument is optional:
 * a null pointer means the caller is not interested in that value.
 *
 *   keySizeInBytes           - set to 32;
 *   ivSizeInBytes            - set to 32;
 *   suppressedBytes          - set to NUM_HIDDEN_BYTES;
 *   implicitBlockSizeInBytes - set to BLOCK_LEN.
 */
void getBlackBoxHC256Info(int *keySizeInBytes, int *ivSizeInBytes, int *suppressedBytes, int *implicitBlockSizeInBytes) {
  if (keySizeInBytes != 0) {
    *keySizeInBytes = 32;
  }
  if (ivSizeInBytes != 0) {
    *ivSizeInBytes = 32;
  }
  if (suppressedBytes != 0) {
    *suppressedBytes = NUM_HIDDEN_BYTES;
  }
  if (implicitBlockSizeInBytes != 0) {
    *implicitBlockSizeInBytes = BLOCK_LEN;
  }
}

