/*
 * Copyright (c) Paul Stankovski
 * Free for all non-commercial use unless this directive conflicts with
 * other applicable copyright statement(s), patent holders, laws or such.
 */
#include "black_box_aes.h"
#include <string.h>
#include <malloc.h>

/* Size of one cipher block in bytes. */
#define BLOCK_SIZE 16

/* Nb: number of 32-bit words in the cipher state. */
#define Nb 4
/* MAX_Nr: largest round count the expanded-key storage is sized for. */
#define MAX_Nr 14
/* State size in bytes. */
#define SIZE_OF_STATE (4*Nb)
/* Expanded-key size in bytes: MAX_Nr+1 round keys of Nb words each. */
#define SIZE_OF_EXPANDED_KEY (4*Nb*(MAX_Nr+1))

/* Rotate x left by n bits. Written for a 32-bit unsigned x with 0 < n < 32
   (n == 0 would make the right shift a shift by 32). */
#define ROTL(x,n) (((x)<<(n))|((x)>>(32-(n))))

/* Endianness selection: ENDIANNESS_UNDETERMINED and BIG_ENDIAN are removed if
   they were defined and LITTLE_ENDIAN is guaranteed to be defined, so the
   conditional macros further down always take their little-endian variant. */
#ifdef ENDIANNESS_UNDETERMINED
#undef ENDIANNESS_UNDETERMINED
#endif
#ifdef BIG_ENDIAN
#undef BIG_ENDIAN
#endif
#ifndef LITTLE_ENDIAN
#define LITTLE_ENDIAN
#endif

/*
 * Cipher state (4*Nb = 16 bytes) viewed three ways over the same storage:
 * as Nb 32-bit words, as a [word][byte-within-word] byte matrix, and as a
 * flat byte array.
 */
typedef union {
  UINT32 _32[Nb]; /* State of AES */
  BYTE _8_2dim[4][Nb]; /* _8_2dim[c] overlays the bytes of _32[c] */
  BYTE _8_1dim[SIZE_OF_STATE]; /* Flat byte view, used for block copies */
} StateType;

/*
 * Expanded key (round-key schedule), sized for MAX_Nr rounds: accessible as
 * 32-bit words or as raw bytes over the same storage.
 */
typedef union {
  UINT32 _32[Nb*(MAX_Nr+1)]; /* Word view: Nb words per round key */
  BYTE _8[SIZE_OF_EXPANDED_KEY]; /* Byte view of the same schedule */
} ExpandedKeyType;

/* Direction of operation; AES_init() builds a different key schedule for each. */
typedef enum {
  kEncrypt, /* Encryption */
  kDecrypt  /* Decryption */
} CryptionMode;

/*
 * Block cipher mode of operation.
 * Note: the CHECK_AES_PARAMS macro in this file accepts only kECB and kCBC;
 * the other enumerators are rejected as invalid.
 */
typedef enum {
  kECB, /* Electronic-codebook */
  kCBC, /* Cipher-block chaining */
  kCFB, /* Cipher feedback */
  kOFB, /* Output feedback */
  kCTR  /* Counter mode */
} BlockMode;

/* Size in bytes of the InputBlock/XorBlock scratch buffers in AES_info. */
#define MAX_BLOCK_LENGTH 32

/*
 * AES context: caller-supplied parameters followed by internal working state.
 * The caller fills in cm, bm, key, keyLengthInBits, IV, IVLengthInBytes and
 * output before calling AES_init(), which validates them and builds the
 * expanded key in w.
 */
typedef struct {
  /* --- caller-supplied parameters --- */
  CryptionMode cm; /* kEncrypt or kDecrypt */
  BlockMode bm; /* kECB (Electronic Code Book) or kCBC (Cipher Block Chaining) */
  BYTE *key; /* Cryption key */
  UINT32 keyLengthInBits; /* Length of cryption key in bits (128, 192 or 256) */
  BYTE *IV; /* Initialization vector; must be provided if block mode kCBC is used (set to NULL when using kECB) */
  UINT32 IVLengthInBytes; /* IV length in bytes (set to 0 when using block mode kECB) (IV length must be same as the block length of the cipher) */
  BYTE *output; /* Output buffer (this is where the cryption result, the cipher text, is placed) */
  UINT32 outputBufferLengthInBytes; /* Length of output buffer (in bytes) */
  /* --- results and internal state --- */
  UINT32 outputLengthInBytes; /* Length of the produced output in bytes. NOTE(review): not written by the routines visible in this part of the file */
  BYTE Status; /* 0 after a successful AES_init(); non-zero if parameter validation failed */
  UINT32 numBytesInInputBlock; /* NOTE(review): presumably a partial-block byte count; unused in the visible code */
  BYTE InputBlock[MAX_BLOCK_LENGTH]; /* NOTE(review): scratch buffer, unused in the visible code */
  BYTE XorBlock[MAX_BLOCK_LENGTH]; /* NOTE(review): scratch buffer, unused in the visible code */
  StateType state; /* Current cipher state for the block being processed */
  ExpandedKeyType w; /* Expanded key (round-key schedule) built by AES_init() */
} AES_info;

/*
 * Round constants for the key schedule, held in the low byte of each word.
 * The key expansion indexes this table with i/Nk, which is at least 1, so
 * entry 0 is only a placeholder.
 */
static const UINT32 Rcon[30] = {
0x00,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,
0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5
};

/*
 * Logarithm table for GF(2^8) multiplication: for x != 0, LogTable[x] is the
 * index i with ALogTable[i] == x. Entry 0 is a placeholder (zero has no
 * logarithm); the GF256_MUL* macros test for a zero operand before using it.
 */
static const BYTE LogTable[256] = { /* Values taken from 'The Design of Rijndael' */
  0,  0, 25,  1, 50,  2, 26,198, 75,199, 27,104, 51,238,223,  3,
100,  4,224, 14, 52,141,129,239, 76,113,  8,200,248,105, 28,193,
125,194, 29,181,249,185, 39,106, 77,228,166,114,154,201,  9,120,
101, 47,138,  5, 33, 15,225, 36, 18,240,130, 69, 53,147,218,142,
150,143,219,189, 54,208,206,148, 19, 92,210,241, 64, 70,131, 56,
102,221,253, 48,191,  6,139, 98,179, 37,226,152, 34,136,145, 16,
126,110, 72,195,163,182, 30, 66, 58,107, 40, 84,250,133, 61,186,
 43,121, 10, 21,155,159, 94,202, 78,212,172,229,243,115,167, 87,
175, 88,168, 80,244,234,214,116, 79,174,233,213,231,230,173,232,
 44,215,117,122,235, 22, 11,245, 89,203, 95,176,156,169, 81,160,
127, 12,246,111, 23,196, 73,236,216, 67, 31, 45,164,118,123,183,
204,187, 62, 90,251, 96,177,134, 59, 82,161,108,170, 85, 41,157,
151,178,135,144, 97,190,220,252,188,149,207,205, 55, 63, 91,209,
 83, 57,132, 60, 65,162,109, 71, 20, 42,158, 93, 86,242,211,171,
 68, 17,146,217, 35, 32, 46,137,180,124,184, 38,119,153,227,165,
103, 74,237,222,197, 49,254, 24, 13, 99,140,128,192,247,112,  7
};

/*
 * Anti-logarithm (exponentiation) table for GF(2^8): ALogTable[i] is the i-th
 * power of the generator 3 (ALogTable[0] == 1, ALogTable[1] == 3). The powers
 * repeat with period 255, hence ALogTable[255] == ALogTable[0] == 1.
 */
static const BYTE ALogTable[256] = { /* Values taken from 'The Design of Rijndael' */
  1,  3,  5, 15, 17, 51, 85,255, 26, 46,114,150,161,248, 19, 53,
 95,225, 56, 72,216,115,149,164,247,  2,  6, 10, 30, 34,102,170,
229, 52, 92,228, 55, 89,235, 38,106,190,217,112,144,171,230, 49,
 83,245,  4, 12, 20, 60, 68,204, 79,209,104,184,211,110,178,205,
 76,212,103,169,224, 59, 77,215, 98,166,241,  8, 24, 40,120,136,
131,158,185,208,107,189,220,127,129,152,179,206, 73,219,118,154,
181,196, 87,249, 16, 48, 80,240, 11, 29, 39,105,187,214, 97,163,
254, 25, 43,125,135,146,173,236, 47,113,147,174,233, 32, 96,160,
251, 22, 58, 78,210,109,183,194, 93,231, 50, 86,250, 21, 63, 65,
195, 94,226, 61, 71,201, 64,192, 91,237, 44,116,156,191,218,117,
159,186,213,100,172,239, 42,126,130,157,188,223,122,142,137,128,
155,182,193, 88,232, 35,101,175,234, 37,111,177,200, 67,197, 84,
252, 31, 33, 99,165,244,  7,  9, 27, 45,119,153,176,203, 70,202,
 69,207, 74,222,121,139,134,145,168,227, 62, 66,198, 81,243, 14,
 18, 54, 90,238, 41,123,141,140,143,138,133,148,167,242, 13, 23,
 57, 75,221,124,132,151,162,253, 28, 36,108,180,199, 82,246,  1
};

/*
 * Forward substitution box: SBox[b] is the byte that replaces b in SUB_BYTES
 * and in SUB_WORD during key expansion.
 */
static const BYTE SBox[256] = {
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
};

/*
 * Inverse substitution box used by INV_SUB_BYTES; it undoes SBox
 * (e.g. SBox[0x00] == 0x63 and InvSBox[0x63] == 0x00).
 */
static const BYTE InvSBox[256] = {
0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};

/*
 * RET_(H,err): store err in H->Status and return from the enclosing function.
 * It expands to two bare statements and contains a plain `return;`, so it is
 * only usable inside functions returning void.
 */
#define RET_(H,err) \
  H->Status = err; \
  return;

/* RET(H,err): brace-wrapped RET_, safe to use as the body of an if. */
#define RET(H,err) { RET_(H,err) }

/*
 * CHECK_AES_PARAMS(H,cm,bm): validate the caller-supplied fields of the
 * AES_info pointed to by H. On the first violation it sets H->Status to -1
 * and returns from the enclosing (void) function via RET.
 * Checks, in order: output buffer present; cm is kEncrypt or kDecrypt; bm is
 * kECB or kCBC; key present; key length is 128, 192 or 256 bits; and, for
 * kCBC, IV present with a length of exactly 16 bytes.
 */
#define CHECK_AES_PARAMS(H,cm,bm) \
  { \
    if (!(H->output)) /* No provided output buffer */ \
      RET(H,-1) /* Invalid output buffer */ \
    if ((cm != kEncrypt) && (cm != kDecrypt)) \
      RET(H,-1) /* invalid cryption mode */ \
    if ((bm != kECB) && (bm != kCBC)) \
      RET(H,-1) /* invalid block mode */ \
    if (!(H->key)) \
      RET(H,-1) /* invalid key buffer */ \
    if ((H->keyLengthInBits != 128) && \
        (H->keyLengthInBits != 192) && \
        (H->keyLengthInBits != 256)) \
      RET(H,-1) /* Invalid key length */ \
    if ((bm == kCBC) && !(H->IV)) \
      RET(H,-1) /* invalid IV buffer */ \
    if ((bm == kCBC) && (H->IV) && (H->IVLengthInBytes != 16)) \
      RET(H,-1) /* invalid IV length */ \
  }

/*
 * SET_NkNr(H): assign the local variables Nk and Nr, which must already be
 * declared in the enclosing scope. Nk is the key length in 32-bit words
 * (bits / 32) and Nr = Nk + 6 is the number of rounds.
 */
#define SET_NkNr(H) \
    Nk = (UINT32)((H->keyLengthInBits)>>5); \
    Nr = (UINT32)(Nk + 6);

/* ROT_WORD(word): rotate a 32-bit word left by 24 bits (i.e. right by 8). */
#define ROT_WORD(word) ROTL(word,24)

/*
 * SUB_WORD: apply SBox to each of the four bytes of a 32-bit word.
 * The argument's address is taken, so it must be an lvalue.
 * Note the differing shapes: the ENDIANNESS_UNDETERMINED variant is a
 * two-argument statement that assigns to `newword`, while the other two
 * variants are one-argument expressions yielding the substituted word.
 */
#ifdef ENDIANNESS_UNDETERMINED
  #define SUB_WORD(newword,word) /* S-box all bytes in a word */ \
  { \
    newword = ((LITTLE_ENDIAN_PLATFORM)? \
                 (( SBox[ ((BYTE*)(&word))[3] ] << 24 ) | \
                  ( SBox[ ((BYTE*)(&word))[2] ] << 16 ) | \
                  ( SBox[ ((BYTE*)(&word))[1] ] << 8  ) | \
                  ( SBox[ ((BYTE*)(&word))[0] ]       ) ): \
                 (( SBox[ ((BYTE*)(&word))[0] ] << 24 ) | \
                  ( SBox[ ((BYTE*)(&word))[1] ] << 16 ) | \
                  ( SBox[ ((BYTE*)(&word))[2] ] << 8  ) | \
                  ( SBox[ ((BYTE*)(&word))[3] ]       ))); \
  }
#elif defined(LITTLE_ENDIAN)
  #define SUB_WORD(word) /* S-box all bytes in a word */ \
  (( SBox[ ((BYTE*)(&word))[3] ] << 24 ) | \
   ( SBox[ ((BYTE*)(&word))[2] ] << 16 ) | \
   ( SBox[ ((BYTE*)(&word))[1] ] << 8  ) | \
   ( SBox[ ((BYTE*)(&word))[0] ]       ) )
#else /* defined(BIG_ENDIAN) */
  #define SUB_WORD(word) /* S-box all bytes in a word */ \
  (( SBox[ ((BYTE*)(&word))[0] ] << 24 ) | \
   ( SBox[ ((BYTE*)(&word))[1] ] << 16 ) | \
   ( SBox[ ((BYTE*)(&word))[2] ] << 8  ) | \
   ( SBox[ ((BYTE*)(&word))[3] ]       ) )
#endif

/*
 * KEY_DATA_REVERSAL(key,len): on big-endian builds, call reverseByteOrder()
 * (declared elsewhere) on `len` 32-bit words of key material; on
 * little-endian builds it expands to nothing. Every variant supplies its own
 * terminating ';', so call sites use it without one.
 */
#ifdef ENDIANNESS_UNDETERMINED /* Reverse byte order if big endian platform */
  #define KEY_DATA_REVERSAL(key,len) \
      if (BIG_ENDIAN_PLATFORM) \
        reverseByteOrder((UINT32 *)key, (UINT32)len);
#elif defined(BIG_ENDIAN)
  #define KEY_DATA_REVERSAL(key,len) reverseByteOrder((UINT32 *)key, (UINT32)len);
#else
  #define KEY_DATA_REVERSAL(key,len) /* Do nothing */
#endif

/*
 * Expand a cipher key of Nk 32-bit words (Nk <= 6, i.e. 128- or 192-bit keys)
 * into the round-key schedule.
 *
 *   key  Nk*4 bytes of raw key material.
 *   w    output schedule; receives Nb*(Nr+1) 32-bit words.
 *   Nk   key length in 32-bit words (must be non-zero).
 *   Nr   number of cipher rounds.
 *
 * The first Nk words are the key itself; each later word is w[i-Nk] XORed
 * with the previous word, which at every Nk-th position is first rotated,
 * S-boxed and combined with the round constant Rcon[i/Nk].
 */
void KeyExpansionNk6OrLess(BYTE *key, UINT32 *w, UINT32 Nk, UINT32 Nr)
{
  UINT32 temp, temp2;
#ifdef ENDIANNESS_UNDETERMINED
  UINT32 temp3;
#endif
  UINT32 i;
  const UINT32 numWords = (UINT32)(Nb * (Nr+1));

  memcpy((BYTE *)w, (const BYTE *)key, Nk<<2);
  KEY_DATA_REVERSAL(w,Nk)

  for (i = Nk; i < numWords; i++)
  {
    temp = w[i-1];
    if (i % Nk == 0)
    {
      temp2 = ROT_WORD(temp); /* SUB_WORD takes its argument's address, so keep it in a variable */
#ifdef ENDIANNESS_UNDETERMINED
      SUB_WORD(temp3,temp2);
      temp = temp3 ^ Rcon[i/Nk];
#else
      temp = SUB_WORD(temp2) ^ Rcon[i/Nk];
#endif
    }
    /* The index is advanced by the loop header, never inside this expression:
       modifying i here while also reading it would be unsequenced (undefined). */
    w[i] = w[i-Nk] ^ temp;
  }
}

/*
 * Expand a 256-bit cipher key (Nk = 8, Nr = 14) into the round-key schedule.
 *
 *   key  32 bytes of raw key material.
 *   w    output schedule; receives Nb*15 32-bit words.
 *
 * Same recurrence as the short-key expansion, with one addition: the word at
 * position 4 within every group of 8 is also passed through the S-box.
 */
void KeyExpansionNkMoreThan6(BYTE *key, UINT32 *w)
{ /* Used with 256 bit keys, Nk = 8, Nr = 14 */
  UINT32 temp, temp2;
#ifdef ENDIANNESS_UNDETERMINED
  UINT32 temp3;
#endif
  UINT32 i;

  memcpy((BYTE *)w, (const BYTE *)key, 32);
  KEY_DATA_REVERSAL(w,8)

  for (i = 8; i < (Nb*15); i++)
  {
    temp = w[i-1];
    if ((i&0x07) == 0)
    {
      temp2 = ROT_WORD(temp); /* SUB_WORD takes its argument's address, so keep it in a variable */
#ifdef ENDIANNESS_UNDETERMINED
      SUB_WORD(temp3,temp2);
      temp = temp3 ^ Rcon[i>>3];
#else
      temp = SUB_WORD(temp2) ^ Rcon[i>>3];
#endif
    }
    else if ((i&0x07) == 4)
    {
#ifdef ENDIANNESS_UNDETERMINED
      SUB_WORD(temp,temp);
#else
      temp = SUB_WORD(temp);
#endif
    }
    /* The index is advanced by the loop header, never inside this expression:
       modifying i here while also reading it would be unsequenced (undefined). */
    w[i] = w[i-8] ^ temp;
  }
}

/*
 * KEY_EXPANSION(key, w, Nk, Nr): dispatch to the key-expansion routine that
 * matches the key size (Nk > 6 selects the 256-bit variant).
 * Expands to an unbraced if/else that already ends in ';'. A call site that
 * adds its own ';' produces an extra empty statement, so wrap the use in
 * braces when it is the body of an outer if that has an else.
 */
#define KEY_EXPANSION(key, w, Nk, Nr) \
  if (Nk > 6) \
    KeyExpansionNkMoreThan6(key, w); \
  else \
    KeyExpansionNk6OrLess(key, w, Nk, Nr);

/* INPUT_TO_STATE(B,in): copy one 4*Nb-byte block from `in` into B's state. */
#define INPUT_TO_STATE(B,in) memcpy((BYTE *)B->state._8_1dim, (const BYTE *)in, 4*Nb);
/* STATE_TO_OUTPUT(out,B): copy B's 4*Nb-byte state to `out`.
   Note: this macro is #undef'ed and redefined further down in the file. */
#define STATE_TO_OUTPUT(out,B) memcpy((BYTE *)out, (const BYTE *)B->state._8_1dim, 4*Nb);

/*
 * Per-column helpers: XOR one round-key word into one state word.
 *   ..._FOR_COLUMN        advances both the state and the key pointer.
 *   ..._FOR_LAST_COLUMN   advances only the key pointer.
 *   ..._FOR_FINAL_COLUMN  advances neither pointer.
 */
#define ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) *sptr++ ^= *wptr++; /* Variants to avoid warnings */
#define ADD_ROUND_KEY_FOR_LAST_COLUMN(sptr, wptr) *sptr ^= *wptr++;
#define ADD_ROUND_KEY_FOR_FINAL_COLUMN(sptr, wptr) *sptr ^= *wptr;

/*
 * ADD_ROUND_KEY(B, wptr): XOR the four key words at wptr into B's state and
 * leave wptr pointing at the first word of the next round key (advanced by 4).
 * wptr must be a modifiable UINT32 pointer variable.
 */
#define ADD_ROUND_KEY(B, wptr) \
  { \
    UINT32 *sptr = B->state._32; \
    ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    ADD_ROUND_KEY_FOR_LAST_COLUMN(sptr, wptr) \
  }

/*
 * ADD_FINAL_ROUND_KEY(B, wptr): same XOR as ADD_ROUND_KEY, but wptr is left
 * on the last key word consumed (advanced by 3 only).
 */
#define ADD_FINAL_ROUND_KEY(B, wptr) \
  { \
    UINT32 *sptr = B->state._32; \
    ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    ADD_ROUND_KEY_FOR_FINAL_COLUMN(sptr, wptr) \
  }

/*
 * Backward-walking counterparts of the ADD_ROUND_KEY column helpers: the
 * state and key pointers move from the last word towards the first.
 */
#define INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) *sptr-- ^= *wptr--; /* Variants to avoid warnings */
#define INV_ADD_ROUND_KEY_FOR_LAST_COLUMN(sptr, wptr) *sptr ^= *wptr--;
#define INV_ADD_ROUND_KEY_FOR_FINAL_COLUMN(sptr, wptr) *sptr ^= *wptr;

/*
 * INV_ADD_ROUND_KEY(B, wptr): XOR the round key whose LAST word is at wptr
 * into B's state, walking backwards, and leave wptr on the last word of the
 * preceding round key (decremented by 4).
 */
#define INV_ADD_ROUND_KEY(B, wptr) \
  { \
    UINT32 *sptr = B->state._32 + 3; \
    INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    INV_ADD_ROUND_KEY_FOR_LAST_COLUMN(sptr, wptr) \
  }

/*
 * INV_ADD_FINAL_ROUND_KEY(B, wptr): same XOR, but wptr is decremented by 3
 * only, so it never steps before the start of the expanded key.
 */
#define INV_ADD_FINAL_ROUND_KEY(B, wptr) /* Just to avoid a warning */ \
  { \
    UINT32 *sptr = B->state._32 + 3; \
    INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    INV_ADD_ROUND_KEY_FOR_COLUMN(sptr, wptr) \
    INV_ADD_ROUND_KEY_FOR_FINAL_COLUMN(sptr, wptr) \
  }

/* SUB_ONE_BYTE(B,r,c): replace state byte _8_2dim[r][c] by its SBox image. */
#define SUB_ONE_BYTE(B,r,c) /* S-box table look-up */ \
    B->state._8_2dim[r][c] = SBox[B->state._8_2dim[r][c]];

/* SUB_ROW_OF_BYTES(B,r): substitute the four bytes _8_2dim[r][0..3]. */
#define SUB_ROW_OF_BYTES(B,r) \
    SUB_ONE_BYTE(B,r,0) \
    SUB_ONE_BYTE(B,r,1) \
    SUB_ONE_BYTE(B,r,2) \
    SUB_ONE_BYTE(B,r,3)

/* SUB_BYTES(B): fully unrolled S-box substitution of all 16 state bytes.
   Expands to bare statements (no enclosing braces). */
#define SUB_BYTES(B) /* S-box entire state byte by byte */ \
    SUB_ROW_OF_BYTES(B,0) \
    SUB_ROW_OF_BYTES(B,1) \
    SUB_ROW_OF_BYTES(B,2) \
    SUB_ROW_OF_BYTES(B,3)

/* INV_SUB_ONE_BYTE(B,r,c): replace state byte _8_2dim[r][c] by its InvSBox image. */
#define INV_SUB_ONE_BYTE(B,r,c) /* Inverse S-box table look-up */ \
    B->state._8_2dim[r][c] = InvSBox[B->state._8_2dim[r][c]];

/* INV_SUB_ROW_OF_BYTES(B,r): inverse-substitute the four bytes _8_2dim[r][0..3]. */
#define INV_SUB_ROW_OF_BYTES(B,r) \
    INV_SUB_ONE_BYTE(B,r,0) \
    INV_SUB_ONE_BYTE(B,r,1) \
    INV_SUB_ONE_BYTE(B,r,2) \
    INV_SUB_ONE_BYTE(B,r,3)

/* INV_SUB_BYTES(B): fully unrolled inverse S-box substitution of all 16
   state bytes. Expands to bare statements (no enclosing braces). */
#define INV_SUB_BYTES(B) /* Inverse S-box entire state byte by byte */ \
    INV_SUB_ROW_OF_BYTES(B,0) \
    INV_SUB_ROW_OF_BYTES(B,1) \
    INV_SUB_ROW_OF_BYTES(B,2) \
    INV_SUB_ROW_OF_BYTES(B,3)

/*
 * SHIFT_ROWS(B): cyclically shift the rows of the state. In _8_2dim[c][r] the
 * first index is the column and the second the row. Row 0 is untouched, row 1
 * moves one column to the left, row 2 two columns, and row 3 one column to
 * the right (equivalent to three to the left).
 */
#define SHIFT_ROWS(B) \
  { \
    BYTE temp8, temp8_2; \
    temp8 = B->state._8_2dim[0][1]; /* Second row */ \
    B->state._8_2dim[0][1] = B->state._8_2dim[1][1]; \
    B->state._8_2dim[1][1] = B->state._8_2dim[2][1]; \
    B->state._8_2dim[2][1] = B->state._8_2dim[3][1]; \
    B->state._8_2dim[3][1] = temp8; \
    temp8   = B->state._8_2dim[0][2]; /* Third row */ \
    temp8_2 = B->state._8_2dim[1][2]; \
    B->state._8_2dim[0][2] = B->state._8_2dim[2][2]; \
    B->state._8_2dim[1][2] = B->state._8_2dim[3][2]; \
    B->state._8_2dim[2][2] = temp8; \
    B->state._8_2dim[3][2] = temp8_2; \
    temp8 = B->state._8_2dim[3][3]; /* Fourth row */ \
    B->state._8_2dim[3][3] = B->state._8_2dim[2][3]; \
    B->state._8_2dim[2][3] = B->state._8_2dim[1][3]; \
    B->state._8_2dim[1][3] = B->state._8_2dim[0][3]; \
    B->state._8_2dim[0][3] = temp8; \
  }

/*
 * INV_SHIFT_ROWS(B): undo SHIFT_ROWS. Row 1 moves one column to the right,
 * row 2 two columns (the same swap as in the forward direction), and row 3
 * one column to the left.
 */
#define INV_SHIFT_ROWS(B) \
  { \
    BYTE temp8, temp8_2; \
    temp8 = B->state._8_2dim[3][1]; /* Second row */ \
    B->state._8_2dim[3][1] = B->state._8_2dim[2][1]; \
    B->state._8_2dim[2][1] = B->state._8_2dim[1][1]; \
    B->state._8_2dim[1][1] = B->state._8_2dim[0][1]; \
    B->state._8_2dim[0][1] = temp8; \
    temp8   = B->state._8_2dim[0][2]; /* Third row */ \
    temp8_2 = B->state._8_2dim[1][2]; \
    B->state._8_2dim[0][2] = B->state._8_2dim[2][2]; \
    B->state._8_2dim[1][2] = B->state._8_2dim[3][2]; \
    B->state._8_2dim[2][2] = temp8; \
    B->state._8_2dim[3][2] = temp8_2; \
    temp8 = B->state._8_2dim[0][3]; /* Fourth row */ \
    B->state._8_2dim[0][3] = B->state._8_2dim[1][3]; \
    B->state._8_2dim[1][3] = B->state._8_2dim[2][3]; \
    B->state._8_2dim[2][3] = B->state._8_2dim[3][3]; \
    B->state._8_2dim[3][3] = temp8; \
  }

/*
 * GF(2^8) multiplication of b by a fixed constant, done with log/antilog
 * tables. `logb` must be LogTable[b]. A zero b yields 0; otherwise the result
 * is ALogTable[(logb + LogTable[constant]) mod 255], with the reduction
 * written as a comparison plus subtraction. The added offsets are the table
 * logarithms of the constants: 25 (2), 1 (3), 199 (9), 104 (0x0B),
 * 238 (0x0D) and 223 (0x0E).
 */
#define GF256_MUL2(b,logb) /* Polynomial multiplication by 2 */ \
  ((b)?((logb > 229)?(ALogTable[logb - 230]):(ALogTable[25 + logb])):(0))

#define GF256_MUL3(b,logb) /* Polynomial multiplication by 3 */ \
  ((b)?((logb > 253)?(ALogTable[logb - 254]):(ALogTable[1 + logb])):(0))

#define GF256_MUL9(b,logb) /* Polynomial multiplication by 9 */ \
  ((b)?((logb > 55)?(ALogTable[logb - 56]):(ALogTable[199 + logb])):(0))

#define GF256_MULB(b,logb) /* Polynomial multiplication by B */ \
  ((b)?((logb > 150)?(ALogTable[logb - 151]):(ALogTable[104 + logb])):(0))

#define GF256_MULD(b,logb) /* Polynomial multiplication by D */ \
  ((b)?((logb > 16)?(ALogTable[logb - 17]):(ALogTable[238 + logb])):(0))

#define GF256_MULE(b,logb) /* Polynomial multiplication by E */ \
  ((b)?((logb > 31)?(ALogTable[logb - 32]):(ALogTable[223 + logb])):(0))

/*
 * MIX_ONE_COLUMN(sptr8): transform the four bytes at sptr8 in place using the
 * coefficient rows (2 3 1 1), (1 2 3 1), (1 1 2 3), (3 1 1 2).
 * Requires BYTE locals s0..s3 and logs0..logs3 in the enclosing scope; all
 * four inputs are read before any output byte is written.
 */
#define MIX_ONE_COLUMN(sptr8) \
    s0 = *((BYTE*)(sptr8)); logs0 = LogTable[s0]; \
    s1 = ((BYTE*)(sptr8))[1]; logs1 = LogTable[s1]; \
    s2 = ((BYTE*)(sptr8))[2]; logs2 = LogTable[s2]; \
    s3 = ((BYTE*)(sptr8))[3]; logs3 = LogTable[s3]; \
    *((BYTE*)(sptr8)) = (BYTE)((GF256_MUL2(s0,logs0)) ^ (GF256_MUL3(s1,logs1)) ^ (s2) ^ (s3)); \
    ((BYTE*)(sptr8))[1] = (BYTE)((s0) ^ (GF256_MUL2(s1,logs1)) ^ (GF256_MUL3(s2,logs2)) ^ (s3)); \
    ((BYTE*)(sptr8))[2] = (BYTE)((s0) ^ (s1) ^ (GF256_MUL2(s2,logs2)) ^ (GF256_MUL3(s3,logs3))); \
    ((BYTE*)(sptr8))[3] = (BYTE)((GF256_MUL3(s0,logs0)) ^ (s1) ^ (s2) ^ (GF256_MUL2(s3,logs3)));

/* MIX_COLUMNS(B): apply MIX_ONE_COLUMN to each of the four 4-byte columns of
   B's state; declares the scratch locals the per-column macro needs. */
#define MIX_COLUMNS(B) /* Multiply each state column by polynomial 3x^3+x^2+x+2 */ \
  { \
    BYTE s0, s1, s2, s3; \
    BYTE logs0, logs1, logs2, logs3; \
    BYTE *sptr8 = (BYTE*)(B->state._8_1dim); \
    MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    MIX_ONE_COLUMN(sptr8) \
  }

/*
 * INV_MIX_ONE_COLUMN(sptr8): transform the four bytes at sptr8 in place using
 * the coefficient rows (E B D 9), (9 E B D), (D 9 E B), (B D 9 E).
 * Requires BYTE locals s0..s3 and logs0..logs3 in the enclosing scope.
 */
#define INV_MIX_ONE_COLUMN(sptr8) \
    s0 = *((BYTE*)(sptr8)); logs0 = LogTable[s0]; \
    s1 = ((BYTE*)(sptr8))[1]; logs1 = LogTable[s1]; \
    s2 = ((BYTE*)(sptr8))[2]; logs2 = LogTable[s2]; \
    s3 = ((BYTE*)(sptr8))[3]; logs3 = LogTable[s3]; \
    *((BYTE*)(sptr8)) = (BYTE)((GF256_MULE(s0,logs0)) ^ (GF256_MULB(s1,logs1)) ^ (GF256_MULD(s2,logs2)) ^ (GF256_MUL9(s3,logs3))); \
    ((BYTE*)(sptr8))[1] = (BYTE)((GF256_MUL9(s0,logs0)) ^ (GF256_MULE(s1,logs1)) ^ (GF256_MULB(s2,logs2)) ^ (GF256_MULD(s3,logs3))); \
    ((BYTE*)(sptr8))[2] = (BYTE)((GF256_MULD(s0,logs0)) ^ (GF256_MUL9(s1,logs1)) ^ (GF256_MULE(s2,logs2)) ^ (GF256_MULB(s3,logs3))); \
    ((BYTE*)(sptr8))[3] = (BYTE)((GF256_MULB(s0,logs0)) ^ (GF256_MULD(s1,logs1)) ^ (GF256_MUL9(s2,logs2)) ^ (GF256_MULE(s3,logs3)));

/* INV_MIX_COLUMNS(stateptr): apply INV_MIX_ONE_COLUMN to the four consecutive
   4-byte columns starting at stateptr (a 16-byte buffer). */
#define INV_MIX_COLUMNS(stateptr) /* Multiply each state column by polynomial Bx^3+Dx^2+9x+E */ \
  { \
    BYTE s0, s1, s2, s3; \
    BYTE logs0, logs1, logs2, logs3; \
    BYTE *sptr8 = (BYTE*)(stateptr); \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    INV_MIX_ONE_COLUMN(sptr8) \
  }

/*
 * INV_KEY_EXPANSION(key, w, Nk, Nr): build the decryption key schedule.
 * Runs the normal KEY_EXPANSION and then applies INV_MIX_ONE_COLUMN to every
 * word of round keys 1 .. Nr-1 (words 4 .. 4*Nr-1 of w); the first and the
 * last round key are left unchanged.
 */
#define INV_KEY_EXPANSION(key, w, Nk, Nr) \
{ \
  UINT32 i; \
  BYTE *sptr8; \
  KEY_EXPANSION(key, w, Nk, Nr); \
  sptr8 = ((BYTE*)(w + 4)); \
  for (i=1; i<(UINT32)Nr; i++) \
  { \
    BYTE s0, s1, s2, s3; \
    BYTE logs0, logs1, logs2, logs3; \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
    INV_MIX_ONE_COLUMN(sptr8) sptr8 += 4; \
  } \
}

/*
 * STATE_REVERSAL(B): on big-endian builds, call reverseByteOrder() (declared
 * elsewhere) on the Nb state words of B; on little-endian builds it expands
 * to nothing. Every variant supplies its own terminating ';'.
 */
#ifdef ENDIANNESS_UNDETERMINED /* Reverse byte order if big endian platform */
  #define STATE_REVERSAL(B) \
      if (BIG_ENDIAN_PLATFORM) \
        reverseByteOrder((UINT32 *)B->state._32, (UINT32)Nb);
#elif defined(BIG_ENDIAN)
  #define STATE_REVERSAL(B) reverseByteOrder((UINT32 *)B->state._32, (UINT32)Nb);
#else
  #define STATE_REVERSAL(B) /* Do nothing */
#endif

/*
 * CBC helper macros. C, D, E, F (and the *prev / *x variants) are four
 * caller-declared UINT32 variables that together hold one 16-byte chaining
 * block. Every macro here is a no-op for the chaining part unless
 * bm == kCBC.
 * NOTE(review): none of these macros is used by the code visible in this
 * part of the file.
 */

/* INITIALIZE_CDEF: for kCBC, load C..F from the four 32-bit words at IV. */
#define INITIALIZE_CDEF(bm,IV,C,D,E,F) \
  if (bm == kCBC) \
  { \
    UINT32 *IVptr = (UINT32*)(IV); \
    C = *IVptr++; /* Get IV */ \
    D = *IVptr++; \
    E = *IVptr++; \
    F = *IVptr; \
  }

/* Alias of INITIALIZE_CDEF. */
#define INITIALIZE_CDEFx INITIALIZE_CDEF

/*
 * GET_NEXT_INPUT_BLOCK_ENCRYPT: copy block x into the state; for kCBC, XOR
 * C..F into the state and then reload C..F from the four words at x.
 */
#define GET_NEXT_INPUT_BLOCK_ENCRYPT(B,x,bm,C,D,E,F) \
  INPUT_TO_STATE(B,x) \
  if (bm == kCBC) \
  { \
    B->state._32[0] ^= C; \
    B->state._32[1] ^= D; \
    B->state._32[2] ^= E; \
    B->state._32[3] ^= F; \
    C = ((UINT32*)(x))[0]; /* Get next input block */ \
    D = ((UINT32*)(x))[1]; \
    E = ((UINT32*)(x))[2]; \
    F = ((UINT32*)(x))[3]; \
  }

/* SET_CDEFprev: save the current four state words into Cprev..Fprev. */
#define SET_CDEFprev(B,Cprev,Dprev,Eprev,Fprev) \
    Cprev = B->state._32[0]; \
    Dprev = B->state._32[1]; \
    Eprev = B->state._32[2]; \
    Fprev = B->state._32[3];

/*
 * UPDATE_OUTPUT_BUFFER: copy the state to outputPtr; for kCBC, XOR Cx..Fx
 * into that output block and then replace Cx..Fx by Cprev..Fprev.
 */
#define UPDATE_OUTPUT_BUFFER(B,bm,Cx,Dx,Ex,Fx,Cprev,Dprev,Eprev,Fprev,outputPtr) \
    STATE_TO_OUTPUT(outputPtr,B) \
    if (bm == kCBC) \
    { \
      UINT32 *oPtr = (UINT32*)outputPtr; \
      *oPtr++ ^= Cx; /* crypted block to output buffer */ \
      *oPtr++ ^= Dx; \
      *oPtr++ ^= Ex; \
      *oPtr ^= Fx; \
      Cx = Cprev; \
      Dx = Dprev; \
      Ex = Eprev; \
      Fx = Fprev; \
    }

/*
  Function AES_encrypt

  Description: AES encryption function

  Input:
    'B' is an AES_info structure
    'x' is the message to be encrypted
    'xlen' is the length of 'x' (in bytes)

  Output:
    'B->Status' ...
    'B->output' contains the AES encryption of 'x'
    'B->outputLengthInBytes' contains the length of the output in bytes

  Constraints:
    xlen MUST be a multiple of 16.
*/

/*
  Function AES_decrypt

  Description: AES decryption function

  Input:
    'B' is an AES_info structure

  Output:
    'B->Status' ...
    'B->output' contains the AES decryption of 'x'
    'B->outputLengthInBytes' contains the length of the output in bytes

  Constraints:
    xlen MUST be a multiple of 16.
*/

/*
  Function AES_crypt

  Description: AES cryption function

  Input:
    'B' is a pointer to an AES_info structure

  Output:
    'B->Status' ...
    'B->output' contains the AES decryption of the plaintext
    'B->outputLengthInBytes' contains the length of the output in bytes

  Constraints:
*/

/*
 * Validate the parameters stored in *B and build the key schedule B->w.
 *
 * On invalid parameters B->Status is set to a non-zero value and the function
 * returns early (inside CHECK_AES_PARAMS). On success B->Status is 0 and B->w
 * holds the encryption schedule (cm == kEncrypt) or the decryption schedule
 * (otherwise).
 */
void AES_init(AES_info *B)
{
  UINT32 Nk, Nr;

  /* May return from this function with B->Status already set. */
  CHECK_AES_PARAMS(B, B->cm, B->bm)
  SET_NkNr(B)

  switch (B->cm)
  {
    case kEncrypt:
      KEY_EXPANSION(B->key, B->w._32, Nk, Nr)
      break;
    default: /* kDecrypt: anything else was rejected above */
      INV_KEY_EXPANSION(B->key, B->w._32, Nk, Nr)
      break;
  }

  B->Status = 0;
}

#undef STATE_TO_OUTPUT
/*
 * 16-byte block helpers. Each expands to a complete brace-enclosed block, so
 * call sites use them without a trailing ';'.
 *
 * Both work on plain bytes: the buffers passed in are byte arrays with no
 * alignment guarantee, so they must not be accessed through wider integer
 * pointers (misaligned access and a strict-aliasing violation).
 */
/* COPY_BUF_16(dst, src): copy 16 bytes from src to dst (buffers must not overlap). */
#define COPY_BUF_16(dst, src) { memcpy((dst), (src), 16); }
/* XOR_BUF_16(dst, src): XOR the 16 bytes at src into the 16 bytes at dst. */
#define XOR_BUF_16(dst, src) { unsigned char *d = (unsigned char*)(dst); const unsigned char *s = (const unsigned char*)(src); unsigned int xi_; for (xi_ = 0; xi_ < 16; xi_++) d[xi_] ^= s[xi_]; }
/* STATE_TO_OUTPUT(out,B): copy B's 16-byte state to out. */
#define STATE_TO_OUTPUT(out,B) COPY_BUF_16(out, B->state._8_1dim)
/* STATE_TO_OUTPUT_XOR(out,B): XOR B's 16-byte state into out. */
#define STATE_TO_OUTPUT_XOR(out,B) XOR_BUF_16(out, B->state._8_1dim)
/*
 * Encrypt the 16-byte block at x with the 10-round schedule in B->w and write
 * the result to B->output. B must have been prepared by AES_init() for
 * encryption with a 128-bit key.
 */
void AES128_encryptBlock(AES_info *B, BYTE *x) {
  UINT32 *roundKey = B->w._32; /* Walks forward through the expanded key */
  int rnd;

  INPUT_TO_STATE(B, x)
  ADD_ROUND_KEY(B, roundKey)

  /* Rounds 1..9: full rounds including the column mix */
  for (rnd = 1; rnd < 10; rnd++) {
    SUB_BYTES(B)
    SHIFT_ROWS(B)
    MIX_COLUMNS(B)
    ADD_ROUND_KEY(B, roundKey)
  }

  /* Round 10: no column mix */
  SUB_BYTES(B)
  SHIFT_ROWS(B)
  ADD_FINAL_ROUND_KEY(B, roundKey)
  STATE_TO_OUTPUT(B->output, B)
}

/*
 * Encrypt the 16-byte block at x with the 14-round schedule in B->w and write
 * the result to B->output. B must have been prepared by AES_init() for
 * encryption with a 256-bit key.
 */
void AES256_encryptBlock(AES_info *B, BYTE *x) {
  UINT32 *roundKey = B->w._32; /* Walks forward through the expanded key */
  int rnd;

  INPUT_TO_STATE(B, x)
  ADD_ROUND_KEY(B, roundKey)

  /* Rounds 1..13: full rounds including the column mix */
  for (rnd = 1; rnd < 14; rnd++) {
    SUB_BYTES(B)
    SHIFT_ROWS(B)
    MIX_COLUMNS(B)
    ADD_ROUND_KEY(B, roundKey)
  }

  /* Round 14: no column mix */
  SUB_BYTES(B)
  SHIFT_ROWS(B)
  ADD_FINAL_ROUND_KEY(B, roundKey)
  STATE_TO_OUTPUT(B->output, B)
}

/*
 * Encrypt the block at x with the 10-round schedule, exposing every
 * intermediate round state: the state after round k+1 is XORed into the
 * 16-byte block B->output + k*BLOCK_SIZE.
 *
 * numBlocks limits how many round states are emitted: the function returns as
 * soon as that many blocks have been written, provided this happens within
 * the first nine rounds. Otherwise all ten rounds run and the final state is
 * additionally copied to lastBlock; lastBlock is NOT written on the early
 * return path.
 */
void AES128_encryptBlock_xor_withInitOutput(AES_info *B, const BYTE *x, BYTE *lastBlock, int numBlocks) {
  UINT32 *roundKey = B->w._32; /* Walks forward through the expanded key */
  int rnd;

  INPUT_TO_STATE(B, x)
  ADD_ROUND_KEY(B, roundKey)

  /* Rounds 1..9, each followed by emitting the state */
  for (rnd = 0; rnd < 9; rnd++) {
    SUB_BYTES(B)
    SHIFT_ROWS(B)
    MIX_COLUMNS(B)
    ADD_ROUND_KEY(B, roundKey)
    STATE_TO_OUTPUT_XOR(B->output + rnd * BLOCK_SIZE, B)
    if (--numBlocks == 0) {
      return;
    }
  }

  /* Round 10 (no column mix): emit the state and hand it back as lastBlock */
  SUB_BYTES(B)
  SHIFT_ROWS(B)
  ADD_FINAL_ROUND_KEY(B, roundKey)
  STATE_TO_OUTPUT_XOR(B->output + 9 * BLOCK_SIZE, B)
  STATE_TO_OUTPUT(lastBlock, B)
}

/*
 * Encrypt the block at x with the 14-round schedule, exposing every
 * intermediate round state: the state after round k+1 is XORed into the
 * 16-byte block B->output + k*BLOCK_SIZE.
 *
 * numBlocks limits how many round states are emitted: the function returns as
 * soon as that many blocks have been written, provided this happens within
 * the first thirteen rounds. Otherwise all fourteen rounds run and the final
 * state is additionally copied to lastBlock; lastBlock is NOT written on the
 * early return path.
 */
void AES256_encryptBlock_xor_withInitOutput(AES_info *B, const BYTE *x, BYTE *lastBlock, int numBlocks) {
  UINT32 *roundKey = B->w._32; /* Walks forward through the expanded key */
  int rnd;

  INPUT_TO_STATE(B, x)
  ADD_ROUND_KEY(B, roundKey)

  /* Rounds 1..13, each followed by emitting the state */
  for (rnd = 0; rnd < 13; rnd++) {
    SUB_BYTES(B)
    SHIFT_ROWS(B)
    MIX_COLUMNS(B)
    ADD_ROUND_KEY(B, roundKey)
    STATE_TO_OUTPUT_XOR(B->output + rnd * BLOCK_SIZE, B)
    if (--numBlocks == 0) {
      return;
    }
  }

  /* Round 14 (no column mix): emit the state and hand it back as lastBlock */
  SUB_BYTES(B)
  SHIFT_ROWS(B)
  ADD_FINAL_ROUND_KEY(B, roundKey)
  STATE_TO_OUTPUT_XOR(B->output + 13 * BLOCK_SIZE, B)
  STATE_TO_OUTPUT(lastBlock, B)
}

/*
 * Decrypt the 16-byte block at x and write the result to B->output.
 *
 * B must have been prepared by AES_init() with cm == kDecrypt, so that B->w
 * holds the decryption key schedule. The number of rounds is derived from
 * B->keyLengthInBits (Nr = 10, 12 or 14), so all supported key sizes are
 * handled; the round keys are consumed from the end of the schedule towards
 * its beginning.
 */
void AES_decryptBlock(AES_info *B, BYTE *x)
{
  UINT32 Nk, Nr;
  UINT32 *wptr;
  UINT32 rnd;

  SET_NkNr(B)
  INPUT_TO_STATE(B,x)
  STATE_REVERSAL(B)
  wptr = B->w._32+(Nr<<2)+3; /* Point to end of expanded key (4*Nr+3 32-bit words ahead) */
  INV_ADD_ROUND_KEY(B, wptr)

  /* Nr-1 full rounds; the round count must follow the key size because wptr
     starts at the end of an Nr-round schedule. */
  for (rnd = 1; rnd < Nr; rnd++)
  {
    INV_SUB_BYTES(B) INV_SHIFT_ROWS(B) INV_MIX_COLUMNS(B->state._8_1dim) INV_ADD_ROUND_KEY(B, wptr)
  }

  /* Final round: no column mix; uses the first round key (words 3..0). */
  INV_SUB_BYTES(B) INV_SHIFT_ROWS(B) INV_ADD_FINAL_ROUND_KEY(B, wptr)  STATE_TO_OUTPUT(B->output,B)
}

/*
 * Finish using an AES context. Currently a no-op: nothing in *B is read,
 * modified or released.
 */
void AES_final(AES_info *B) {
  (void)B; /* unused - suppress compiler warning */
}

/******************************************************************************
 * Black box variants
 ******************************************************************************/
//zero key + zero iv + zero pt => 66e94bd4ef8a2c3b884cfa59ca342b2e f795bd4a52e29ed713d313fa20e98dbc

/*
 * AES-128 in CBC mode, with the ciphertext XORed into outBuf.
 *
 * For each 16-byte block: the input block is XORed with the previous
 * ciphertext block (the IV for the first block), encrypted, and the resulting
 * ciphertext is XORed into the corresponding block of outBuf. outBuf must
 * therefore hold meaningful data (e.g. zeros) on entry.
 *
 *   key            16-byte key.
 *   iv             16-byte initialization vector.
 *   inBuf          input data, at least numOutputBytes bytes.
 *   numInputBytes  number of valid bytes in inBuf.
 *   outBuf         buffer of numOutputBytes bytes that receives the XOR.
 *   numOutputBytes number of bytes to process; must be a multiple of 16.
 *
 * Returns 0 on success (including numOutputBytes == 0) and -1 on invalid
 * arguments.
 */
int aes128_xor(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  AES_info ctx;
  BYTE pt[16];
  BYTE ct[16];
  size_t offset; /* byte offset of the current block; size_t avoids int overflow on large buffers */

  if (numOutputBytes == 0) return 0;
  if (numInputBytes < numOutputBytes) return -1;
  if ((numOutputBytes % BLOCK_SIZE) != 0) return -1;
  if (!inBuf || !outBuf) return -1;

  ctx.cm = kEncrypt;
  ctx.bm = kCBC;
  ctx.key = (BYTE*)key; ctx.keyLengthInBits = 128;
  ctx.IV  = (BYTE*)iv;  ctx.IVLengthInBytes = BLOCK_SIZE;
  ctx.output = ct;
  ctx.outputBufferLengthInBytes = numOutputBytes; /* already known to be a whole number of blocks */

  AES_init(&ctx); /* also rejects a NULL key or iv */
  if (ctx.Status) return -1;
  for (offset = 0; offset < numOutputBytes; offset += BLOCK_SIZE) {
    COPY_BUF_16(pt, offset==0 ? iv : ct) /* chaining value */
    XOR_BUF_16(pt, inBuf + offset)
    AES128_encryptBlock(&ctx, pt); /* writes the ciphertext block to ct */
    XOR_BUF_16(outBuf + offset, ct)
  }
  AES_final(&ctx);
  return 0;
}

/*
 * Like aes128_xor(), but the first input block also exposes its intermediate
 * round states: output blocks 0..9 receive (by XOR) the states after rounds
 * 1..10 of the first block's encryption. Output block 10+j then receives the
 * ciphertext of input block 1+j, chained CBC-style from the previous
 * ciphertext.
 *
 *   key            16-byte key.
 *   iv             16-byte initialization vector.
 *   inBuf          input data; at least 16 bytes, and at least
 *                  numOutputBytes - 9*16 bytes when more than ten blocks are
 *                  requested. Must not be the same buffer as outBuf.
 *   numInputBytes  number of valid bytes in inBuf.
 *   outBuf         buffer of numOutputBytes bytes that receives the XOR; must
 *                  hold meaningful data (e.g. zeros) on entry.
 *   numOutputBytes number of bytes to produce; must be a multiple of 16.
 *
 * Returns 0 on success (including numOutputBytes == 0) and -1 on invalid
 * arguments.
 */
int aes128_xor_withInitOutput(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  AES_info ctx;
  BYTE pt[16];
  BYTE ct[16];
  size_t blk; /* index of the output block being produced */
  const size_t numBlocks = numOutputBytes / BLOCK_SIZE;
  const unsigned int numSuppressedBytes = 9 * BLOCK_SIZE;

  if (numOutputBytes == 0) return 0;
  /* Compare by subtraction so that a huge numInputBytes cannot wrap around. */
  if ((numOutputBytes > numSuppressedBytes) &&
      (numInputBytes < numOutputBytes - numSuppressedBytes)) return -1;
  if ((numOutputBytes % BLOCK_SIZE) != 0) return -1;
  if (!inBuf || !outBuf) return -1;
  if (inBuf == outBuf) return -1;
  /* The first input block is always consumed, however few blocks are requested. */
  if (numInputBytes < BLOCK_SIZE) return -1;

  ctx.cm = kEncrypt;
  ctx.bm = kCBC;
  ctx.key = (BYTE*)key; ctx.keyLengthInBits = 128;
  ctx.IV  = (BYTE*)iv;  ctx.IVLengthInBytes = BLOCK_SIZE;
  ctx.output = outBuf;
  ctx.outputBufferLengthInBytes = numOutputBytes; /* already known to be a whole number of blocks */

  AES_init(&ctx); /* also rejects a NULL key or iv */
  if (ctx.Status) return -1;

  /* first 10 blocks (9 suppressed + first) */
  COPY_BUF_16(pt, iv)
  XOR_BUF_16(pt, inBuf)
  AES128_encryptBlock_xor_withInitOutput(&ctx, pt, ct, (int)numBlocks);
  if (numBlocks <= 10) {
    AES_final(&ctx);
    return 0;
  }
  ctx.output = ct; /* from here on each block's ciphertext lands in ct */

  /* remaining blocks: output block blk is fed by input block blk-9 */
  for (blk = 10; blk < numBlocks; blk++) {
    COPY_BUF_16(pt, ct)
    XOR_BUF_16(pt, inBuf + (blk - 9) * BLOCK_SIZE)
    AES128_encryptBlock(&ctx, pt);
    XOR_BUF_16(outBuf + blk * BLOCK_SIZE, ct)
  }

  AES_final(&ctx);
  return 0;
}

/*
 * AES-256 black box without initialization-round output.
 *
 * Sets up an encryption context (kEncrypt, kCBC, 256-bit key, BLOCK_SIZE-byte
 * IV) whose output points at a local scratch block, then processes
 * numOutputBytes / BLOCK_SIZE blocks. For each block the feedback value (iv
 * for the first block, the scratch block ct afterwards) is copied to pt,
 * XORed with the current input block, passed to AES256_encryptBlock, and the
 * scratch block is then combined into the current output block with
 * XOR_BUF_16.
 * NOTE(review): COPY_BUF_16, XOR_BUF_16 and AES256_encryptBlock are defined
 * outside this block; the description assumes 16-byte operands and that the
 * encrypt call leaves its result in ctx.output (ct) - confirm.
 *
 * Returns 0 when numOutputBytes is 0 or once all blocks have been processed;
 * -1 when numInputBytes < numOutputBytes, when numOutputBytes is not a
 * multiple of BLOCK_SIZE, or when ctx.Status is nonzero after AES_init.
 */
int aes256_xor(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  AES_info ctx;
  BYTE pt[16];
  BYTE ct[16];
  const int numBlocks = numOutputBytes / BLOCK_SIZE;
  const BYTE *feedback = iv;   /* block chained into the next encryption */
  const BYTE *inPos = inBuf;   /* current input block */
  BYTE *outPos = outBuf;       /* current output block */
  int blocksLeft;

  if (numOutputBytes == 0) return 0;
  if (numInputBytes < numOutputBytes || (numOutputBytes % BLOCK_SIZE) != 0) return -1;

  ctx.cm = kEncrypt;
  ctx.bm = kCBC;
  ctx.key = (BYTE*)key;
  ctx.keyLengthInBits = 256;
  ctx.IV = (BYTE*)iv;
  ctx.IVLengthInBytes = BLOCK_SIZE;
  ctx.output = ct;
  ctx.outputBufferLengthInBytes = numBlocks * BLOCK_SIZE;

  AES_init(&ctx);
  if (ctx.Status) return -1;

  for (blocksLeft = numBlocks; blocksLeft > 0; blocksLeft--) {
    COPY_BUF_16(pt, feedback)
    XOR_BUF_16(pt, inPos)
    AES256_encryptBlock(&ctx, pt);
    XOR_BUF_16(outPos, ct)

    /* After the first block the chain continues from the scratch block. */
    feedback = ct;
    inPos += BLOCK_SIZE;
    outPos += BLOCK_SIZE;
  }

  AES_final(&ctx);
  return 0;
}

/*
 * AES-256 black box variant that also exposes initialization-round output.
 *
 * The first input block is XORed with iv and handed, together with numBlocks,
 * to AES256_encryptBlock_xor_withInitOutput; that single call accounts for the
 * first 14 output blocks (13 suppressed + first). Every output block beyond
 * those 14 consumes one further input block: the previous result in ct is
 * copied to pt, XORed with the next input block, encrypted with
 * AES256_encryptBlock, and combined into outBuf with XOR_BUF_16.
 * NOTE(review): COPY_BUF_16, XOR_BUF_16 and both encrypt helpers are defined
 * outside this block; the description assumes they operate on 16-byte blocks
 * and that the encrypt helpers leave their latest result in ct - confirm.
 *
 * key, iv         256-bit key and BLOCK_SIZE-byte IV, passed to the context.
 * inBuf           input; must hold at least one block, and at least
 *                 numOutputBytes - 13*BLOCK_SIZE bytes when that is larger.
 * numInputBytes   number of valid bytes in inBuf.
 * outBuf          output; must not be the same pointer as inBuf.
 * numOutputBytes  requested output size, a multiple of BLOCK_SIZE.
 *
 * Returns 0 when numOutputBytes is 0 or when at most 14 blocks are requested;
 * -1 on a rejected argument or when ctx.Status is nonzero after AES_init;
 * otherwise ctx.Status as read after AES_final.
 */
int aes256_xor_withInitOutput(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes) {
  AES_info ctx;
  BYTE pt[16];
  BYTE ct[16];
  int i;
  int numBlocks = numOutputBytes / BLOCK_SIZE;
  const unsigned int numSuppressedBytes = 13 * BLOCK_SIZE;
  const int numFirstCallBlocks = 14; /* 13 suppressed + first */

  if (numOutputBytes == 0) return 0;
  /* The first block of inBuf is always consumed below, however little output
     is requested, so fewer than BLOCK_SIZE input bytes can never be enough. */
  if (numInputBytes < BLOCK_SIZE) return -1;
  /* Same bound as numInputBytes + numSuppressedBytes < numOutputBytes, but
     written as a subtraction so the unsigned sum cannot wrap around. */
  if (numOutputBytes > numSuppressedBytes && numInputBytes < numOutputBytes - numSuppressedBytes) return -1;
  if ((numOutputBytes % BLOCK_SIZE) != 0) return -1;
  if (inBuf == outBuf) return -1; /* in-place operation is rejected */

  ctx.cm = kEncrypt;
  ctx.bm = kCBC;
  ctx.key = (BYTE*)key; ctx.keyLengthInBits = 256;
  ctx.IV  = (BYTE*)iv;  ctx.IVLengthInBytes = BLOCK_SIZE;
  ctx.output = outBuf;
  ctx.outputBufferLengthInBytes = numBlocks * BLOCK_SIZE;

  AES_init(&ctx);
  if (ctx.Status) return -1;

  /* first 14 blocks (13 suppressed + first) */
  COPY_BUF_16(pt, iv)
  XOR_BUF_16(pt, inBuf)
  AES256_encryptBlock_xor_withInitOutput(&ctx, pt, ct, numBlocks);
  if (numBlocks <= numFirstCallBlocks) {
    AES_final(&ctx);
    return 0;
  }
  /* From here on the context output is redirected to the scratch block ct. */
  ctx.output = ct;
  numBlocks -= numFirstCallBlocks;

  /* remaining blocks: input block i+1 feeds output block i+14 */
  for (i=0; i<numBlocks; i++) {
    COPY_BUF_16(pt, ct)
    XOR_BUF_16(pt, inBuf + (i + 1) * BLOCK_SIZE)
    AES256_encryptBlock(&ctx, pt);
    XOR_BUF_16(outBuf + (i + numFirstCallBlocks) * BLOCK_SIZE, ct)
  }

  AES_final(&ctx);
  /* NOTE(review): the short path above returns 0 while this path returns
     ctx.Status after AES_final - confirm that the difference is intended. */
  return ctx.Status;
}

/******************************************************************************
 * Black box API
 ******************************************************************************/
/*
 * Public AES-128 entry point. A nonzero withInitRoundOutput routes the call
 * to aes128_xor_withInitOutput, zero routes it to aes128_xor. All other
 * arguments are forwarded unchanged and the callee's result is returned.
 */
int blackBoxAES128Encryption(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes, int withInitRoundOutput) {
  return withInitRoundOutput
    ? aes128_xor_withInitOutput(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes)
    : aes128_xor(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes);
}

/*
 * Public AES-256 entry point. A nonzero withInitRoundOutput routes the call
 * to aes256_xor_withInitOutput, zero routes it to aes256_xor. All other
 * arguments are forwarded unchanged and the callee's result is returned.
 */
int blackBoxAES256Encryption(const BYTE *key, const BYTE *iv, const BYTE *inBuf, unsigned int numInputBytes, BYTE *outBuf, unsigned int numOutputBytes, int withInitRoundOutput) {
  return withInitRoundOutput
    ? aes256_xor_withInitOutput(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes)
    : aes256_xor(key, iv, inBuf, numInputBytes, outBuf, numOutputBytes);
}

/******************************************************************************
 * Basic cipher information
 ******************************************************************************/
/*
 * Reports the fixed parameters of the AES-128 black box. Every argument is
 * optional: a null pointer is skipped, any other pointer receives its value.
 *   keySizeInBytes            16
 *   ivSizeInBytes             BLOCK_SIZE
 *   suppressedBytes           9 * BLOCK_SIZE
 *   implicitBlockSizeInBytes  BLOCK_SIZE
 */
void getBlackBoxAES128Info(int *keySizeInBytes, int *ivSizeInBytes, int *suppressedBytes, int *implicitBlockSizeInBytes) {
  const int keyBytes = 16;
  const int hiddenBytes = 9 * BLOCK_SIZE;

  if (keySizeInBytes) {
    *keySizeInBytes = keyBytes;
  }
  if (ivSizeInBytes) {
    *ivSizeInBytes = BLOCK_SIZE;
  }
  if (suppressedBytes) {
    *suppressedBytes = hiddenBytes;
  }
  if (implicitBlockSizeInBytes) {
    *implicitBlockSizeInBytes = BLOCK_SIZE;
  }
}

/*
 * Reports the fixed parameters of the AES-256 black box. Every argument is
 * optional: a null pointer is skipped, any other pointer receives its value.
 *   keySizeInBytes            32
 *   ivSizeInBytes             BLOCK_SIZE
 *   suppressedBytes           13 * BLOCK_SIZE
 *   implicitBlockSizeInBytes  BLOCK_SIZE
 */
void getBlackBoxAES256Info(int *keySizeInBytes, int *ivSizeInBytes, int *suppressedBytes, int *implicitBlockSizeInBytes) {
  const int keyBytes = 32;
  const int hiddenBytes = 13 * BLOCK_SIZE;

  if (keySizeInBytes) {
    *keySizeInBytes = keyBytes;
  }
  if (ivSizeInBytes) {
    *ivSizeInBytes = BLOCK_SIZE;
  }
  if (suppressedBytes) {
    *suppressedBytes = hiddenBytes;
  }
  if (implicitBlockSizeInBytes) {
    *implicitBlockSizeInBytes = BLOCK_SIZE;
  }
}

