/*
 * Copyright (c) Paul Stankovski
 * Free for all non-commercial use unless this directive conflicts with
 * other applicable copyright statement(s), patent holders, laws or such.
 */
#include "black_box_bit_set_utils.h"
#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include "bittwiddling.h"
#include "bitslice_utils.h"
#include "assert_utils.h"
#include "memory_utils.h"

/* Upper bound on key and IV sizes, in bits; used to size the local key/IV
 * buffers in the xorOverBitSet* routines below. */
#define MAX_BIT_LEN 512
/* Upper bound on the "suppressed" length reported by blackBoxInfo(), in bytes;
 * used to size the byte-oriented xor scratch buffer. */
#define MAX_SUPPRESSED_BYTES (4 * 4096)
/* The same bound expressed in bits; used to size the bitsliced buffers, which
 * hold one UINT64 per bit. */
#define MAX_SUPPRESSED_BITS (MAX_SUPPRESSED_BYTES * 8)

/*******************************************************************************
 * Threading utilities
 ******************************************************************************/
/* Largest number of key/IV bits whose assignments may be spread over threads. */
#define MAX_NUM_PARALLELL_BITS 6
/* Resulting upper bound on the thread count: one thread per assignment of the
 * parallel bits. */
#define MAX_NUM_THREADS (1 << MAX_NUM_PARALLELL_BITS)

/*
 * Work description for one worker thread (see maxtermPartialSummationThread).
 * All fields except ret are filled in by xorOverBitSetThreadify before the
 * thread is started; ret is written by the worker.
 */
typedef struct {
  /* index of the worker; also encodes the values of the parallel bits */
  int id;
  /* cipher to evaluate */
  bbCipher cipher;
  /* private, writable key buffer with this worker's parallel key bits preset */
  BYTE *key;
  /* number of key bits left for the worker to enumerate */
  int numKeyBits;
  /* positions of those key bits (may be NULL) */
  int *keyBit;
  /* private, writable IV buffer with this worker's parallel IV bits preset */
  BYTE *iv;
  /* number of IV bits left for the worker to enumerate */
  int numIvBits;
  /* positions of those IV bits (may be NULL) */
  int *ivBit;
  /* input data, shared read-only between workers */
  const BYTE *in;
  /* length of the input data, in bytes */
  int inLen;
  /* private xor accumulator for this worker */
  BYTE *xorBuf;
  /* worker result: return value of xorOverBitSetThreaded */
  int ret;
} xorParams;

/* Held around the printf calls that report thread creation/join errors in
 * xorOverBitSetThreadify. */
static pthread_mutex_t screen = PTHREAD_MUTEX_INITIALIZER;
//static pthread_attr_t attr;

/*******************************************************************************
 * Iterate over bit set
 ******************************************************************************/
/*
 * Spreads the low numBits bits of counter over buf: bit k of counter is
 * written to bit position bit[k] of buf (via setBufBit), for k = 0..numBits-1.
 */
static void setBufBits(BYTE *buf, UINT64 counter, int numBits, int *bit) {
  int k = 0;

  while (k < numBits) {
    const int value = (int)((counter >> k) & 1);
    setBufBit(buf, bit[k], value);
    k++;
  }
}

/*
 * Evaluates the cipher's standard (non-bitsliced) implementation once for
 * every assignment of the selected key and IV bits.
 *
 * _key/_iv are copied into local buffers, so the caller's data is never
 * modified. For each of the 2^numKeyBits key assignments and 2^numIvBits IV
 * assignments, the selected bit positions (keyBit[], ivBit[]) are overwritten
 * with the bits of the loop counter and blackBoxEncrypt is called with xorBuf
 * as its output buffer (the name suggests the cipher accumulates into it by
 * xor -- confirm against blackBoxEncrypt).
 *
 * Returns 0 on success, -1 as soon as blackBoxEncrypt reports an error.
 */
static int xorOverBitSetStandard(
  bbCipher cipher,
  const BYTE *_key, int numKeyBits, int *keyBit,
  const BYTE *_iv, int numIvBits, int *ivBit,
  const BYTE *in, int inLen,
  BYTE *xorBuf) {

  int keySize, ivSize, suppressedBytes, implicitBlockSize;
  UINT64 i, j;
  BYTE key[MAX_BIT_LEN / 8];
  BYTE iv[MAX_BIT_LEN / 8];
  UINT64 numKeyIterations, numIViterations;

  blackBoxInfo(cipher, &keySize, &ivSize, &suppressedBytes, &implicitBlockSize);
  ASSERT(blackBoxCipherProvidesStandardImplementation(cipher), "Could not find standard implementation!");

  /* keySize/ivSize are byte counts here (they are used as MEMCPY lengths), so
   * they must be checked against the byte capacity of the local buffers, not
   * against MAX_BIT_LEN itself. */
  ASSERT(keySize >= 0 && keySize <= (int)sizeof(key), "Unexpected key size!");
  ASSERT(ivSize >= 0 && ivSize <= (int)sizeof(iv), "Unexpected iv size!");

  /* a shift count outside [0, 63] is undefined for a 64-bit operand */
  ASSERT(numKeyBits >= 0 && numKeyBits < 64, "Unexpected number of key bits!");
  ASSERT(numIvBits >= 0 && numIvBits < 64, "Unexpected number of iv bits!");
  numKeyIterations = (UINT64)1 << numKeyBits;
  numIViterations = (UINT64)1 << numIvBits;

  /* work on private copies of key and iv */
  MEMCPY(key, _key, keySize);
  MEMCPY(iv, _iv, ivSize);

  /* iterate over all key assignments, and for each over all iv assignments */
  for (j=0; j<numKeyIterations; j++) {
    setBufBits(key, j, numKeyBits, keyBit); /* set key bits */

    for (i=0; i<numIViterations; i++) {
      setBufBits(iv, i, numIvBits, ivBit); /* set iv bits */

      if (blackBoxEncrypt(cipher, key, iv, in, inLen, xorBuf, suppressedBytes, 1 /* with init round output */))
        return -1; /* cryption error */
    }
  }

  return 0;
}

/*
 * Same enumeration as xorOverBitSetStandard, but without private copies:
 * the selected bits are written straight into the buffers behind key and iv.
 *
 * NOTE: despite the const qualifiers, key and iv ARE modified. The caller must
 * pass writable buffers that no other thread uses (xorOverBitSetThreadify
 * hands every worker its own key/iv copy).
 *
 * Returns 0 on success, -1 as soon as blackBoxEncrypt reports an error.
 */
static int xorOverBitSetStandardWithPreallocatedKeyAndIvBuffers(
  bbCipher cipher,
  const BYTE *key, int numKeyBits, int *keyBit,
  const BYTE *iv, int numIvBits, int *ivBit,
  const BYTE *in, int inLen,
  BYTE *xorBuf) {

  int keySize, ivSize, suppressedBytes, implicitBlockSize;
  BYTE *const keyBuf = (BYTE*)key; /* writable view of the caller's key buffer */
  BYTE *const ivBuf = (BYTE*)iv;   /* writable view of the caller's iv buffer */
  const UINT64 keyCount = (UINT64)1 << numKeyBits;
  const UINT64 ivCount = (UINT64)1 << numIvBits;
  UINT64 keyIdx, ivIdx;

  ASSERT(blackBoxCipherProvidesStandardImplementation(cipher), "Could not find standard implementation!");
  blackBoxInfo(cipher, &keySize, &ivSize, &suppressedBytes, &implicitBlockSize);

  for (keyIdx = 0; keyIdx < keyCount; keyIdx++) {
    /* apply the current key assignment in place */
    setBufBits(keyBuf, keyIdx, numKeyBits, keyBit);

    for (ivIdx = 0; ivIdx < ivCount; ivIdx++) {
      /* apply the current iv assignment in place */
      setBufBits(ivBuf, ivIdx, numIvBits, ivBit);

      if (blackBoxEncrypt(cipher, key, iv, in, inLen, xorBuf, suppressedBytes, 1 /* with init round output */) != 0) {
        return -1; /* cryption error */
      }
    }
  }

  return 0;
}

/*
 * Bitsliced variant of the key/IV bit enumeration.
 *
 * Key, IV and input are converted to a bitsliced representation (one UINT64
 * per bit). toBitslicedKeyIv reports how many of the selected key and IV bits
 * it handles itself (presumably inside the 64 parallel lanes -- confirm in
 * bitslice_utils); only the remaining bits are enumerated by the loops below.
 * All blackBoxEncryptBitsliced calls share one bitsliced accumulator, which is
 * finally folded back to bytes and xored into xorBuf.
 *
 * Requires numKeyBits + numIvBits >= 6 and a bitsliced cipher implementation.
 * Returns 0 on success, -1 as soon as the cipher reports an error.
 *
 * NOTE(review): the two MAX_SUPPRESSED_BITS-sized UINT64 arrays live on the
 * stack (2 MiB together if UINT64 is 8 bytes). This function is also run
 * from worker threads; confirm the thread stack size is sufficient on every
 * target platform.
 */
int xorOverBitSetBitsliced(
  bbCipher cipher,
  const BYTE *_key, int numKeyBits, int *keyBit,
  const BYTE *_iv, int numIvBits, int *ivBit,
  const BYTE *_in, int inLen,
  BYTE *xorBuf) {

  int keyBitLen, ivBitLen, suppressedBits, implicitBlockBits;
  int slicedKeyBits, slicedIvBits;
  const int inBits = inLen * 8;
  UINT64 keyRounds, ivRounds;
  UINT64 keyIdx, ivIdx;
  UINT64 slicedKey[MAX_BIT_LEN];
  UINT64 slicedIv[MAX_BIT_LEN];
  UINT64 slicedIn[MAX_SUPPRESSED_BITS];
  UINT64 slicedXor[MAX_SUPPRESSED_BITS];
  BYTE plainXor[MAX_SUPPRESSED_BYTES];

  blackBoxInfoBitsliced(cipher, &keyBitLen, &ivBitLen, &suppressedBits, &implicitBlockBits);
  ASSERT(suppressedBits <= MAX_SUPPRESSED_BITS, "Unexpected number of suppressed bits!");
  ASSERT(blackBoxCipherProvidesBitslicedImplementation(cipher), "Could not find bitsliced implementation!");
  ASSERT(numKeyBits + numIvBits >= 6, "This bitslice application requires at least six (6) bits!");
  ASSERT(keyBitLen <= MAX_BIT_LEN, "Unexpected key size!");
  ASSERT(ivBitLen <= MAX_BIT_LEN, "Unexpected iv size!");

  /* convert key, iv and input to bitsliced form; clear the accumulator */
  toBitslicedKeyIv(slicedKey, keyBitLen, _key,
                   slicedIv, ivBitLen, _iv,
                   keyBit, numKeyBits,
                   ivBit, numIvBits,
                   &slicedKeyBits,
                   &slicedIvBits);
  MEMSET(slicedXor, 0, suppressedBits * sizeof(UINT64));
  toBitslicedBuf(slicedIn, _in, inBits);

  /* enumerate only the bits that the bitslicing did not take care of */
  keyRounds = (UINT64)1 << (numKeyBits - slicedKeyBits);
  ivRounds = (UINT64)1 << (numIvBits - slicedIvBits);

  for (keyIdx = 0; keyIdx < keyRounds; keyIdx++) {
    setKeyBits(slicedKey, keyIdx, numKeyBits, keyBit, slicedKeyBits);

    for (ivIdx = 0; ivIdx < ivRounds; ivIdx++) {
      setIvBits(slicedIv, ivIdx, numIvBits, ivBit, slicedIvBits);

      if (blackBoxEncryptBitsliced(cipher, slicedKey, slicedIv, slicedIn, inBits, slicedXor, suppressedBits, 1 /* with init round output */) != 0)
        return -1; /* cryption error */
    }
  }

  /* fold the bitsliced accumulator back to bytes and add it to the result */
  fromBitslicedBufByWeight(plainXor, slicedXor, suppressedBits);
  MEMXOR(xorBuf, plainXor, suppressedBits / 8);

  return 0;
}

/*
 * Earlier form of xorOverBitSetBitsliced, kept alongside it. The only
 * difference visible here is the accumulation strategy: each key iteration
 * collects the cipher output in a separate buffer (xorInner64) that is cleared
 * first and xored into the overall accumulator (xor64) afterwards, instead of
 * letting every cipher call write to xor64 directly.
 *
 * Requires numKeyBits + numIvBits >= 6 and a bitsliced cipher implementation.
 * Returns 0 on success, -1 as soon as the cipher reports an error.
 *
 * NOTE(review): three MAX_SUPPRESSED_BITS-sized UINT64 arrays are placed on
 * the stack here; confirm the stack is large enough wherever this is called.
 */
int xorOverBitSetBitslicedOriginal(
  bbCipher cipher,
  const BYTE *_key, int numKeyBits, int *keyBit,
  const BYTE *_iv, int numIvBits, int *ivBit,
  const BYTE *_in, int inLen,
  BYTE *xorBuf) {

  int keySizeInBits, ivSizeInBits, suppressedBits, implicitBlockSizeInBits;
  UINT64 i, j;
  UINT64 key[MAX_BIT_LEN];
  UINT64 iv[MAX_BIT_LEN];
  int numBitsclicedKeyBits, numBitsclicedIvBits;
  UINT64 numKeyIterations, numIViterations;
  UINT64 in64[MAX_SUPPRESSED_BITS];
  UINT64 xor64[MAX_SUPPRESSED_BITS];
  UINT64 xorInner64[MAX_SUPPRESSED_BITS];

  /* query cipher dimensions (all in bits) and check them against the fixed
   * buffer sizes above */
  blackBoxInfoBitsliced(cipher, &keySizeInBits, &ivSizeInBits, &suppressedBits, &implicitBlockSizeInBits);
  ASSERT(suppressedBits <= MAX_SUPPRESSED_BITS, "Unexpected number of suppressed bits!");
  ASSERT(blackBoxCipherProvidesBitslicedImplementation(cipher), "Could not find bitsliced implementation!");
  ASSERT(numKeyBits + numIvBits >= 6, "This bitslice application requires at least six (6) bits!");
  ASSERT(keySizeInBits <= MAX_BIT_LEN, "Unexpected key size!");
  ASSERT(ivSizeInBits <= MAX_BIT_LEN, "Unexpected iv size!");

  /* bitslice input parameters; toBitslicedKeyIv reports how many of the
   * selected key/iv bits it handles itself */
  toBitslicedKeyIv(key, keySizeInBits, _key,
                   iv, ivSizeInBits, _iv,
                   keyBit, numKeyBits,
                   ivBit, numIvBits,
                   &numBitsclicedKeyBits,
                   &numBitsclicedIvBits);
  MEMSET(xor64, 0, suppressedBits * sizeof(UINT64));
  toBitslicedBuf(in64, _in, inLen * 8);

  /* iterate over the key/iv bits that are not covered by the bitslicing */
  numKeyIterations = (UINT64)1 << (numKeyBits - numBitsclicedKeyBits);
  numIViterations = (UINT64)1 << (numIvBits - numBitsclicedIvBits);
  for (j=0; j<numKeyIterations; j++) {

    setKeyBits(key, j, numKeyBits, keyBit, numBitsclicedKeyBits); /* set key bits */
    MEMSET(xorInner64, 0, suppressedBits * sizeof(UINT64)); /* reset xor buffer */

    for (i=0; i<numIViterations; i++) {
      setIvBits(iv, i, numIvBits, ivBit, numBitsclicedIvBits); /* set iv bits */

      if (blackBoxEncryptBitsliced(cipher, key, iv, in64, inLen * 8, xorInner64, suppressedBits, 1 /* with init round output */)) {
        return -1; /* cryption error */
      }
    }

    /* add the partial xor of this key iteration to the overall accumulator */
    MEMXOR(xor64, xorInner64, suppressedBits * sizeof(UINT64));
  }

  /* convert the bitsliced accumulator to bytes and xor it into the caller's
   * buffer */
  {
    BYTE xorStd[MAX_SUPPRESSED_BYTES];
    fromBitslicedBufByWeight(xorStd, xor64, suppressedBits);
    MEMXOR(xorBuf, xorStd, suppressedBits / 8);
  }

  return 0;
}

/*
 * Single-threaded entry point: picks the implementation to enumerate with.
 *
 * The bitsliced implementation is used when at least six bits are enumerated
 * and the cipher offers one; otherwise the standard implementation is used
 * (working on private copies of key and iv). Returns -1 when the cipher
 * offers neither, otherwise the result of the chosen routine.
 */
int xorOverBitSetNonThreaded(
  bbCipher cipher,
  const BYTE *_key, int numKeyBits, int *keyBit,
  const BYTE *_iv, int numIvBits, int *ivBit,
  const BYTE *in, int inLen,
  BYTE *xorBuf) {

  const int enoughBitsForBitslicing = (numKeyBits + numIvBits >= 6);

  if (enoughBitsForBitslicing && blackBoxCipherProvidesBitslicedImplementation(cipher)) {
    return xorOverBitSetBitsliced(cipher,
                                  _key, numKeyBits, keyBit,
                                  _iv, numIvBits, ivBit,
                                  in, inLen,
                                  xorBuf);
  }

  if (!blackBoxCipherProvidesStandardImplementation(cipher)) {
    return -1; /* no usable implementation */
  }

  return xorOverBitSetStandard(cipher,
                               _key, numKeyBits, keyBit,
                               _iv, numIvBits, ivBit,
                               in, inLen,
                               xorBuf);
}

/*
 * Per-thread entry point: like xorOverBitSetNonThreaded, except that the
 * standard fallback modifies the buffers behind _key and _iv in place instead
 * of copying them. Callers must therefore pass writable key/iv buffers that
 * are private to the calling thread.
 *
 * Returns -1 when the cipher offers no usable implementation, otherwise the
 * result of the chosen routine.
 */
int xorOverBitSetThreaded(
  bbCipher cipher,
  const BYTE *_key, int numKeyBits, int *keyBit,
  const BYTE *_iv, int numIvBits, int *ivBit,
  const BYTE *in, int inLen,
  BYTE *xorBuf) {

  const int enoughBitsForBitslicing = (numKeyBits + numIvBits >= 6);

  if (enoughBitsForBitslicing && blackBoxCipherProvidesBitslicedImplementation(cipher)) {
    return xorOverBitSetBitsliced(cipher,
                                  _key, numKeyBits, keyBit,
                                  _iv, numIvBits, ivBit,
                                  in, inLen,
                                  xorBuf);
  }

  if (!blackBoxCipherProvidesStandardImplementation(cipher)) {
    return -1; /* no usable implementation */
  }

  return xorOverBitSetStandardWithPreallocatedKeyAndIvBuffers(cipher,
                                                              _key, numKeyBits, keyBit,
                                                              _iv, numIvBits, ivBit,
                                                              in, inLen,
                                                              xorBuf);
}

/*
 * Thread start routine: runs the enumeration described by the xorParams
 * object passed in par and stores the outcome in its ret field.
 *
 * The routine ends the calling thread through pthread_exit, so it must only
 * be used as a thread entry point, never called directly.
 */
void *maxtermPartialSummationThread(void *par) {
  xorParams *const job = (xorParams*)par;
  int status;
#if 0
  size_t mystacksize;

  pthread_attr_getstacksize (&attr, &mystacksize);
  printf("Thread %d: stack size = %li bytes \n", job->id, mystacksize);
#endif

  status = xorOverBitSetThreaded(job->cipher,
                                 job->key, job->numKeyBits, job->keyBit,
                                 job->iv, job->numIvBits, job->ivBit,
                                 job->in, job->inLen,
                                 job->xorBuf);
  job->ret = status;

  pthread_exit(NULL);
  return NULL; /* not reached; keeps the function well-formed for the compiler */
}

/*
 * Splits the enumeration over 2^numParallellBits worker threads and xors the
 * partial results of all workers into xorBuf.
 *
 * The parallel bits are taken from the front of the bit lists: first from
 * ivBit[] (as many as available), then from keyBit[]. Worker i gets private
 * copies of key and iv in which those bits are preset from the bits of i
 * (low bits of i -> iv bits, following bits -> key bits), and enumerates the
 * remaining bits through xorOverBitSetThreaded.
 *
 * Memory layout of the shared scratch buffer (bigBuf):
 *   [ numThreads x suppressedBytes : per-worker xor buffers, zeroed ]
 *   [ numThreads x (keySize + ivSize) : per-worker key followed by iv ]
 *
 * Returns 0 when every worker was started, joined and reported success;
 * -1 otherwise. On failure xorBuf may have been partially updated and must
 * not be used.
 *
 * NOTE(review): workers are created with default thread attributes, and
 * xorOverBitSetBitsliced keeps large arrays on its stack -- confirm that the
 * default thread stack size is sufficient on all target platforms.
 */
static int xorOverBitSetThreadify(
  bbCipher cipher,
  const BYTE *key, int numKeyBits, int *keyBit,
  const BYTE *iv, int numIvBits, int *ivBit,
  const BYTE *in, int inLen,
  BYTE *xorBuf,
  int numParallellBits) {

#define THREAD_POOL_SIZE 16
  const int numThreads = 1 << numParallellBits;
  pthread_t threadBuf[THREAD_POOL_SIZE];
  xorParams paramBuf[THREAD_POOL_SIZE];
  /* small thread counts use the stack arrays, larger ones go to the heap */
  pthread_t *thread = numThreads <= THREAD_POOL_SIZE ? threadBuf : (pthread_t*)MALLOC(numThreads * sizeof(pthread_t));
  xorParams *param = numThreads <= THREAD_POOL_SIZE ? paramBuf : (xorParams*)MALLOC(numThreads * sizeof(xorParams));
  int keySize, ivSize, suppressedBytes, implicitBlockSize;
  int i, ret = 0;
  int numStarted = 0; /* number of threads that were actually created */
  const int numParallellIvBits = numParallellBits <= numIvBits ? numParallellBits : numIvBits;
  const int numParallellKeyBits = numParallellBits - numParallellIvBits;
#define BIG_BUF_SIZE (1024 * 32)
  BYTE buf[BIG_BUF_SIZE];
  BYTE *bigBuf;

  ASSERT(numParallellBits >= 1, "Unexpected number of parallell bits!");
  ASSERT(numParallellBits <= numKeyBits + numIvBits, "Unsufficient bit space for parallellization!");
  ASSERT(numParallellBits <= MAX_NUM_PARALLELL_BITS, "Too many parallell bits for this implementation bits!");
  ASSERT(numParallellKeyBits <= numKeyBits, "Key bit space error!");
  ASSERT(numParallellIvBits <= numIvBits, "IV bit space error!");

  blackBoxInfo(cipher, &keySize, &ivSize, &suppressedBytes, &implicitBlockSize);

  /* run several instances of xorOverBitSetThreaded and combine results */

  /* prepare memory buffers */
  if (((keySize + ivSize + suppressedBytes) * numThreads) <= BIG_BUF_SIZE) {
    bigBuf = buf;
  } else {
    bigBuf = (BYTE*)MALLOC((keySize + ivSize + suppressedBytes) * numThreads); /* sharing one big buffer reduces the number of malloc calls */
  }
  /* clear the per-worker xor buffers at the front of bigBuf */
  MEMSET(bigBuf, 0, suppressedBytes * numThreads);

  /* load parameters */
  for (i=0; i<numThreads; i++) {
    int j;

    param[i].id = i;
    param[i].cipher = cipher;
    /* pessimistic default: a worker that never runs counts as failed */
    param[i].ret = -1;

    /* private key copy with this worker's parallel key bits preset */
    param[i].key = bigBuf + suppressedBytes * numThreads + i * (keySize + ivSize);
    MEMCPY(param[i].key, key, keySize);
    for (j=0; j<numParallellKeyBits; j++)
      setBufBit(param[i].key, keyBit[j], (i >> (j + numParallellIvBits)) & 1);
    param[i].numKeyBits = numKeyBits - numParallellKeyBits;
    param[i].keyBit = keyBit == NULL ? NULL : keyBit + numParallellKeyBits;

    /* private iv copy (stored right after the key) with parallel iv bits preset */
    param[i].iv = bigBuf + suppressedBytes * numThreads + keySize + i * (keySize + ivSize);
    MEMCPY(param[i].iv, iv, ivSize);
    for (j=0; j<numParallellIvBits; j++)
      setBufBit(param[i].iv, ivBit[j], (i >> j) & 1);
    param[i].numIvBits = numIvBits - numParallellIvBits;
    param[i].ivBit = ivBit == NULL ? NULL : ivBit + numParallellIvBits;

    param[i].in = in;
    param[i].inLen = inLen;

    param[i].xorBuf = bigBuf + i * suppressedBytes;
  }

#if 1
  /* start threads; after a failed pthread_create the thread id is undefined,
   * so stop right there -- the overall result is invalid anyway */
  for (i=0; i<numThreads; i++) {
    if (pthread_create(&thread[i], NULL, maxtermPartialSummationThread, (void*)&param[i])) {
      pthread_mutex_lock(&screen);
      printf("Error creating thread %d!\n", i);
      pthread_mutex_unlock(&screen);
      ret = -1;
      break;
    }
    numStarted++;
  }

  /* wait until all threads that were started have completed */
  for (i=0; i<numStarted; i++) {
    if (pthread_join(thread[i], NULL)) {
      pthread_mutex_lock(&screen);
      printf("Error joining thread %d!\n", i);
      pthread_mutex_unlock(&screen);
      ret = -1;
    }
  }
#else /* process threads serially (for debugging purposes only) */
  (void)numStarted;
  for (i=0; i<numThreads; i++) {
    /* start thread */
    if (pthread_create(&thread[i], NULL, maxtermPartialSummationThread, (void*)&param[i])) {
      pthread_mutex_lock(&screen);
      printf("Error creating thread %d!\n", i);
      pthread_mutex_unlock(&screen);
      ret = -1;
      continue; /* nothing to join */
    }
    /* wait until thread has completed */
    if (pthread_join(thread[i], NULL)) {
      pthread_mutex_lock(&screen);
      printf("Error joining thread %d!\n", i);
      pthread_mutex_unlock(&screen);
      ret = -1;
    }
  }
#endif

  /* combine results: only workers that reported success contribute */
#if 1
  for (i=0; i<numThreads; i++) {
    if (param[i].ret != 0) {
      ret = -1;
      continue;
    }
    MEMXOR(xorBuf, param[i].xorBuf, suppressedBytes);
  }
#endif

  /* cleanup: release whatever did not fit into the stack buffers */
  if (bigBuf != buf) FREE(bigBuf);
  if (param != paramBuf) FREE(param);
  if (thread != threadBuf) FREE(thread);

  return ret;
}

/*
 * Public entry point: enumerates all assignments of the selected key and IV
 * bits, using worker threads when that is worthwhile.
 *
 * numParallellBits is a request only; it is clamped to the range
 * [0, min(numKeyBits + numIvBits, MAX_NUM_PARALLELL_BITS)].
 * Returns the result of the threaded or non-threaded routine that was chosen.
 */
int xorOverBitSet(
  bbCipher cipher,
  const BYTE *key, int numKeyBits, int *keyBit,
  const BYTE *iv, int numIvBits, int *ivBit,
  const BYTE *in, int inLen,
  BYTE *xorBuf,
  int numParallellBits) {

  const int totalBits = numKeyBits + numIvBits;
  int parallelBits = numParallellBits;

  /* clamp the requested number of parallel bits */
  if (parallelBits < 0) {
    parallelBits = 0;
  }
  if (parallelBits > totalBits) {
    parallelBits = totalBits;
  }
  if (parallelBits > MAX_NUM_PARALLELL_BITS) {
    parallelBits = MAX_NUM_PARALLELL_BITS;
  }

  /* Threading has a fixed overhead that does not pay off for small data sets;
   * the limit of 13 bits is a somewhat arbitrary cut-off. */
  if (parallelBits <= 0 || totalBits < 13) {
    return xorOverBitSetNonThreaded(cipher, key, numKeyBits, keyBit, iv, numIvBits, ivBit, in, inLen, xorBuf);
  }

  return xorOverBitSetThreadify(cipher, key, numKeyBits, keyBit, iv, numIvBits, ivBit, in, inLen, xorBuf, parallelBits);
}

/*******************************************************************************
 * Basic bit set stuff
 ******************************************************************************/
/*
 * Fills bitSet with the identity sequence 0, 1, ..., size-1.
 * Does nothing when size is zero or negative.
 */
void initializeBitSet(int *bitSet, int size) {
  int *slot = bitSet;
  int value;

  for (value = 0; value < size; ++value) {
    *slot = value;
    ++slot;
  }
}

/*
 * Linear search: returns 1 if value occurs among the first size entries of
 * bitSet, 0 otherwise (including when size is zero or negative).
 */
int bitSetContainsValue(const int *bitSet, int size, int value) {
  const int *cur = bitSet;
  int remaining;

  for (remaining = size; remaining > 0; remaining--, cur++) {
    if (*cur == value) {
      return 1;
    }
  }
  return 0;
}

/*
 * Writes the bit set to the log in the form "{  a,  b,  c }".
 *
 * The LOGFLUSH flag is masked out for every logger call except the last one,
 * so whatever LOGFLUSH triggers happens at most once per bit set.
 */
void logBitSet(FILE *logFile, int flags, const int *bitSet, int size) {
  const int bufferedFlags = flags & ~LOGFLUSH;
  int idx;

  logger(logFile, bufferedFlags, "{");
  for (idx = 0; idx < size; idx++) {
    /* every element but the last is followed by a comma */
    const char *separator = (idx + 1 < size) ? "," : "";
    logger(logFile, bufferedFlags, " %2d%s", bitSet[idx], separator);
  }
  logger(logFile, flags, " }");
}

/*
 * Copies the first srcSize entries of bitSetSrc to bitSetDst and stores
 * srcSize in *dstSize. The size is written before the elements are copied.
 */
void copyBitSet(int *bitSetDst, int *dstSize, const int *bitSetSrc, int srcSize) {
  int pos = 0;

  *dstSize = srcSize;
  while (pos < srcSize) {
    bitSetDst[pos] = bitSetSrc[pos];
    pos++;
  }
}

/*******************************************************************************
 * Bit set ordering
 ******************************************************************************/
/*
 * Advances a bit set to its successor in lexicographic order.
 *
 * i holds the n elements of the set in strictly increasing order, each in the
 * range [0, m-1]:
 *   n is the bit set size
 *   m is the size of the bit space
 *
 * Starting from the last element, the first element that can still be
 * incremented (element at position p may be at most m - (n - p)) is
 * incremented, and all elements after it are reset to the smallest values
 * that keep the set increasing.
 *
 * Returns 1 if i now holds the next set, 0 if the sequence is exhausted.
 * On a 0 return the contents of i are unspecified (every element has been
 * incremented); re-initialize the set before reusing it.
 *
 * NOTE: for n == 0 the function always returns 1, so a caller that loops on
 * the return value must handle the empty set itself.
 */
int nextBitSet(int *i, int n, int m) {
  int pos;

  ASSERT(n >= 0, "Unexpected set size n!\n");
  if (n == 0)
    return 1;

  for (pos = n - 1; pos >= 0; pos--) {
    /* (n - 1 - pos) elements follow position pos, each needing a larger value */
    if (++(i[pos]) < m - (n - 1 - pos)) {
      int t;
      /* restart the tail right behind the element that was advanced */
      for (t = pos + 1; t < n; t++)
        i[t] = i[t-1] + 1;
      return 1;
    }
  }
  return 0;
}

/*
 * Sets up the first configuration of a pair of connected bit sets that
 * together hold nTot elements.
 *
 * If a second set is supplied (bitSet2 != NULL) it receives all nTot elements
 * (0..nTot-1) and the first set starts out empty; otherwise the first set
 * receives them. The resulting sizes are stored in *n1 and *n2, which must
 * both be valid pointers. m1/m2 are the sizes of the respective bit spaces.
 */
void initializeConnectedBitSets(int nTot, int *bitSet1, int *n1, int m1, int *bitSet2, int *n2, int m2) {
  ASSERT(bitSet1 || bitSet2, "At least one bit set must be non-empty!\n");

  if (!bitSet2) {
    /* no second bit set: everything goes into the first one */
    *n1 = nTot;
    *n2 = 0;

    ASSERT(nTot <= m1, "Unexpected bit space size!\n");
    initializeBitSet(bitSet1, nTot);
    return;
  }

  /* start with all elements in the second bit set */
  *n1 = 0;
  *n2 = nTot;

  ASSERT(nTot <= m2, "Unexpected bit space size!\n");
  initializeBitSet(bitSet2, nTot);
}

/*
 * Advances a pair of connected bit sets to the next configuration.
 *
 * With only one set supplied this is plain nextBitSet on that set. With both
 * sets supplied the order is:
 *   1. advance the second set;
 *   2. if it is exhausted, advance the first set and restart the second;
 *   3. if both are exhausted, move one element from the second set to the
 *      first (*n2 decreases, *n1 increases) and restart both.
 *
 * Returns 1 if a new configuration was produced, 0 when the second set is
 * empty and nothing is left to advance.
 *
 * NOTE(review): step 3 does not compare the grown *n1 against m1 -- confirm
 * that callers guarantee the first bit space is large enough.
 */
int nextConnectedBitSet(int *bitSet1, int *n1, int m1, int *bitSet2, int *n2, int m2) {

  ASSERT((bitSet1 || bitSet2), "At least one bit set must be non-empty!\n");

  /* degenerate cases: only one of the two sets exists */
  if (!bitSet1) {
    return nextBitSet(bitSet2, *n2, m2);
  }
  if (!bitSet2) {
    return nextBitSet(bitSet1, *n1, m1);
  }

  /* step 1: try to advance the second set */
  if (*n2 > 0) {
    if (nextBitSet(bitSet2, *n2, m2)) {
      return 1;
    }
  }

  /* step 2: second set exhausted -- advance the first, restart the second */
  if (*n1 > 0) {
    if (nextBitSet(bitSet1, *n1, m1)) {
      if (*n2 > 0) {
        initializeBitSet(bitSet2, *n2);
      }
      return 1;
    }
  }

  /* step 3: both exhausted -- shift one element from set 2 to set 1 */
  if (*n2 == 0) {
    return 0; /* nothing left to shift */
  }

  *n2 -= 1;
  *n1 += 1;
  initializeBitSet(bitSet1, *n1);
  initializeBitSet(bitSet2, *n2);
  return 1;
}






