/* SCOP implementation, including optimized Assembler version of the 
   encryption/decryption routines. Public Domain. No warranty of any kind.
   Author: Simeon Maltchev, smaltchev@yahoo.com, May 1997 */

/* Tested with MSVC and with gcc 2.7.x. */

#include <stdio.h>
#include <assert.h>


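/* Implementation selection for encrypt/decrypt.  With neither symbol defined
   the portable C routines are compiled.  Uncomment ASM_INTEL to use the
   `__asm { ... }` (Intel syntax) routines, or ASM_UNIX to use the
   `asm ("...")` (AT&T syntax) routines.  If both are defined, ASM_INTEL
   takes precedence. */
/* #define ASM_INTEL */
/* #define ASM_UNIX */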


/* Key table consumed by encrypt/decrypt and filled in by init_key.
   The assembler variants of encrypt/decrypt address this object by fixed
   byte offsets (v[128] at 512, i/j/t3 at 1536/1537/1538), so the member
   order and sizes must not be changed. */
typedef struct {
  unsigned long v[384];  /* key words: indexed as v[i] and v[128 + j] */
  unsigned char i;       /* starting value of the index into v[0..255] */
  unsigned char j;       /* starting value of the offset used as v[128 + j] */
  unsigned char t3;      /* starting value of the running word t3 */
} st_key;

/* State of the gp8 generator.  expand_key writes the first 64 bytes of this
   structure as raw bytes, so the member order matters. */
typedef struct {
  unsigned char coef[8][4];  /* eight sets of four polynomial coefficients */
  unsigned long pc[4];       /* per-word constants added to the gp8 output */
  unsigned long x[4];        /* state words, replaced on every gp8 call */
} st_gp8;


/* Key table: written by init_key, read and updated by encrypt/decrypt. */
static st_key kt;
/* Generator state: seeded by expand_key, advanced by gp8. */
static st_gp8 int_state;

/* Forward declaration: init_key calls gp8 before its definition. */
static void gp8 (unsigned long *out);


/* Seed the generator state `int_state` from a user key.

   in       key bytes.
   in_size  number of key bytes; must be in the range 2..64 (checked with
            assert only, so the check disappears under NDEBUG).

   The key is copied to the first bytes of int_state and then stretched to
   64 bytes: every further byte is the sum (modulo 256) of the two bytes
   found in_size and in_size - 1 positions before it.

   With a 4-byte unsigned long those 64 bytes are the whole structure.  With
   a wider unsigned long the structure is larger, and the bytes beyond the
   first 64 would otherwise keep whatever an earlier gp8 run left in them,
   making a second init_key with the same key produce a different table.
   They are therefore cleared, which matches the zero-initialised state seen
   by the very first call. */
static void
expand_key (const unsigned char *in, unsigned in_size)
{
  enum { SEED_BYTES = 64 };
  unsigned char *p = (unsigned char *) &int_state;
  unsigned i;

  assert (in_size >= 2 && in_size <= SEED_BYTES);

  for (i = 0; i < in_size; i++)
    p[i] = in[i];

  for (i = in_size; i < SEED_BYTES; i++)
    p[i] = (unsigned char) (p[i - in_size] + p[i - in_size + 1]);

  /* No-op when sizeof int_state == SEED_BYTES. */
  for (i = SEED_BYTES; i < sizeof int_state; i++)
    p[i] = 0;
}

void
init_key (unsigned char *in, unsigned in_size)
{
  unsigned long t[4];
  int i, j;

  expand_key (in, in_size);

  for (i = 0; i < 8; i++)
    gp8 (t);

  for (i = 0; i < 12; i++)
    {
      for (j = 0; j < 8; j++)
        gp8 (kt.v + i * 32 + j * 4);

      gp8 (t);
    }

  gp8 (t);
  kt.i  = (unsigned char) t[0];
  kt.j  = (unsigned char) (t[0] >> 8);
  kt.t3 = (unsigned char) (t[0] >> 16);
}

/* Evaluate one gp8 polynomial in unsigned long arithmetic:

     c[0]*x^4 + c[1]*x^3 + c[2]*(x^2 + 2*32003*x + 32003^2) + c[3]*x

   The bracketed factor is (x + 32003)^2 written out. */
static unsigned long
gp8_poly (const unsigned char *c, unsigned long x)
{
  unsigned long p2 = x * x;
  unsigned long p3 = p2 * x;
  unsigned long p4 = p3 * x;

  return c[0] * p4 +
         c[1] * p3 +
         c[2] * (p2 + 2 * 32003L * x + 32003L * 32003L) +
         c[3] * x;
}

/* Produce four output words and advance the generator state.

   out  receives four words, out[0..3].

   For each state word int_state.x[k] the upper part (x >> 16) is run
   through the polynomial with coefficients coef[2k] and the lower 16 bits
   through the one with coef[2k + 1].  The output word combines both results
   with the two halves of int_state.pc[k].  The next state is assembled from
   bits 16..31 of the first result and the second result shifted right by
   16, and the four new words are then recombined so that every state word
   takes its upper part from its predecessor (cyclically) and its lower part
   from itself. */
static void
gp8 (unsigned long *out)
{
  unsigned long next[4];
  unsigned long hi, lo, add;
  int k;

  for (k = 0; k < 4; k++)
    {
      hi  = gp8_poly (int_state.coef[2 * k], int_state.x[k] >> 16);
      lo  = gp8_poly (int_state.coef[2 * k + 1], int_state.x[k] & 0xffffL);
      add = int_state.pc[k];

      out[k]  = ((hi + (add >> 16)) << 16) |
                ((lo + (add &  0xffffL)) & 0xffffL);

      next[k] = (hi & 0xffff0000L) | (lo >> 16);
    }

  /* (k + 3) & 3 is the cyclic predecessor of k: 3, 0, 1, 2. */
  for (k = 0; k < 4; k++)
    int_state.x[k] = (next[(k + 3) & 3] << 16) | (next[k] >> 16);
}


#if !defined (ASM_INTEL) && !defined (ASM_UNIX)

/* Encrypt a buffer in place using the global key table `kt`.

   buf     words to encrypt; each is overwritten with its cipher text.
   buflen  number of words (not bytes) in buf; 0 does nothing.

   For every word, two entries of kt.v[128..383] are added to it, and the
   second of those entries is then replaced by itself plus an entry of
   kt.v[0..255].  kt.v therefore changes as data is processed, while the
   indices i, j and the running word t3 restart from kt.i, kt.j and kt.t3
   on every call and are not written back.

   Exactly buflen words are read and written: nothing beyond buf[buflen - 1]
   is touched, for odd and zero lengths as well. */
void
encrypt (unsigned long *buf, unsigned buflen)
{
  unsigned char i, j;
  unsigned long t, t3, w;
  unsigned n;

  i  = kt.i;
  j  = kt.j;
  t3 = kt.t3;

  for (n = 0; n < buflen; n++)
    {
      w   = buf[n];
      t   = kt.v[128 + j];
      j  += (unsigned char) t3;   /* only the low byte of t3 moves j */
      t3  = kt.v[i];
      w  += t;
      i++;                        /* wraps at 256 */
      t   = kt.v[128 + j];
      w  += t;
      t3 += t;
      kt.v[128 + j] = t3;         /* table update, independent of the data */
      j  += (unsigned char) t;
      buf[n] = w;
    }
}

/* Decrypt a buffer in place using the global key table `kt`.

   buf     words to decrypt; each is overwritten with its plain text.
   buflen  number of words (not bytes) in buf; 0 does nothing.

   This is encrypt with the two additions to the data word turned into
   subtractions.  The table update does not depend on the data, so starting
   from the same `kt` contents that encrypt started from yields the same
   sequence of table entries and restores the original words.  As in
   encrypt, i, j and t3 restart from kt.i, kt.j and kt.t3 on every call and
   only kt.v is modified.

   Exactly buflen words are read and written: nothing beyond buf[buflen - 1]
   is touched, for odd and zero lengths as well. */
void
decrypt (unsigned long *buf, unsigned buflen)
{
  unsigned char i, j;
  unsigned long t, t3, w;
  unsigned n;

  i  = kt.i;
  j  = kt.j;
  t3 = kt.t3;

  for (n = 0; n < buflen; n++)
    {
      w   = buf[n];
      t   = kt.v[128 + j];
      j  += (unsigned char) t3;   /* only the low byte of t3 moves j */
      t3  = kt.v[i];
      w  -= t;
      i++;                        /* wraps at 256 */
      t   = kt.v[128 + j];
      w  -= t;
      t3 += t;
      kt.v[128 + j] = t3;         /* table update, independent of the data */
      j  += (unsigned char) t;
      buf[n] = w;
    }
}

#elif defined (ASM_INTEL)

/* Pentium optimized encrypt, written with `__asm { }` in Intel syntax and
   compiled when ASM_INTEL is defined.

   Each data word has two entries of kt.v[128..383] added to it, and the
   second entry is replaced by itself plus an entry of kt.v[0..255]; this is
   the same per-word step as the C encrypt in this file.

   Register use:
     AL/EAX  i,  loaded from byte offset 1536 of kt, then zero-extended
     DL/EDX  j,  loaded from byte offset 1537 of kt, then zero-extended
     BL/EBX  t3, loaded from byte offset 1538 of kt
     ECX     t
     EBP     the data word in flight (the caller's EBP is pushed and popped
             around the loop, after buf and buflen have been read)
     ESI     address just past the buffer (buf + 4 * buflen)
     EDI     negative byte offset from ESI, stepped by 8 up to zero

   The offsets 1536..1538, the offset 512 (kt.v[128]) and the scaling by 4
   all assume a 4-byte unsigned long.

   buflen == 0 jumps straight to LL2 without touching the buffer.

   NOTE(review): the loop handles two words per pass and preloads the next
   word before testing EDI, so the final pass reads [ESI+EDI+8], one word
   beyond the buffer, and an odd buflen additionally encrypts and stores the
   word just past the end.  Presumably callers are expected to pass an even
   buflen -- confirm. */
void
encrypt (unsigned long *buf, unsigned buflen)
{
  __asm {
         MOV   AL,kt[1536]
         MOV   DL,kt[1537]
         AND   EAX,255
         AND   EDX,255
         MOV   BL,kt[1538]
         MOV   ESI,buf
         MOV   EDI,buflen
         SHL   EDI,2
         ADD   ESI,EDI
         NEG   EDI
         JNS   short LL2
         PUSH  EBP
         MOV   EBP,[ESI+EDI]

  LL1:
         MOV   ECX,dword ptr kt[4*EDX+512]
         ADD   DL,BL
         MOV   EBX,dword ptr kt[4*EAX]
         ADD   EBP,ECX
         INC   AL
         MOV   ECX,dword ptr kt[4*EDX+512]
         ADD   EBP,ECX
         ADD   EBX,ECX
         MOV   dword ptr kt[4*EDX+512],EBX
         ADD   DL,CL
         MOV   [ESI+EDI],EBP
         MOV   EBP,[ESI+EDI+4]

         MOV   ECX,dword ptr kt[4*EDX+512]
         ADD   DL,BL
         MOV   EBX,dword ptr kt[4*EAX]
         ADD   EBP,ECX
         INC   AL
         MOV   ECX,dword ptr kt[4*EDX+512]
         ADD   EBP,ECX
         ADD   EBX,ECX
         MOV   dword ptr kt[4*EDX+512],EBX
         ADD   DL,CL
         MOV   [ESI+EDI+4],EBP
         MOV   EBP,[ESI+EDI+8]

         ADD   EDI,8
         JS    LL1

         POP   EBP
  LL2:
  }
}

/* Pentium optimized decrypt, written with `__asm { }` in Intel syntax and
   compiled when ASM_INTEL is defined.

   Identical to the ASM_INTEL encrypt except that the two table entries are
   subtracted from the data word (SUB EBP,ECX) instead of added; the update
   of kt.v is the same.

   Register use:
     AL/EAX  i,  loaded from byte offset 1536 of kt, then zero-extended
     DL/EDX  j,  loaded from byte offset 1537 of kt, then zero-extended
     BL/EBX  t3, loaded from byte offset 1538 of kt
     ECX     t
     EBP     the data word in flight (the caller's EBP is pushed and popped
             around the loop, after buf and buflen have been read)
     ESI     address just past the buffer (buf + 4 * buflen)
     EDI     negative byte offset from ESI, stepped by 8 up to zero

   The offsets 1536..1538, the offset 512 (kt.v[128]) and the scaling by 4
   all assume a 4-byte unsigned long.

   buflen == 0 jumps straight to LL4 without touching the buffer.

   NOTE(review): the loop handles two words per pass and preloads the next
   word before testing EDI, so the final pass reads [ESI+EDI+8], one word
   beyond the buffer, and an odd buflen additionally decrypts and stores the
   word just past the end.  Presumably callers are expected to pass an even
   buflen -- confirm. */
void
decrypt (unsigned long *buf, unsigned buflen)
{
  __asm {
         MOV   AL,kt[1536]
         MOV   DL,kt[1537]
         AND   EAX,255
         AND   EDX,255
         MOV   BL,kt[1538]
         MOV   ESI,buf
         MOV   EDI,buflen
         SHL   EDI,2
         ADD   ESI,EDI
         NEG   EDI
         JNS   short LL4
         PUSH  EBP
         MOV   EBP,[ESI+EDI]

  LL3:
         MOV   ECX,dword ptr kt[4*EDX+512]
         ADD   DL,BL
         MOV   EBX,dword ptr kt[4*EAX]
         SUB   EBP,ECX
         INC   AL
         MOV   ECX,dword ptr kt[4*EDX+512]
         SUB   EBP,ECX
         ADD   EBX,ECX
         MOV   dword ptr kt[4*EDX+512],EBX
         ADD   DL,CL
         MOV   [ESI+EDI],EBP
         MOV   EBP,[ESI+EDI+4]

         MOV   ECX,dword ptr kt[4*EDX+512]
         ADD   DL,BL
         MOV   EBX,dword ptr kt[4*EAX]
         SUB   EBP,ECX
         INC   AL
         MOV   ECX,dword ptr kt[4*EDX+512]
         SUB   EBP,ECX
         ADD   EBX,ECX
         MOV   dword ptr kt[4*EDX+512],EBX
         ADD   DL,CL
         MOV   [ESI+EDI+4],EBP
         MOV   EBP,[ESI+EDI+8]

         ADD   EDI,8
         JS    LL3

         POP   EBP
  LL4:
  }
}

#elif defined (ASM_UNIX)

/* Pentium optimized encrypt, written as one `asm ("...")` statement in AT&T
   syntax and compiled when ASM_UNIX is defined.

   Each data word has two entries of kt.v[128..383] added to it, and the
   second entry is replaced by itself plus an entry of kt.v[0..255]; this is
   the same per-word step as the C encrypt in this file.

   Register use:
     %al/%eax  i,  loaded from kt+1536, then zero-extended
     %dl/%edx  j,  loaded from kt+1537, then zero-extended
     %bl/%ebx  t3, loaded from kt+1538
     %ecx      t
     %ebp      the data word in flight
     %esi      address just past the buffer
     %edi      negative byte offset from %esi, stepped by 8 up to zero

   All general registers are saved by pushal and restored by popal.  The
   offsets 1536..1538, the offset 512 (kt.v[128]) and the scaling by 4 all
   assume a 4-byte unsigned long.

   buflen == 0 jumps straight to LL2 without touching the buffer.

   NOTE(review): the arguments are fetched from 40(%esp) and 44(%esp) after
   pushal has pushed 32 bytes; this presumably relies on the compiler having
   emitted a frame-pointer prologue with no locals, so that buf and buflen
   sit 8 and 12 bytes above the stack pointer on entry to the asm -- confirm
   for the compiler and flags in use.

   NOTE(review): the asm text is a string literal containing raw newlines,
   has no operand or clobber lists, and defines the plain labels LL1/LL2;
   whether a given compiler accepts this needs checking.

   NOTE(review): the loop handles two words per pass and preloads the next
   word before testing %edi, so the final pass reads 8(%esi,%edi), one word
   beyond the buffer, and an odd buflen additionally encrypts and stores the
   word just past the end.  Presumably callers are expected to pass an even
   buflen -- confirm. */
void
encrypt (unsigned long *buf, unsigned buflen)
{
  asm ("
         pushal
         movb   kt+1536,%al
         movb   kt+1537,%dl
         andl   $255,%eax
         andl   $255,%edx
         movb   kt+1538,%bl
         movl   40(%esp),%esi
         movl   44(%esp),%edi
         shll   $2,%edi
         addl   %edi,%esi
         negl   %edi
         jns    LL2
         movl   (%esi,%edi),%ebp
         nop

  LL1:
         movl   kt+512(,%edx,4),%ecx
         addb   %bl,%dl
         movl   kt(,%eax,4),%ebx
         addl   %ecx,%ebp
         incb   %al
         movl   kt+512(,%edx,4),%ecx
         addl   %ecx,%ebp
         addl   %ecx,%ebx
         movl   %ebx,kt+512(,%edx,4)
         addb   %cl,%dl
         movl   %ebp,(%esi,%edi)
         movl   4(%esi,%edi),%ebp

         movl   kt+512(,%edx,4),%ecx
         addb   %bl,%dl
         movl   kt(,%eax,4),%ebx
         addl   %ecx,%ebp
         incb   %al
         movl   kt+512(,%edx,4),%ecx
         addl   %ecx,%ebp
         addl   %ecx,%ebx
         movl   %ebx,kt+512(,%edx,4)
         addb   %cl,%dl
         movl   %ebp,4(%esi,%edi)
         movl   8(%esi,%edi),%ebp

         addl   $8,%edi
         js     LL1
         
  LL2:
         popal
      ");
}

/* Pentium optimized decrypt, written as one `asm ("...")` statement in AT&T
   syntax and compiled when ASM_UNIX is defined.

   Identical to the ASM_UNIX encrypt except that the two table entries are
   subtracted from the data word (subl %ecx,%ebp) instead of added; the
   update of kt.v is the same.

   Register use:
     %al/%eax  i,  loaded from kt+1536, then zero-extended
     %dl/%edx  j,  loaded from kt+1537, then zero-extended
     %bl/%ebx  t3, loaded from kt+1538
     %ecx      t
     %ebp      the data word in flight
     %esi      address just past the buffer
     %edi      negative byte offset from %esi, stepped by 8 up to zero

   All general registers are saved by pushal and restored by popal.  The
   offsets 1536..1538, the offset 512 (kt.v[128]) and the scaling by 4 all
   assume a 4-byte unsigned long.

   buflen == 0 jumps straight to LL4 without touching the buffer.

   NOTE(review): the arguments are fetched from 40(%esp) and 44(%esp) after
   pushal has pushed 32 bytes; this presumably relies on the compiler having
   emitted a frame-pointer prologue with no locals, so that buf and buflen
   sit 8 and 12 bytes above the stack pointer on entry to the asm -- confirm
   for the compiler and flags in use.

   NOTE(review): the asm text is a string literal containing raw newlines,
   has no operand or clobber lists, and defines the plain labels LL3/LL4;
   whether a given compiler accepts this needs checking.

   NOTE(review): the loop handles two words per pass and preloads the next
   word before testing %edi, so the final pass reads 8(%esi,%edi), one word
   beyond the buffer, and an odd buflen additionally decrypts and stores the
   word just past the end.  Presumably callers are expected to pass an even
   buflen -- confirm. */
void
decrypt (unsigned long *buf, unsigned buflen)
{
  asm ("
         pushal
         movb   kt+1536,%al
         movb   kt+1537,%dl
         andl   $255,%eax
         andl   $255,%edx
         movb   kt+1538,%bl
         movl   40(%esp),%esi
         movl   44(%esp),%edi
         shll   $2,%edi
         addl   %edi,%esi
         negl   %edi
         jns    LL4
         movl   (%esi,%edi),%ebp
         nop

  LL3:
         movl   kt+512(,%edx,4),%ecx
         addb   %bl,%dl
         movl   kt(,%eax,4),%ebx
         subl   %ecx,%ebp
         incb   %al
         movl   kt+512(,%edx,4),%ecx
         subl   %ecx,%ebp
         addl   %ecx,%ebx
         movl   %ebx,kt+512(,%edx,4)
         addb   %cl,%dl
         movl   %ebp,(%esi,%edi)
         movl   4(%esi,%edi),%ebp

         movl   kt+512(,%edx,4),%ecx
         addb   %bl,%dl
         movl   kt(,%eax,4),%ebx
         subl   %ecx,%ebp
         incb   %al
         movl   kt+512(,%edx,4),%ecx
         subl   %ecx,%ebp
         addl   %ecx,%ebx
         movl   %ebx,kt+512(,%edx,4)
         addb   %cl,%dl
         movl   %ebp,4(%esi,%edi)
         movl   8(%esi,%edi),%ebp

         addl   $8,%edi
         js     LL3

  LL4:
         popal 
      ");
}

#endif
