/*

  sshmp-montgomery.c

  Author: Mika Kojo <mkojo@ssh.fi>

  Copyright (c) 2000, 2001 SSH Communications Security Corp
  All rights reserved.

  Created Sat Feb 17 21:10:09 2001.

  */

#include "sshincludes.h"
#include "sshmp.h"

/* Montgomery representation implementation. The modulus is required
   to be odd. */

/* Set up the Montgomery ideal `m' for the modulus `op'.

   Returns FALSE without touching `m' when `op' is below 3 or even,
   because the representation requires an odd modulus.  Otherwise `m'
   receives its own copy of the modulus, a normalized copy of it, the
   word mp = -op^-1 (mod 2^SSH_WORD_BITS) and, when
   SSHMATH_USE_WORKSPACE is defined, preallocated scratch buffers, and
   TRUE is returned.  Release the ideal with ssh_mpmzm_clear_ideal. */
Boolean ssh_mpmzm_init_ideal(SshMPMontIntIdeal m, SshMPIntegerConst op)
{
#ifdef SSHMATH_USE_WORKSPACE
  /* Only the workspace sizing below uses this.  Declaring it
     unconditionally leaves an unused variable in builds without
     SSHMATH_USE_WORKSPACE. */
  unsigned int temp_n;
#endif /* SSHMATH_USE_WORKSPACE */

  /* If op < 3 or op % 2 == 0 we cannot work in Montgomery
     representation. */
  if (ssh_mp_cmp_ui(op, 3) < 0 || (ssh_mp_get_ui(op) & 0x1) == 0)
    return FALSE;

  /* Start from an all-zero structure. */
  memset(m, 0, sizeof(*m));

  /* Compute mp = -op^-1 (mod 2^SSH_WORD_BITS). */
  m->mp = SSH_WORD_NEGATE(ssh_mpmk_small_inv(op->v[0]));

  /* A single allocation holds two copies of the modulus.  The first
     half (m->m) is kept as given; the second half (m->d) is shifted up
     by m->shift bits, the count reported by ssh_mpk_leading_zeros, and
     serves as the normalized divisor. */
  m->m = ssh_xmalloc(sizeof(SshWord) * (op->n + op->n));
  m->d = m->m + op->n;
  m->m_n = op->n;
  ssh_mpk_memcopy(m->m, op->v, m->m_n);
  ssh_mpk_memcopy(m->d, op->v, m->m_n);
  m->shift = ssh_mpk_leading_zeros(m->d, m->m_n);
  ssh_mpk_shift_up_bits(m->d, m->m_n, m->shift);

#ifdef SSHMATH_USE_WORKSPACE
  /* Keep the larger of the squaring and the multiplication
     requirement in reserve as Karatsuba working space. */
  temp_n =
    ssh_mpk_square_karatsuba_needed_memory(m->m_n);
  m->karatsuba_work_space_n =
    ssh_mpk_mul_karatsuba_needed_memory(m->m_n, m->m_n);
  if (m->karatsuba_work_space_n < temp_n)
    m->karatsuba_work_space_n = temp_n;
  /* Note that it is still possible that no extra memory is needed! */
  if (m->karatsuba_work_space_n)
    m->karatsuba_work_space = ssh_xmalloc(sizeof(SshWord) *
                                          m->karatsuba_work_space_n);
  else
    m->karatsuba_work_space = NULL;

  /* Higher level working space: room for a double-length product plus
     a double-length reduction buffer, each with one spare word. */
  m->work_space_n = (m->m_n * 2 + 1) * 2;
  m->work_space   = ssh_xmalloc(sizeof(SshWord) * m->work_space_n);

#else /* SSHMATH_USE_WORKSPACE */
  m->karatsuba_work_space   = NULL;
  m->karatsuba_work_space_n = 0;
  m->work_space             = NULL;
  m->work_space_n           = 0;
#endif /* SSHMATH_USE_WORKSPACE */

  return TRUE;
}

/* Release every buffer owned by the ideal `m' and wipe the structure
   so that no stale pointers remain in it. */
void ssh_mpmzm_clear_ideal(SshMPMontIntIdeal m)
{
  /* The scratch buffers are NULL when no workspace was reserved. */
  ssh_xfree(m->work_space);
  ssh_xfree(m->karatsuba_work_space);

  /* m->d points into the m->m allocation, so this call releases both
     copies of the modulus. */
  ssh_xfree(m->m);

  memset(m, 0, sizeof(*m));
}

/* Store the modulus of the ideal `m' into the plain integer `ret'.
   If `ret' is NaN after resizing, no words are copied; the sign of
   `ret' is cleared in either case. */
void ssh_mprz_set_mpmzm_ideal(SshMPInteger ret, SshMPMontIntIdealConst m)
{
  ssh_mp_realloc(ret, m->m_n);

  if (ssh_mprz_isnan(ret))
    {
      SSH_MP_NO_SIGN(ret);
      return;
    }

  ret->n = m->m_n;
  ssh_mpk_memcopy(ret->v, m->m, m->m_n);

  /* A modulus is never negative. */
  SSH_MP_NO_SIGN(ret);
}

/* Initialize `op' as the zero residue attached to the ideal `m'.  The
   word buffer gets one word more than the modulus has. */
void ssh_mpmzm_init(SshMPMontIntMod op, SshMPMontIntIdealConst m)
{
  op->m = m;
  op->v = ssh_xmalloc(sizeof(SshWord) * (m->m_n + 1));
  op->n = 0;
}

/* Initialize `op1' as the zero residue attached to the same ideal as
   `op2'.  Only the ideal of `op2' is used; its value is not copied. */
void ssh_mpmzm_init_inherit(SshMPMontIntMod op1,
                            SshMPMontIntModConst op2)
{
  op1->m = op2->m;
  op1->v = ssh_xmalloc(sizeof(SshWord) * (op1->m->m_n + 1));
  op1->n = 0;
}

/* Release the word buffer of `op' and detach it from its ideal.  The
   ideal itself is not freed here. */
void ssh_mpmzm_clear(SshMPMontIntMod op)
{
  ssh_xfree(op->v);
  /* Do not leave a dangling pointer behind: a second clear then frees
     NULL instead of freeing the same buffer twice. */
  op->v = NULL;
  op->n = 0;
  op->m = NULL;
}

/* Copy the residue `op' into `ret'.  Returns FALSE, changing nothing,
   when the two are attached to different ideals; TRUE otherwise. */
Boolean ssh_mpmzm_set(SshMPMontIntMod ret, SshMPMontIntModConst op)
{
  if (ret->m != op->m)
    return FALSE;

  /* Nothing to move when source and destination coincide. */
  if (op != ret)
    {
      /* A zero residue has no words to copy. */
      if (op->n != 0)
        ssh_mpk_memcopy(ret->v, op->v, op->n);
      ret->n = op->n;
    }

  return TRUE;
}

/* Convert the plain integer `op' into the Montgomery representation
   of the ideal `ret' is attached to, i.e. store R*op (mod m) in `ret'.
   The multiplication by R is done by placing the words of `op' m_n
   words up in a zero-filled temporary.

   Only op->v and op->n are read here; the sign of `op' is not
   consulted.  NOTE(review): negative inputs therefore appear to be
   treated like their absolute value -- confirm against callers. */
void ssh_mpmzm_set_mprz(SshMPMontIntMod ret, SshMPIntegerConst op)
{
  SSH_MP_WORKSPACE_DEFINE;
  SshWord *t;
  unsigned int t_n;

  /* Trivial case. */
  if (op->n == 0)
    {
      /* Return zero also. */
      ret->n = 0;
      return;
    }

  /* If the input op != 0 then we will necessarily need some modular
     reduction. Thus the following doesn't need checks for the size
     of the input. */

  /* Compute R*op = ret (mod m) */

  /* Allocate some temporary space: m_n zero words, the words of op,
     and one spare word for the bits shifted out by normalization. */
  SSH_MP_WORKSPACE_ALLOC(t, (op->n + 1 + ret->m->m_n));

  /* Multiply by R the remainder. */
  ssh_mpk_memzero(t, ret->m->m_n);
  ssh_mpk_memcopy(t + ret->m->m_n, op->v, op->n);
  t_n = op->n + ret->m->m_n + 1;
  t[t_n - 1] = 0;

  /* Normalize: apply the same bit shift that was applied to the
     divisor m->d.  The low m_n words are zero, so only the upper part
     needs shifting. */
  ssh_mpk_shift_up_bits(t + ret->m->m_n, op->n + 1, ret->m->shift);

  /* Validate that length is correct: drop the spare word if the shift
     did not spill into it. */
  if (t[t_n - 1] == 0)
    t_n--;

  /* Modular operations, against the normalized modulus. */
  ssh_mpk_mod(t, t_n, ret->m->d, ret->m->m_n);

  /* Denormalize the remainder. */
  ssh_mpk_shift_down_bits(t, ret->m->m_n, ret->m->shift);

  /* Compute exact size. */
  t_n = ret->m->m_n;
  while (t_n && t[t_n - 1] == 0)
    t_n--;

  /* Copy into ret. */
  ssh_mpk_memcopy(ret->v, t, t_n);
  ret->n = t_n;

  SSH_MP_WORKSPACE_FREE(t);
}

/* Set `ret' to the Montgomery representation of the single word `u'.
   Always returns TRUE. */
Boolean ssh_mpmzm_set_ui(SshMPMontIntMod ret, SshWord u)
{
  SshMPIntegerStruct wide;

  /* Zero needs no conversion at all. */
  if (u == 0)
    {
      ret->n = 0;
      return TRUE;
    }

  /* Slow, unoptimized path: go through a temporary plain integer and
     the general conversion.  Most callers rarely need this. */
  ssh_mp_init(&wide);
  ssh_mp_set_ui(&wide, u);
  ssh_mpmzm_set_mprz(ret, &wide);
  ssh_mp_clear(&wide);

  return TRUE;
}

/* Convert the residue `op' out of Montgomery representation into the
   plain integer `ret' by running one reduction over its words.  If
   `ret' is NaN after resizing, nothing is copied; the sign of `ret'
   is cleared in either case. */
void ssh_mprz_set_mpmzm(SshMPInteger ret, SshMPMontIntModConst op)
{
  SSH_MP_WORKSPACE_DEFINE;
  SshWord *scratch;
  unsigned int scratch_n, len;

  /* The reduction works in a zeroed buffer of 2*m_n + 1 words. */
  scratch_n = op->m->m_n * 2 + 1;
  SSH_MP_WORKSPACE_ALLOC(scratch, scratch_n);
  ssh_mpk_memzero(scratch, scratch_n);

  ssh_mpmk_reduce(scratch, scratch_n,
                  op->v, op->n,
                  op->m->mp,
                  op->m->m, op->m->m_n);

  /* The result occupies at most m_n words; strip leading zeros. */
  for (len = op->m->m_n; len && scratch[len - 1] == 0; len--)
    ;

  ssh_mp_realloc(ret, len);
  if (!ssh_mprz_isnan(ret))
    {
      ssh_mpk_memcopy(ret->v, scratch, len);
      ret->n = len;
    }

  SSH_MP_WORKSPACE_FREE(scratch);

  SSH_MP_NO_SIGN(ret);
}

/* Compare the stored words of two residues.  Residues attached to
   different ideals are reported as unequal by returning 1; otherwise
   the result of ssh_mpk_cmp on the two word arrays is returned. */
int ssh_mpmzm_cmp(SshMPMontIntModConst op1,
                  SshMPMontIntModConst op2)
{
  return (op1->m == op2->m)
    ? ssh_mpk_cmp(op1->v, op1->n, op2->v, op2->n)
    : 1;
}

/* Compare the stored words of `op' against the single word `u' with
   ssh_mpk_cmp_ui.  The words are compared as stored; no conversion
   out of Montgomery representation is performed here. */
int ssh_mpmzm_cmp_ui(SshMPMontIntModConst op, SshWord u)
{
  int order;

  order = ssh_mpk_cmp_ui(op->v, op->n, u);
  return order;
}

/* Modular addition: ret = op1 + op2 (mod m).  Addition works
   directly on Montgomery representations.  Returns FALSE, changing
   nothing, unless all three arguments share one ideal; TRUE
   otherwise. */
Boolean ssh_mpmzm_add(SshMPMontIntMod ret, SshMPMontIntModConst op1,
                   SshMPMontIntModConst op2)
{
  SshWord c;

  if (op1->m != op2->m || op1->m != ret->m)
    return FALSE;

  /* Keep the longer operand first for ssh_mpk_add. */
  if (op1->n < op2->n)
    {
      SshMPMontIntModConst t;
      t = op1;
      op1 = op2;
      op2 = t;
    }

  /* Perform the addition; a carry out becomes one more word. */
  c = ssh_mpk_add(ret->v, op1->v, op1->n, op2->v, op2->n);
  if (c)
    {
      ret->v[op1->n] = c;
      ret->n = op1->n + 1;
    }
  else
    ret->n = op1->n;

  /* Do modular reduction.  The comparison must be inclusive: a sum
     exactly equal to the modulus has to become zero, otherwise the
     result would hold m itself instead of a reduced residue. */
  if (ssh_mpk_cmp(ret->v, ret->n, ret->m->m, ret->m->m_n) >= 0)
    {
      ssh_mpk_sub(ret->v, ret->v, ret->n, ret->m->m, ret->m->m_n);
      while (ret->n && ret->v[ret->n - 1] == 0)
        ret->n--;
    }
  return TRUE;
}

/* Modular subtraction: ret = op1 - op2 (mod m).  Returns FALSE,
   changing nothing, unless all three arguments share one ideal; TRUE
   otherwise. */
Boolean ssh_mpmzm_sub(SshMPMontIntMod ret, SshMPMontIntModConst op1,
                      SshMPMontIntModConst op2)
{
  if (op1->m != op2->m || op1->m != ret->m)
    return FALSE;

  if (ssh_mpk_cmp(op1->v, op1->n, op2->v, op2->n) < 0)
    {
      /* op1 < op2: first form the positive difference op2 - op1 ... */
      ssh_mpk_sub(ret->v, op2->v, op2->n, op1->v, op1->n);
      for (ret->n = op2->n; ret->n && ret->v[ret->n - 1] == 0; ret->n--)
        ;

      /* ... then take it away from the modulus to get the residue. */
      ssh_mpk_sub(ret->v, ret->m->m, ret->m->m_n, ret->v, ret->n);
      for (ret->n = ret->m->m_n; ret->n && ret->v[ret->n - 1] == 0;
           ret->n--)
        ;

      return TRUE;
    }

  /* op1 >= op2: the plain difference is already reduced. */
  ssh_mpk_sub(ret->v, op1->v, op1->n, op2->v, op2->n);
  for (ret->n = op1->n; ret->n && ret->v[ret->n - 1] == 0; ret->n--)
    ;

  return TRUE;
}

/* Montgomery multiplication: ret = op1 * op2 followed by one
   reduction step.  Returns FALSE, changing nothing, unless all three
   arguments share one ideal; TRUE otherwise. */
Boolean ssh_mpmzm_mul(SshMPMontIntMod ret, SshMPMontIntModConst op1,
                   SshMPMontIntModConst op2)
{
  SSH_MP_WORKSPACE_DEFINE;
  SshWord *prod, *red;
  unsigned int prod_n, red_n;
  Boolean local_buffer;

  if (op1->m != op2->m || op1->m != ret->m)
    return FALSE;

  /* Anything times zero is zero. */
  if (op1->n == 0 || op2->n == 0)
    {
      ret->n = 0;
      return TRUE;
    }

  /* The product and the reduction buffer sit back to back in one
     piece of memory: either the workspace preallocated in the ideal
     or, when there is none, a temporary workspace. */
  prod_n = op1->n + op2->n + 1;
  red_n  = ret->m->m_n*2 + 1;
  if (ret->m->work_space == NULL)
    {
      local_buffer = TRUE;
      SSH_MP_WORKSPACE_ALLOC(prod, prod_n + red_n);
    }
  else
    {
      local_buffer = FALSE;
      prod = ret->m->work_space;
    }
  red = prod + prod_n;

  /* Full product into the zeroed first area. */
  ssh_mpk_memzero(prod, prod_n);
  ssh_mpk_mul_karatsuba(prod, prod_n, op1->v, op1->n, op2->v, op2->n,
                        ret->m->karatsuba_work_space,
                        ret->m->karatsuba_work_space_n);

  /* Strip leading zero words of the product. */
  for (; prod_n && prod[prod_n - 1] == 0; prod_n--)
    ;

  /* Reduction step into the zeroed second area. */
  ssh_mpk_memzero(red, red_n);
  ssh_mpmk_reduce(red, red_n,
                  prod, prod_n,
                  ret->m->mp,
                  ret->m->m, ret->m->m_n);

  /* The result occupies at most m_n words; find its exact length. */
  for (red_n = ret->m->m_n; red_n && red[red_n - 1] == 0; red_n--)
    ;

  ssh_mpk_memcopy(ret->v, red, red_n);
  ret->n = red_n;

  /* Only a temporary workspace has to be given back. */
  if (local_buffer)
    {
      SSH_MP_WORKSPACE_FREE(prod);
    }
  return TRUE;
}

/* Multiply the residue `op' by the single word `u': ret = op*u (mod m).

   This should work, because op = x*R (mod m) and we can just
   compute op*u = x*R*u (mod m) as before. This should be much
   faster than standard multiplication.

   Returns FALSE, changing nothing, when `op' and `ret' are attached
   to different ideals; TRUE otherwise. */
Boolean ssh_mpmzm_mul_ui(SshMPMontIntMod ret, SshMPMontIntModConst op,
                      SshWord u)
{
  SSH_MP_WORKSPACE_DEFINE;
  SshWord *t;
  int t_n;

  if (op->m != ret->m)
    return FALSE;

  /* Handle the trivial case. */
  if (op->n == 0 || u == 0)
    {
      ret->n = 0;
      return TRUE;
    }

  /* Another trivial case. */
  if (u == 1)
    {
      ssh_mpmzm_set(ret, op);
      return TRUE;
    }

  /* Multiply first.  The buffer is sized two words larger than op;
     it comes from the ideal's workspace when one was preallocated,
     otherwise from a temporary workspace. */
  t_n = op->n + 2;
  if (ret->m->work_space == NULL)
    {
      SSH_MP_WORKSPACE_ALLOC(t, t_n);
    }
  else
    t = ret->m->work_space;
  ssh_mpk_memzero(t, t_n);
  ssh_mpk_mul_ui(t, op->v, op->n, u);

  /* Correct the size. */
  while (t_n && t[t_n - 1] == 0)
    t_n--;

  /* Do a compare, which determines whether the modular reduction
     is necessary. */
  if (ssh_mpk_cmp(t, t_n, ret->m->m, ret->m->m_n) >= 0)
    {
      /* Allow growing a bit: include one more (still zero) word so the
         normalization shift below has room to spill into. */
      t_n ++;

      /* Now reduce (mod m). */

      /*The normalization first. */
      ssh_mpk_shift_up_bits(t, t_n, ret->m->shift);

      /* Check the size again. */
      while (t_n && t[t_n - 1] == 0)
        t_n--;

      /* Reduction function, against the normalized modulus. */
      ssh_mpk_mod(t, t_n, ret->m->d, ret->m->m_n);
      t_n = ret->m->m_n;

      /* Undo the normalization on the remainder. */
      ssh_mpk_shift_down_bits(t, t_n, ret->m->shift);

      /* Correct the size. */
      while (t_n && t[t_n - 1] == 0)
        t_n--;
    }

  ssh_mpk_memcopy(ret->v, t, t_n);
  ret->n = t_n;

  /* Free if necessary. */
  if (ret->m->work_space == NULL)
    {
      SSH_MP_WORKSPACE_FREE(t);
    }
  return TRUE;
}

/* Montgomery squaring: ret = op * op followed by one reduction step.
   Returns FALSE, changing nothing, when `op' and `ret' are attached
   to different ideals; TRUE otherwise. */
Boolean ssh_mpmzm_square(SshMPMontIntMod ret, SshMPMontIntModConst op)
{
  SSH_MP_WORKSPACE_DEFINE;
  SshWord *t, *r;
  unsigned int t_n, r_n;

  if (op->m != ret->m)
    return FALSE;

  if (op->n == 0)
    {
      ret->n = 0;
      return TRUE;
    }

  /* The square (t) and the reduction buffer (r) sit back to back,
     either in the ideal's preallocated workspace or in a temporary
     one. */
  t_n = op->n*2 + 1;
  r_n = ret->m->m_n*2 + 1;
  if (ret->m->work_space == NULL)
    {
      SSH_MP_WORKSPACE_ALLOC(t, t_n + r_n);
    }
  else
    t = ret->m->work_space;
  r = t + t_n;

  /* Clear only the product area here.  The reduction area is cleared
     right before it is used below, so zeroing it at this point as
     well would be wasted work (ssh_mpmzm_mul does the same). */
  ssh_mpk_memzero(t, t_n);
  ssh_mpk_square_karatsuba(t, t_n, op->v, op->n,
                           ret->m->karatsuba_work_space,
                           ret->m->karatsuba_work_space_n);

  /* Find the exact length. */
  while (t_n && t[t_n - 1] == 0)
    t_n--;

  /* Do the reduction step. */
  ssh_mpk_memzero(r, r_n);
  ssh_mpmk_reduce(r, r_n,
                  t, t_n,
                  ret->m->mp,
                  ret->m->m, ret->m->m_n);

  /* Compute exact length; the result occupies at most m_n words. */
  r_n = ret->m->m_n;
  while (r_n && r[r_n - 1] == 0)
    r_n--;

  /* Copy to destination. */
  ssh_mpk_memcopy(ret->v, r, r_n);
  ret->n = r_n;

  /* Only a temporary workspace has to be given back. */
  if (ret->m->work_space == NULL)
    {
      SSH_MP_WORKSPACE_FREE(t);
    }
  return TRUE;
}

/* Multiply the residue `op' by a power of two: ret = op * 2^exp (mod m).

   Shifts smaller than one word are done in a single shift-and-reduce
   step.  Larger shifts are applied in chunks of at most
   m_n * SSH_WORD_BITS bits, each chunk being reduced before the next
   one is applied to the running result.

   Returns FALSE, changing nothing, when `op' and `ret' are attached
   to different ideals; TRUE otherwise. */
Boolean ssh_mpmzm_mul_2exp(SshMPMontIntMod ret, SshMPMontIntModConst op,
                        unsigned int exp)
{
  SSH_MP_WORKSPACE_DEFINE;
  unsigned int k;
  SshWord *t;
  int t_n, max;

  if (op->m != ret->m)
    return FALSE;

  /* Check if there is no need to do anything. */
  if (op->n == 0)
    {
      ret->n = 0;
      return TRUE;
    }

  /* Handle some special number of bits here. */
  if (exp == 0)
    {
      ssh_mpmzm_set(ret, op);
      return TRUE;
    }

  if (exp < SSH_WORD_BITS)
    {
      t_n = op->n + 2;
      if (ret->m->work_space == NULL)
        {
          SSH_MP_WORKSPACE_ALLOC(t, t_n);
        }
      else
        t = ret->m->work_space;

      /* Copy to the temporary. */
      ssh_mpk_memcopy(t, op->v, op->n);
      /* This can be done, because the temporary has spare words. */
      t[op->n] = 0;
      ssh_mpk_shift_up_bits(t, op->n + 1, exp);
      t_n = op->n + 1;
      /* Figure out the correct length. */
      while (t_n && t[t_n - 1] == 0)
        t_n--;
      /* Check if reduction is necessary. */
      if (ssh_mpk_cmp(t, t_n, ret->m->m, ret->m->m_n) >= 0)
        {
          /* Normalize with one extra zero word on top to catch the
             bits shifted out. */
          t[t_n] = 0;
          ssh_mpk_shift_up_bits(t, t_n + 1, ret->m->shift);
          t_n++;
          while (t_n && t[t_n - 1] == 0)
            t_n--;
          /* Perform the reduction. */
          ssh_mpk_mod(t, t_n, ret->m->d, ret->m->m_n);
          t_n = ret->m->m_n;
          ssh_mpk_shift_down_bits(t, t_n, ret->m->shift);
          /* Figure out the correct size. */
          while (t_n && t[t_n - 1] == 0)
            t_n--;
        }

      /* Copy to the ret. */
      ssh_mpk_memcopy(ret->v, t, t_n);
      ret->n = t_n;

      if (ret->m->work_space == NULL)
        {
          SSH_MP_WORKSPACE_FREE(t);
        }

      return TRUE;
    }

  /* Compute the maximum number of suitable bits. */
  max = ret->m->m_n * SSH_WORD_BITS;

  /* Seed the running result with op.  Every chunk below must shift
     the result of the previous chunk; reading op again on each pass
     would discard all but the last chunk whenever ret and op are
     different objects.  (A no-op when ret == op.) */
  ssh_mpmzm_set(ret, op);

  for (; exp; )
    {
      int bits;

      /* Zero stays zero however far it is shifted. */
      if (ret->n == 0)
        break;

      if (exp > max)
        {
          bits = max;
          exp -= max;
        }
      else
        {
          bits = exp;
          exp  = 0;
        }

      /* Fold the normalization shift into this chunk, then split the
         total into whole words (k) and leftover bits. */
      bits += ret->m->shift;
      k = bits / SSH_WORD_BITS;
      bits %= SSH_WORD_BITS;

      /* Allocate new space. */
      t_n = k + 2 + ret->n;
      if (ret->m->work_space == NULL)
        {
          SSH_MP_WORKSPACE_ALLOC(t, t_n);
        }
      else
        t = ret->m->work_space;

      /* Move the running result into the temporary, k words up, and
         apply the leftover bit shift. */
      ssh_mpk_memzero(t, t_n);
      ssh_mpk_memcopy(t + k, ret->v, ret->n);
      ssh_mpk_shift_up_bits(t + k, ret->n + 1, bits);

      /* Figure out the correct size here. */
      while (t_n && t[t_n - 1] == 0)
        t_n--;

      /* Compute the modulus, against the normalized divisor. */
      if (ssh_mpk_cmp(t, t_n, ret->m->d, ret->m->m_n) >= 0)
        {
          ssh_mpk_mod(t, t_n, ret->m->d, ret->m->m_n);
          t_n = ret->m->m_n;
        }
      /* Undo the normalization shift. */
      ssh_mpk_shift_down_bits(t, t_n, ret->m->shift);

      /* Figure out the correct size. */
      while (t_n && t[t_n - 1] == 0)
        t_n--;

      /* Now copy to the ret. */
      ssh_mpk_memcopy(ret->v, t, t_n);
      ret->n = t_n;

      if (ret->m->work_space == NULL)
        {
          SSH_MP_WORKSPACE_FREE(t);
        }
    }
  return TRUE;
}

/* Divide the residue `op' by a power of two: ret = op / 2^exp (mod m).
   Works one bit at a time, so large exponents are slow.  Returns
   FALSE, changing nothing, when `op' and `ret' are attached to
   different ideals; TRUE otherwise. */
Boolean ssh_mpmzm_div_2exp(SshMPMontIntMod ret, SshMPMontIntModConst op,
                        unsigned int exp)
{
  unsigned int remaining;
  SshWord carry;

  if (op->m != ret->m)
    return FALSE;

  /* Zero divided by anything stays zero. */
  if (op->n == 0)
    {
      ret->n = 0;
      return TRUE;
    }

  /* Start from a copy of op; with no bits to remove that is all. */
  ssh_mpmzm_set(ret, op);
  if (exp == 0)
    return TRUE;

  /* Clear the unused high words, because the additions below operate
     on the full modulus length. */
  if (ret->m->m_n + 1 - ret->n)
    ssh_mpk_memzero(ret->v + ret->n, ret->m->m_n + 1 - ret->n);

  /* One halving per round. */
  for (remaining = exp; remaining; remaining--)
    {
      /* An odd value is made even by adding the (odd) modulus, which
         does not change the residue. */
      if (ret->v[0] & 0x1)
        {
          if (ret->n < ret->m->m_n)
            ret->n = ret->m->m_n;
          carry = ssh_mpk_add(ret->v, ret->v, ret->n,
                              ret->m->m, ret->m->m_n);
          if (carry)
            {
              ret->v[ret->n] = carry;
              ret->n++;
            }
        }

      ssh_mpk_shift_down_bits(ret->v, ret->n, 1);
      for (; ret->n && ret->v[ret->n - 1] == 0; ret->n--)
        ;
    }
  return TRUE;
}

/* Modular inverse of `op', computed by converting to plain integers,
   calling ssh_mprz_invert, and converting the outcome back.  Returns
   FALSE without touching `ret' when the ideals differ; otherwise
   returns what ssh_mprz_invert returned, and `ret' is rewritten from
   the temporary in either case.

   This will be needed in some future time, e.g. when writing fast
   polynomial arithmetic modulo a large integer; other routines would
   then be worth implementing as well. */
Boolean ssh_mpmzm_invert(SshMPMontIntMod ret, SshMPMontIntModConst op)
{
  SshMPIntegerStruct value, modulus;
  Boolean ok;

  if (ret->m != op->m)
    return FALSE;

  ssh_mprz_init(&value);
  ssh_mprz_init(&modulus);

  /* Leave Montgomery representation. */
  ssh_mprz_set_mpmzm(&value, op);
  ssh_mprz_set_mpmzm_ideal(&modulus, ret->m);

  ok = ssh_mprz_invert(&value, &value, &modulus);

  /* Back into Montgomery representation. */
  ssh_mpmzm_set_mprz(ret, &value);

  ssh_mprz_clear(&value);
  ssh_mprz_clear(&modulus);
  return ok;
}

/* Modular square root of `op', computed by converting to plain
   integers, calling ssh_mprz_mod_sqrt, and converting the outcome
   back.  Returns FALSE without touching `ret' when the ideals differ;
   otherwise returns what ssh_mprz_mod_sqrt returned, and `ret' is
   rewritten from the temporary in either case. */
Boolean ssh_mpmzm_sqrt(SshMPMontIntMod ret, SshMPMontIntModConst op)
{
  SshMPIntegerStruct value, modulus;
  Boolean ok;

  if (ret->m != op->m)
    return FALSE;

  ssh_mprz_init(&value);
  ssh_mprz_init(&modulus);

  /* Leave Montgomery representation. */
  ssh_mprz_set_mpmzm(&value, op);
  ssh_mprz_set_mpmzm_ideal(&modulus, ret->m);

  ok = ssh_mprz_mod_sqrt(&value, &value, &modulus);

  /* Back into Montgomery representation. */
  ssh_mpmzm_set_mprz(ret, &value);

  ssh_mprz_clear(&value);
  ssh_mprz_clear(&modulus);
  return ok;
}

/* Raise the single word `g' to the power `e' modulo the ideal of
   `ret', using binary square-and-multiply from the top exponent bit
   down.  Each multiplication is the cheap word multiplication
   ssh_mpmzm_mul_ui.  Always returns TRUE.

   NOTE(review): only the magnitude bits of `e' are read here; the
   treatment of negative exponents should be confirmed with callers. */
Boolean ssh_mpmzm_pow_ui(SshMPMontIntMod ret, SshWord g, SshMPIntegerConst e)
{
  SshMPMontIntModStruct temp;
  unsigned int bits, i;

  /* Trivial cases. */
  if (ssh_mprz_cmp_ui(e, 0) == 0)
    {
      ssh_mpmzm_set_ui(ret, 1);
      return TRUE;
    }

  if (ssh_mprz_cmp_ui(e, 1) == 0)
    {
      ssh_mpmzm_set_ui(ret, g);
      return TRUE;
    }

  /* The accumulator starts as g in Montgomery representation, which
     accounts for the top exponent bit.  It is filled in directly; a
     separate copy of g is not needed. */
  ssh_mpmzm_init_inherit(&temp, ret);
  ssh_mpmzm_set_ui(&temp, g);

  /* Compute the size of the exponent. */
  bits = ssh_mpk_size_in_bits(e->v, e->n);

  /* Walk the remaining bits from the second highest down to bit 0. */
  for (i = bits - 1; i; i--)
    {
      ssh_mpmzm_square(&temp, &temp);
      if (ssh_mprz_get_bit(e, i - 1))
        ssh_mpmzm_mul_ui(&temp, &temp, g);
    }

  ssh_mpmzm_set(ret, &temp);
  ssh_mpmzm_clear(&temp);

  return TRUE;
}

/* Raise the residue `g' to the power `e': ret = g^e (mod m).

   The exponent is scanned from the top bit down in windows of up to
   table_bits bits.  A table of the odd powers g^1, g^3, g^5, ... is
   built first, so each window costs a run of squarings plus a single
   multiplication.  Runs of zero bits between windows cost squarings
   only.

   Returns FALSE, changing nothing, when `g' and `ret' are attached to
   different ideals; TRUE otherwise.

   NOTE(review): only the magnitude bits of `e' are read here; the
   treatment of negative exponents should be confirmed with callers. */
Boolean ssh_mpmzm_pow(SshMPMontIntMod ret, SshMPMontIntModConst g,
                      SshMPIntegerConst e)
{
  /* Exponent bit-length thresholds for choosing the window width; the
     trailing zero terminates the list.  Constant data, so it is kept
     static instead of being rebuilt on the stack on every call. */
  static const unsigned int tab[] =
  { 24, 88, 277, 798, 2173, 5678, 14373, 0 };
  SshMPMontIntModStruct temp;
  unsigned int table_bits, table_size;
  SshMPMontIntMod table;
  unsigned int bits, i, j, mask, end_square, first;

  if (ret->m != g->m)
    return FALSE;

  /* Trivial cases. */
  if (ssh_mprz_cmp_ui(e, 0) == 0)
    {
      ssh_mpmzm_set_ui(ret, 1);
      return TRUE;
    }

  if (ssh_mprz_cmp_ui(e, 1) == 0)
    {
      ssh_mpmzm_set(ret, g);
      return TRUE;
    }

  ssh_mpmzm_init_inherit(&temp, ret);

  /* Compute the size of the exponent. */
  bits = ssh_mpk_size_in_bits(e->v, e->n);

  /* Select a reasonable window size: the first threshold the exponent
     length stays below decides it. */
  for (i = 0; tab[i]; i++)
    {
      if (bits < tab[i])
        break;
    }
  table_bits = i + 2;
  table_size = ((SshWord)1 << (table_bits - 1));

  /* Allocate the table. */
  table = ssh_xmalloc(sizeof(SshMPMontIntModStruct) * table_size);

  /* The first entry is g itself, which is already in Montgomery
     representation.  It is copied straight from the argument; `ret'
     is not written before the very end, so this is safe even when
     ret and g are the same object. */
  ssh_mpmzm_init_inherit(&table[0], ret);
  ssh_mpmzm_set(&table[0], g);

  /* Compute g^2 into temp. */
  ssh_mpmzm_set(&temp, &table[0]);
  ssh_mpmzm_square(&temp, &temp);

  /* Compute the small table of odd powers: table[i] = g^(2i + 1). */
  for (i = 1; i < table_size; i++)
    {
      ssh_mpmzm_init_inherit(&table[i], ret);
      ssh_mpmzm_mul(&table[i], &table[i - 1], &temp);
    }

  for (first = 1, i = bits; i;)
    {
      /* Collect the next window.  Its top bit is always set: the scan
         starts at the highest set bit and zero runs are skipped at
         the bottom of this loop. */
      for (j = 0, mask = 0; j < table_bits && i; j++, i--)
        {
          mask <<= 1;
          mask |= ssh_mprz_get_bit(e, i - 1);
        }

      /* Strip trailing zero bits so that the window value is odd; the
         stripped bits are paid back as squarings afterwards. */
      for (end_square = 0; (mask & 0x1) == 0;)
        {
          mask >>= 1;
          end_square++;
        }

      if (!first)
        {
          /* First square, once per bit of the (odd) window value. */
          for (j = mask; j; j >>= 1)
            ssh_mpmzm_square(&temp, &temp);

          ssh_mpmzm_mul(&temp, &temp, &table[(mask - 1)/2]);
        }
      else
        {
          /* The very first window just loads the table entry. */
          ssh_mpmzm_set(&temp, &table[(mask - 1)/2]);
          first = 0;
        }

      /* Get rid of zero bits... */
      while (end_square)
        {
          ssh_mpmzm_square(&temp, &temp);
          end_square--;
        }

      /* ... and of any zero bits up to the next set bit. */
      while (i && ssh_mprz_get_bit(e, i - 1) == 0)
        {
          ssh_mpmzm_square(&temp, &temp);
          i--;
        }
    }

  /* Clear and free the table. */
  for (i = 0; i < table_size; i++)
    ssh_mpmzm_clear(&table[i]);
  ssh_xfree(table);

  ssh_mpmzm_set(ret, &temp);

  ssh_mpmzm_clear(&temp);

  return TRUE;
}

/* sshmp-montgomery.c */

