199 lines
5.8 KiB
C
199 lines
5.8 KiB
C
/* mpn_sb_divappr_q - schoolbook approximate quotient.
|
|
|
|
THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
|
|
INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
|
|
IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
|
|
FUTURE MPIR RELEASE.
|
|
|
|
|
|
Copyright 2009 William Hart.
|
|
|
|
This file is part of the MPIR Library.
|
|
|
|
The MPIR Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 2.1 of the License, or (at your
|
|
option) any later version.
|
|
|
|
The MPIR Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the MPIR Library; see the file COPYING.LIB. If not, write to
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
#include "mpir.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
/*
|
|
Given n = {np, nn} and d = {dp, dn} and a 2 limb inverse
|
|
x = {dip, 2} (with implicit top bit), satisfying
|
|
x*d0 < B^4 <= (x+1)*d0 where d0 = {dp + dn - 2, 2} is the
|
|
top two limbs of the denominator, returns an approximate
|
|
quotient q = {qp, nn - dn + 1} such that d*q + r = n for
|
|
some remainder r with -d < r < d.
|
|
|
|
Requires d = {dp, dn} to be normalised, i.e. the most
|
|
significant bit of the most significant limb must be set.
|
|
Also requires that d is at least two limbs and the
|
|
numerator be at least as many limbs as the denominator
|
|
(this may change in a future release).
|
|
|
|
n = {np, nn} is destroyed.
|
|
*/
|
|
|
|
mp_limb_t
|
|
mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
|
|
mp_srcptr dp, mp_size_t dn, mp_srcptr dip)
|
|
{
|
|
/*
|
|
In order to make use of the the two limb inverse we
|
|
use the following theorem of Torbjorn Granlund and
|
|
Peter Montgomery from their paper, "Division by
|
|
invariant integers using multiplication" (restated
|
|
here for clarity):
|
|
|
|
Lemma 8.1: Let d be normalised, d < B^2 (i.e.
|
|
fits in two words), and suppose that
|
|
m*d < B^4 <= (m+1)*d.
|
|
Let 0 <= n <= B^2*d - 1. Write
|
|
n = n2*B^2 + n1*B^2/2 + n0
|
|
with n1 = 0 or 1 and n0 < B^2/2.
|
|
Suppose
|
|
q1*B^2 + q0 = n2*B^2 + (n2 + n1)*(m-B^2)
|
|
+ n1*(d-B^2/2) + n0
|
|
and 0 <= q0 < B^2.
|
|
Then 0 <= q1 < B^2 and 0 <= n - q1*d < 2d.
|
|
|
|
We apply the theorem as follows. Note that
|
|
n0 and n1*(d-B^2/2) are both less than B^2/2.
|
|
Also note that n1*(m-B^2) < B^2. Thus the sum
|
|
of all these terms contributes at most 1 to q1.
|
|
|
|
We are left with n2*B^2 + n2*(m-B^2). But note
|
|
that (m-B^2) is precisely our precomputed inverse
|
|
without the implied leading bit. If we write
|
|
q1*B^2 + q0 = n2*B^2 + n2*(m-B^2), then from the
|
|
theorem, we have 0 <= n-q1*d < 3d.
|
|
*/
|
|
|
|
mp_limb_t ret, di0, di1, p1, p2, p3, p4, q, q0, n21, n20, cy;
|
|
mp_size_t qn = nn - dn + 1;
|
|
mp_size_t i;
|
|
mp_limb_t dnpr = 0;
|
|
|
|
/*
|
|
We only need to use the top qn limbs of the
|
|
denominator and the same applies for the
|
|
numerator. As we correct at each step for the
|
|
error from the precomputed inverse, the only
|
|
error at the end of the algorithm is from
|
|
truncating.
|
|
|
|
Truncation of the denominator means that at
|
|
each step we may be subtracting an amount which
|
|
is slightly too small from the numerator to get
|
|
the partial remainder at each step. But as we
|
|
use a normalised denominator, this can only
|
|
cause the quotient to be tipped over and made
|
|
one too large.
|
|
|
|
Truncating the numerator can cause the
|
|
quotient to be computed one too small in very
|
|
rare instances. We detect this and correct.
|
|
*/
|
|
|
|
if (qn < dn)
|
|
{
|
|
dp += (dn - qn);
|
|
dn = qn;
|
|
}
|
|
|
|
if (qn < nn)
|
|
{
|
|
np += (nn - qn);
|
|
nn = qn;
|
|
}
|
|
|
|
/*
|
|
It may be that the top limbs of the numerator
|
|
are bigger than the denominator, in which case
|
|
we return the high top limb of the quotient as
|
|
1 instead of 0.
|
|
*/
|
|
|
|
if (mpn_cmp(np + nn - dn, dp, dn) >= 0)
|
|
{
|
|
ret = CNST_LIMB(1);
|
|
mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);
|
|
} else
|
|
ret = CNST_LIMB(0);
|
|
|
|
di1 = dip[1];
|
|
di0 = dip[0];
|
|
for (i = qn - 2; i >= 0L; i--)
|
|
{
|
|
/*
|
|
Compute n2 + top two limbs of n2*di, but
|
|
caring only about the top limb q, which we
|
|
allow to be off by up to 1. We must be
|
|
careful to truncate the numerator when taking
|
|
the quotient.
|
|
*/
|
|
n21 = np[nn - 1];
|
|
n20 = np[nn - 2];
|
|
umul_ppmm(p2, p1, di0, n21);
|
|
umul_ppmm(p4, p3, di1, n20);
|
|
add_ssaaaa(q, q0, n21, p2, CNST_LIMB(0), p4);
|
|
umul_ppmm(p1, p2, di1, n21);
|
|
add_ssaaaa(q, q0, q, q0, p1, p2);
|
|
add_ssaaaa(q, q0, q, q0, CNST_LIMB(0), n20);
|
|
|
|
cy = mpn_submul_1(np + nn - dn - 1, dp, dn, q);
|
|
|
|
/* Either q was correct or too small by 1 */
|
|
if (UNLIKELY(np[nn-1] < cy))
|
|
{
|
|
mpn_add_n(np + nn - dn - 1, np + nn - dn - 1, dp, dn);
|
|
q--;
|
|
} else if ((np[nn-1] > cy) || (mpn_cmp(np + nn - dn - 1, dp, dn) >= 0))
|
|
{
|
|
q++; /* beware: q *can* overflow - see below */
|
|
if (q == 0)
|
|
q--;
|
|
else
|
|
mpn_sub_n(np + nn - dn - 1, np + nn - dn - 1, dp, dn);
|
|
}
|
|
|
|
qp[i] = q;
|
|
|
|
if (dn > i + 1)
|
|
{
|
|
dp++;
|
|
dn--;
|
|
}
|
|
|
|
nn--;
|
|
|
|
/* This is a special case which showed up in testing. It
|
|
may be that truncating the denominator leads to a quotient
|
|
which overflows. As we know that the overflow wouldn't have
|
|
occurred before the truncation happened, we can safely just
|
|
set all remaining limbs of the quotient to all binary ones.
|
|
*/
|
|
if (mpn_cmp(np + nn - dn, dp, dn) == 0)
|
|
{
|
|
i--;
|
|
for ( ; i >= 0L; i--) qp[i] = ~CNST_LIMB(0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|