mpir/mpn/generic/sb_divappr_q.c
2009-10-12 02:05:16 +00:00

199 lines
5.8 KiB
C

/* mpn_sb_divappr_q - schoolbook approximate quotient.
THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
FUTURE MPIR RELEASE.
Copyright 2009 William Hart.
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
/*
Given n = {np, nn} and d = {dp, dn} and a 2 limb inverse
x = {dip, 2} (with implicit top bit), satisfying
x*d0 < B^4 <= (x+1)*d0 where d0 = {dp + dn - 2, 2} is the
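top two limbs of the denominator, computes an approximate
quotient q of nn - dn + 1 limbs such that d*q + r = n for
some remainder r with -d < r < d. The low nn - dn limbs of
q are written to {qp, nn - dn} and the most significant limb
(0 or 1) is the return value of the function.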
Requires d = {dp, dn} to be normalised, i.e. the most
significant bit of the most significant limb must be set.
Also requires that d is at least two limbs and the
numerator be at least as many limbs as the denominator
(this may change in a future release).
n = {np, nn} is destroyed.
*/
mp_limb_t
mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
                  mp_srcptr dp, mp_size_t dn, mp_srcptr dip)
{
  /*
     Schoolbook approximate quotient of {np, nn} by {dp, dn}.

     qp       receives the low nn - dn quotient limbs, i.e.
              qp[0] .. qp[nn - dn - 1] are written.
     np, nn   numerator; overwritten with partial remainders.
     dp, dn   denominator; only read.
     dip      two limb precomputed inverse, dip[0] being the low
              limb and dip[1] the high limb.

     The return value is the most significant quotient limb: 1 if
     the leading limbs of the (truncated) numerator are not smaller
     than the (truncated) denominator, otherwise 0.
  */

  /*
     In order to make use of the two limb inverse we
     use the following theorem of Torbjorn Granlund and
     Peter Montgomery from their paper, "Division by
     invariant integers using multiplication" (restated
     here for clarity):

     Lemma 8.1: Let d be normalised, d < B^2 (i.e.
     fits in two words), and suppose that
        m*d < B^4 <= (m+1)*d.
     Let 0 <= n <= B^2*d - 1. Write
        n = n2*B^2 + n1*B^2/2 + n0
     with n1 = 0 or 1 and n0 < B^2/2.
     Suppose
        q1*B^2 + q0 = n2*B^2 + (n2 + n1)*(m-B^2)
                      + n1*(d-B^2/2) + n0
     and 0 <= q0 < B^2.
     Then 0 <= q1 < B^2 and 0 <= n - q1*d < 2d.

     We apply the theorem as follows. Note that
     n0 and n1*(d-B^2/2) are both less than B^2/2.
     Also note that n1*(m-B^2) < B^2. Thus the sum
     of all these terms contributes at most 1 to q1.
     We are left with n2*B^2 + n2*(m-B^2). But note
     that (m-B^2) is precisely our precomputed inverse
     without the implied leading bit. If we write
     q1*B^2 + q0 = n2*B^2 + n2*(m-B^2), then from the
     theorem, we have 0 <= n-q1*d < 3d.
  */
  mp_limb_t ret, di0, di1, q, q0, n21, n20, cy;
  mp_limb_t hi_d0n1;   /* high limb of di0 * n21               */
  mp_limb_t hi_d1n0;   /* high limb of di1 * n20               */
  mp_limb_t ph, pl;    /* both limbs of di1 * n21              */
  mp_limb_t lo_unused; /* low limbs that are deliberately lost */
  mp_size_t qn = nn - dn + 1;
  mp_size_t i;
  mp_ptr rp;

  /*
     We only need to use the top qn limbs of the
     denominator and the same applies for the
     numerator. As we correct at each step for the
     error from the precomputed inverse, the only
     error at the end of the algorithm is from
     truncating.

     Truncation of the denominator means that at
     each step we may be subtracting an amount which
     is slightly too small from the numerator to get
     the partial remainder at each step. But as we
     use a normalised denominator, this can only
     cause the quotient to be tipped over and made
     one too large.

     Truncating the numerator can cause the
     quotient to be computed one too small in very
     rare instances. We detect this and correct.
  */
  if (qn < dn)
    {
      dp += (dn - qn);
      dn = qn;
    }

  if (qn < nn)
    {
      np += (nn - qn);
      nn = qn;
    }

  /*
     It may be that the top limbs of the numerator
     are bigger than the denominator, in which case
     we return the high top limb of the quotient as
     1 instead of 0.
  */
  rp = np + nn - dn;
  if (mpn_cmp (rp, dp, dn) >= 0)
    {
      ret = CNST_LIMB(1);
      mpn_sub_n (rp, rp, dp, dn);
    }
  else
    {
      ret = CNST_LIMB(0);
    }

  di1 = dip[1];
  di0 = dip[0];

  for (i = qn - 2; i >= 0; i--)
    {
      /*
         Compute n2 + top two limbs of n2*di, but
         caring only about the top limb q, which we
         allow to be off by up to 1. We must be
         careful to truncate the numerator when taking
         the quotient.
      */
      n21 = np[nn - 1];
      n20 = np[nn - 2];

      /* The two cross products only contribute their high limbs. */
      umul_ppmm (hi_d0n1, lo_unused, di0, n21);
      umul_ppmm (hi_d1n0, lo_unused, di1, n20);
      add_ssaaaa (q, q0, n21, hi_d0n1, CNST_LIMB(0), hi_d1n0);

      /* The high * high product contributes both of its limbs. */
      umul_ppmm (ph, pl, di1, n21);
      add_ssaaaa (q, q0, q, q0, ph, pl);

      /* Finally add in n2 itself (n21 was folded in above). */
      add_ssaaaa (q, q0, q, q0, CNST_LIMB(0), n20);

      /*
         The dn limbs just below np[nn - 1]. When dn == nn this is
         one limb below the truncated np; given the documented
         precondition dn >= 2, np was advanced by at least one limb
         above, so that limb still lies inside the caller's buffer.
      */
      rp = np + nn - dn - 1;

      /*
         Subtract q*d from the partial remainder. cy is the borrow
         out of the low dn limbs; it is compared against np[nn - 1]
         rather than subtracted from it, as that limb drops out of
         the window at the end of this iteration.
      */
      cy = mpn_submul_1 (rp, dp, dn, q);

      if (UNLIKELY (np[nn - 1] < cy))
        {
          /* The borrow exceeds the top limb: q was one too large,
             so add d back and decrement q. */
          mpn_add_n (rp, rp, dp, dn);
          q--;
        }
      else if ((np[nn - 1] > cy) || (mpn_cmp (rp, dp, dn) >= 0))
        {
          /* What is left is still at least d: q was too small. */
          q++; /* beware: q *can* overflow - see below */
          if (q == 0)
            q--; /* keep q at all ones and leave the remainder alone */
          else
            mpn_sub_n (rp, rp, dp, dn);
        }

      qp[i] = q;

      /* Drop a low limb of the denominator once it is longer than
         i + 1 limbs. */
      if (dn > i + 1)
        {
          dp++;
          dn--;
        }

      nn--;

      /*
         This is a special case which showed up in testing. It
         may be that truncating the denominator leads to a quotient
         which overflows. As we know that the overflow wouldn't have
         occurred before the truncation happened, we can safely just
         set all remaining limbs of the quotient to all binary ones.
      */
      if (mpn_cmp (np + nn - dn, dp, dn) == 0)
        {
          for (i--; i >= 0; i--)
            {
              qp[i] = ~CNST_LIMB(0);
            }
          break;
        }
    }

  return ret;
}