mpir/mpn/generic/sb_divappr_q.c

153 lines
4.2 KiB
C

/* mpn_sb_divappr_q - schoolbook approximate quotient.
THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
FUTURE GNU MP RELEASE.
Copyright 2009 William Hart.
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
/*
Given n = {np, nn} and d = {dp, dn} and a 2 limb inverse
x = {dip, 2} (with implicit top bit), satisfying
x*d0 < B^4 <= (x+1)*d0 where d0 = {dp + dn - 2, 2} is the
top two limbs of the denominator, returns an approximate
quotient q = {qp, nn - dn + 1} such that d*q + r = n for
some remainder r with -d < r < d.
Requires d = {dp, dn} to be normalised, i.e. the most
significant bit of the most significant limb must be set.
n = {np, nn} is destroyed.
*/
mp_limb_t
mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn, mp_srcptr dip)
{
/*
In order to make use of the the two limb inverse we
use the following theorem of Torbjorn Granlund and
Peter Montgomery from their paper, "Division by
invariant integers using multiplication" (restated
here for clarity):
Lemma 8.1: Let d be normalised, d < B^2 (i.e.
fits in two words), and suppose that
m*d < B^4 <= (m+1)*d.
Let 0 <= n <= B^2*d - 1. Write
n = n2*B^2 + n1*B^2/2 + n0
with n1 = 0 or 1 and n0 < B^2/2.
Suppose
q1*B^2 + q0 = n2*B^2 + (n2 + n1)*(m-B^2)
+ n1*(d-B^2/2) + n0
and 0 <= q0 < B^2.
Then 0 <= q1 < B^2 and 0 <= n - q1*d < 2d.
We apply the theorem as follows. Note that
n0 and n1*(d-B^2/2) are both less than B^2/2.
Also note that n1*(m-B^2) < B^2. Thus the sum
of all these terms contributes at most 1 to q1.
We are left with n2*B^2 + n2*(m-B^2). But note
that (m-B^2) is precisely our precomputed inverse
without the implied leading bit. If we write
q1*B^2 + q0 = n2*B^2 + n2*(m-B^2), then from the
theorem, we have 0 <= n-q1*d < 3d.
*/
mp_limb_t ret, di0, di1, p1, p2, p3, p4, q, q0, n21, n20, cy;
mp_size_t qn = nn - dn + 1;
mp_size_t i;
/*
We only need to use the top qn limbs of the
denominator and the same applies for the
numerator.
*/
if (qn < dn)
{
dp += (dn - qn);
dn = qn;
}
if (qn < nn)
{
np += (nn - qn);
nn = qn;
}
/*
It may be that the top limbs of the numerator
are bigger than the denominator, in which case
we return the high top limb of the quotient as
1 instead of 0.
*/
if (mpn_cmp(np + nn - dn, dp, dn) >= 0)
{
ret = CNST_LIMB(1);
mpn_sub_n(np + nn - dn, np + nn - dn, dp, nn);
} else
ret = CNST_LIMB(0);
di1 = dip[1];
di0 = dip[0];
for (i = qn - 2; i >= 0; i--)
{
/*
Compute n2 + top two limbs of n2*di, but
caring only about the top limb q, which we
allow to be off by up to 1.
*/
n21 = np[nn - 1];
n20 = np[nn - 2];
umul_ppmm(p2, p1, di0, n21);
umul_ppmm(p4, p3, di1, n20);
add_ssaaaa(q, q0, n21, p2, CNST_LIMB(0), p4);
umul_ppmm(p1, p2, di1, n21);
add_ssaaaa(q, q0, q, q0, p1, p2);
add_ssaaaa(q, q0, q, q0, CNST_LIMB(0), n20);
cy = mpn_submul_1(np + nn - dn - 1, dp, dn, q);
/* Either q was correct or too small by 1 */
if ((np[nn-1] > cy) || (mpn_cmp(np + nn - dn - 1, dp, dn) >= 0))
{
q++; /* q can't overflow */
mpn_sub_n(np + nn - dn - 1, np + nn - dn - 1, dp, dn);
}
qp[i] = q;
dn--;
nn--;
}
return ret;
}