dnl Generic x86 mpn_mod_34lsub1 -- mpn remainder modulo 2^24-1. dnl Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc. dnl dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or dnl modify it under the terms of the GNU Lesser General Public License as dnl published by the Free Software Foundation; either version 2.1 of the dnl License, or (at your option) any later version. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, dnl but WITHOUT ANY WARRANTY; without even the implied warranty of dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU dnl Lesser General Public License for more details. dnl dnl You should have received a copy of the GNU Lesser General Public dnl License along with the GNU MP Library; see the file COPYING.LIB. If dnl not, write to the Free Software Foundation, Inc., 51 Franklin Street, dnl Fifth Floor, Boston, MA 02110-1301, USA. include(`../config.m4') C cycles/limb C P5: 3.0 C P6: 3.66 C K6: 3.0 C K7: 1.3 C P4: 9 C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size) C defframe(PARAM_SIZE, 8) defframe(PARAM_SRC, 4) dnl re-use parameter space define(SAVE_EBX, `PARAM_SRC') TEXT ALIGN(16) PROLOGUE(mpn_mod_34lsub1) deflit(`FRAME',0) movl PARAM_SIZE, %ecx movl PARAM_SRC, %edx subl $2, %ecx ja L(three_or_more) movl (%edx), %eax jb L(one) movl 4(%edx), %ecx movl %eax, %edx shrl $24, %eax C src[0] low andl $0xFFFFFF, %edx C src[0] high addl %edx, %eax movl %ecx, %edx andl $0xFFFF, %ecx shrl $16, %edx C src[1] high addl %edx, %eax shll $8, %ecx C src[1] low addl %ecx, %eax L(one): ret L(three_or_more): C eax C ebx C ecx size-2 C edx src C esi C edi C ebp movl %ebx, SAVE_EBX C and arrange 16-byte loop alignment xorl %ebx, %ebx pushl %esi FRAME_pushl() xorl %esi, %esi pushl %edi FRAME_pushl() xorl %eax, %eax C and clear carry flag C offset 0x40 here L(top): C eax acc 0mod3 C ebx acc 1mod3 C ecx counter, limbs C edx src C esi acc 2mod3 C edi C ebp leal 12(%edx), %edx leal -2(%ecx), %ecx adcl -12(%edx), %eax adcl -8(%edx), %ebx adcl -4(%edx), %esi decl %ecx jg L(top) C ecx is -2, -1 or 0 representing 0, 1 or 2 more limbs, respectively movl $0xFFFFFFFF, %edi incl %ecx js L(combine) adcl (%edx), %eax movl $0xFFFFFF00, %edi decl %ecx js L(combine) adcl 4(%edx), %ebx movl $0xFFFF0000, %edi L(combine): C eax acc 0mod3 C ebx acc 1mod3 C ecx C edx C esi acc 2mod3 C edi mask C ebp sbbl %ecx, %ecx C carry movl %eax, %edx C 0mod3 shrl $24, %eax C 0mod3 high andl %edi, %ecx C carry masked subl %ecx, %eax C apply carry movl %ebx, %edi C 1mod3 shrl $16, %ebx C 1mod3 high andl $0x00FFFFFF, %edx C 0mod3 low addl %edx, %eax C apply 0mod3 low andl $0xFFFF, %edi shll $8, %edi C 1mod3 low addl %ebx, %eax C apply 1mod3 high addl %edi, %eax C apply 1mod3 low movl %esi, %edx C 2mod3 shrl $8, %esi C 2mod3 high andl $0xFF, %edx C 2mod3 low shll $16, %edx C 2mod3 low addl %esi, %eax C apply 2mod3 high addl %edx, %eax C apply 2mod3 low popl %edi FRAME_popl() movl SAVE_EBX, %ebx popl %esi FRAME_popl() ret EPILOGUE()