[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commits] r7418 - in /fsf/trunk/ports: ./ sysdeps/alpha/ sysdeps/alpha/alphaev5/ sysdeps/alpha/alphaev6/ sysdeps/alpha/alphaev6/fpu/ s...
- To: commits@xxxxxxxxxx
- Subject: [commits] r7418 - in /fsf/trunk/ports: ./ sysdeps/alpha/ sysdeps/alpha/alphaev5/ sysdeps/alpha/alphaev6/ sysdeps/alpha/alphaev6/fpu/ s...
- From: eglibc@xxxxxxxxxx
- Date: Wed, 26 Nov 2008 08:02:02 -0000
Author: eglibc
Date: Wed Nov 26 00:02:02 2008
New Revision: 7418
Log:
Import glibc-ports-mainline for 2008-11-26
Added:
fsf/trunk/ports/ChangeLog.alpha
fsf/trunk/ports/sysdeps/alpha/
fsf/trunk/ports/sysdeps/alpha/Implies
fsf/trunk/ports/sysdeps/alpha/Makefile
fsf/trunk/ports/sysdeps/alpha/Subdirs
fsf/trunk/ports/sysdeps/alpha/Versions
fsf/trunk/ports/sysdeps/alpha/__longjmp.S
fsf/trunk/ports/sysdeps/alpha/_mcount.S
fsf/trunk/ports/sysdeps/alpha/add_n.s
fsf/trunk/ports/sysdeps/alpha/addmul_1.s
fsf/trunk/ports/sysdeps/alpha/alphaev5/
fsf/trunk/ports/sysdeps/alpha/alphaev5/add_n.s
fsf/trunk/ports/sysdeps/alpha/alphaev5/lshift.s
fsf/trunk/ports/sysdeps/alpha/alphaev5/rshift.s
fsf/trunk/ports/sysdeps/alpha/alphaev5/sub_n.s
fsf/trunk/ports/sysdeps/alpha/alphaev6/
fsf/trunk/ports/sysdeps/alpha/alphaev6/Implies
fsf/trunk/ports/sysdeps/alpha/alphaev6/addmul_1.s
fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/
fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrt.S
fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S
fsf/trunk/ports/sysdeps/alpha/alphaev6/memchr.S
fsf/trunk/ports/sysdeps/alpha/alphaev6/memcpy.S
fsf/trunk/ports/sysdeps/alpha/alphaev6/memset.S
fsf/trunk/ports/sysdeps/alpha/alphaev6/stxcpy.S
fsf/trunk/ports/sysdeps/alpha/alphaev6/stxncpy.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/
fsf/trunk/ports/sysdeps/alpha/alphaev67/Implies
fsf/trunk/ports/sysdeps/alpha/alphaev67/ffs.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/ffsll.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/fpu/
fsf/trunk/ports/sysdeps/alpha/alphaev67/fpu/Implies
fsf/trunk/ports/sysdeps/alpha/alphaev67/rawmemchr.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/stpcpy.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/stpncpy.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/strcat.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/strchr.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/strlen.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/strncat.S
fsf/trunk/ports/sysdeps/alpha/alphaev67/strrchr.S
fsf/trunk/ports/sysdeps/alpha/backtrace.c
fsf/trunk/ports/sysdeps/alpha/bb_init_func.S
fsf/trunk/ports/sysdeps/alpha/bits/
fsf/trunk/ports/sysdeps/alpha/bits/atomic.h
fsf/trunk/ports/sysdeps/alpha/bits/endian.h
fsf/trunk/ports/sysdeps/alpha/bits/link.h
fsf/trunk/ports/sysdeps/alpha/bits/mathdef.h
fsf/trunk/ports/sysdeps/alpha/bits/setjmp.h
fsf/trunk/ports/sysdeps/alpha/bsd-_setjmp.S
fsf/trunk/ports/sysdeps/alpha/bsd-setjmp.S
fsf/trunk/ports/sysdeps/alpha/bzero.S
fsf/trunk/ports/sysdeps/alpha/div.S
fsf/trunk/ports/sysdeps/alpha/div_libc.h
fsf/trunk/ports/sysdeps/alpha/divl.S
fsf/trunk/ports/sysdeps/alpha/divlu.S
fsf/trunk/ports/sysdeps/alpha/divq.S
fsf/trunk/ports/sysdeps/alpha/divqu.S
fsf/trunk/ports/sysdeps/alpha/dl-dtprocnum.h
fsf/trunk/ports/sysdeps/alpha/dl-machine.h
fsf/trunk/ports/sysdeps/alpha/dl-sysdep.h
fsf/trunk/ports/sysdeps/alpha/dl-tls.h
fsf/trunk/ports/sysdeps/alpha/dl-trampoline.S
fsf/trunk/ports/sysdeps/alpha/elf/
fsf/trunk/ports/sysdeps/alpha/elf/configure
fsf/trunk/ports/sysdeps/alpha/elf/configure.in
fsf/trunk/ports/sysdeps/alpha/elf/initfini.c
fsf/trunk/ports/sysdeps/alpha/elf/start.S
fsf/trunk/ports/sysdeps/alpha/ffs.S
fsf/trunk/ports/sysdeps/alpha/ffsll.S
fsf/trunk/ports/sysdeps/alpha/fpu/
fsf/trunk/ports/sysdeps/alpha/fpu/Versions
fsf/trunk/ports/sysdeps/alpha/fpu/bits/
fsf/trunk/ports/sysdeps/alpha/fpu/bits/fenv.h
fsf/trunk/ports/sysdeps/alpha/fpu/bits/mathinline.h
fsf/trunk/ports/sysdeps/alpha/fpu/cabsf.c
fsf/trunk/ports/sysdeps/alpha/fpu/cargf.c
fsf/trunk/ports/sysdeps/alpha/fpu/cfloat-compat.h
fsf/trunk/ports/sysdeps/alpha/fpu/cimagf.c
fsf/trunk/ports/sysdeps/alpha/fpu/conjf.c
fsf/trunk/ports/sysdeps/alpha/fpu/crealf.c
fsf/trunk/ports/sysdeps/alpha/fpu/e_sqrt.c
fsf/trunk/ports/sysdeps/alpha/fpu/fclrexcpt.c
fsf/trunk/ports/sysdeps/alpha/fpu/fedisblxcpt.c
fsf/trunk/ports/sysdeps/alpha/fpu/feenablxcpt.c
fsf/trunk/ports/sysdeps/alpha/fpu/fegetenv.c
fsf/trunk/ports/sysdeps/alpha/fpu/fegetexcept.c
fsf/trunk/ports/sysdeps/alpha/fpu/fegetround.c
fsf/trunk/ports/sysdeps/alpha/fpu/feholdexcpt.c
fsf/trunk/ports/sysdeps/alpha/fpu/fenv_libc.h
fsf/trunk/ports/sysdeps/alpha/fpu/fesetenv.c
fsf/trunk/ports/sysdeps/alpha/fpu/fesetround.c
fsf/trunk/ports/sysdeps/alpha/fpu/feupdateenv.c
fsf/trunk/ports/sysdeps/alpha/fpu/fgetexcptflg.c
fsf/trunk/ports/sysdeps/alpha/fpu/fpu_control.h
fsf/trunk/ports/sysdeps/alpha/fpu/fsetexcptflg.c
fsf/trunk/ports/sysdeps/alpha/fpu/ftestexcept.c
fsf/trunk/ports/sysdeps/alpha/fpu/libm-test-ulps
fsf/trunk/ports/sysdeps/alpha/fpu/s_cacosf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_cacoshf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_casinf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_casinhf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_catanf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_catanhf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_ccosf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_ccoshf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_ceil.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_ceilf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_cexpf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_clog10f.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_clogf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_copysign.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_copysignf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_cpowf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_cprojf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_csinf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_csinhf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_csqrtf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_ctanf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_ctanhf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_fabs.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_fabsf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_floor.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_floorf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_fmax.S
fsf/trunk/ports/sysdeps/alpha/fpu/s_fmaxf.S
fsf/trunk/ports/sysdeps/alpha/fpu/s_fmin.S
fsf/trunk/ports/sysdeps/alpha/fpu/s_fminf.S
fsf/trunk/ports/sysdeps/alpha/fpu/s_isnan.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_isnanf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_llrint.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_llrintf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_llround.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_llroundf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_lrint.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_lrintf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_lround.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_lroundf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_nearbyint.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_nearbyintf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_rint.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_rintf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_round.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_roundf.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_trunc.c
fsf/trunk/ports/sysdeps/alpha/fpu/s_truncf.c
fsf/trunk/ports/sysdeps/alpha/gccframe.h
fsf/trunk/ports/sysdeps/alpha/hp-timing.h
fsf/trunk/ports/sysdeps/alpha/htonl.S
fsf/trunk/ports/sysdeps/alpha/htons.S
fsf/trunk/ports/sysdeps/alpha/jmpbuf-offsets.h
fsf/trunk/ports/sysdeps/alpha/jmpbuf-unwind.h
fsf/trunk/ports/sysdeps/alpha/ldiv.S
fsf/trunk/ports/sysdeps/alpha/libc-tls.c
fsf/trunk/ports/sysdeps/alpha/lldiv.S
fsf/trunk/ports/sysdeps/alpha/lshift.s
fsf/trunk/ports/sysdeps/alpha/machine-gmon.h
fsf/trunk/ports/sysdeps/alpha/memchr.S
fsf/trunk/ports/sysdeps/alpha/memset.S
fsf/trunk/ports/sysdeps/alpha/memusage.h
fsf/trunk/ports/sysdeps/alpha/mul_1.s
fsf/trunk/ports/sysdeps/alpha/nptl/
fsf/trunk/ports/sysdeps/alpha/nptl/Makefile
fsf/trunk/ports/sysdeps/alpha/nptl/elf/
fsf/trunk/ports/sysdeps/alpha/nptl/elf/pt-initfini.c
fsf/trunk/ports/sysdeps/alpha/nptl/pthread_spin_lock.S
fsf/trunk/ports/sysdeps/alpha/nptl/pthread_spin_trylock.S
fsf/trunk/ports/sysdeps/alpha/nptl/pthreaddef.h
fsf/trunk/ports/sysdeps/alpha/nptl/tcb-offsets.sym
fsf/trunk/ports/sysdeps/alpha/nptl/tls.h
fsf/trunk/ports/sysdeps/alpha/nscd-types.h
fsf/trunk/ports/sysdeps/alpha/preconfigure
fsf/trunk/ports/sysdeps/alpha/rawmemchr.S
fsf/trunk/ports/sysdeps/alpha/reml.S
fsf/trunk/ports/sysdeps/alpha/remlu.S
fsf/trunk/ports/sysdeps/alpha/remq.S
fsf/trunk/ports/sysdeps/alpha/remqu.S
fsf/trunk/ports/sysdeps/alpha/rshift.s
fsf/trunk/ports/sysdeps/alpha/setjmp.S
fsf/trunk/ports/sysdeps/alpha/shlib-versions
fsf/trunk/ports/sysdeps/alpha/soft-fp/
fsf/trunk/ports/sysdeps/alpha/soft-fp/Makefile
fsf/trunk/ports/sysdeps/alpha/soft-fp/Versions
fsf/trunk/ports/sysdeps/alpha/soft-fp/e_sqrtl.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/local-soft-fp.h
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_add.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cmp.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cmpe.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cvtqux.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cvtqx.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cvttx.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cvtxq.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_cvtxt.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_div.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_mul.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_nintxq.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/ots_sub.c
fsf/trunk/ports/sysdeps/alpha/soft-fp/sfp-machine.h
fsf/trunk/ports/sysdeps/alpha/stackinfo.h
fsf/trunk/ports/sysdeps/alpha/stpcpy.S
fsf/trunk/ports/sysdeps/alpha/stpncpy.S
fsf/trunk/ports/sysdeps/alpha/strcat.S
fsf/trunk/ports/sysdeps/alpha/strchr.S
fsf/trunk/ports/sysdeps/alpha/strcmp.S
fsf/trunk/ports/sysdeps/alpha/strcpy.S
fsf/trunk/ports/sysdeps/alpha/strlen.S
fsf/trunk/ports/sysdeps/alpha/strncat.S
fsf/trunk/ports/sysdeps/alpha/strncmp.S
fsf/trunk/ports/sysdeps/alpha/strncpy.S
fsf/trunk/ports/sysdeps/alpha/strrchr.S
fsf/trunk/ports/sysdeps/alpha/stxcpy.S
fsf/trunk/ports/sysdeps/alpha/stxncpy.S
fsf/trunk/ports/sysdeps/alpha/sub_n.s
fsf/trunk/ports/sysdeps/alpha/submul_1.s
fsf/trunk/ports/sysdeps/alpha/udiv_qrnnd.S
fsf/trunk/ports/sysdeps/mach/alpha/
fsf/trunk/ports/sysdeps/mach/alpha/machine-lock.h
fsf/trunk/ports/sysdeps/mach/alpha/machine-sp.h
fsf/trunk/ports/sysdeps/mach/alpha/setfpucw.c
fsf/trunk/ports/sysdeps/mach/alpha/syscall.S
fsf/trunk/ports/sysdeps/mach/alpha/sysdep.h
fsf/trunk/ports/sysdeps/mach/alpha/thread_state.h
fsf/trunk/ports/sysdeps/mach/hurd/alpha/
fsf/trunk/ports/sysdeps/mach/hurd/alpha/bits/
fsf/trunk/ports/sysdeps/mach/hurd/alpha/bits/sigcontext.h
fsf/trunk/ports/sysdeps/mach/hurd/alpha/exc2signal.c
fsf/trunk/ports/sysdeps/mach/hurd/alpha/init-first.c
fsf/trunk/ports/sysdeps/mach/hurd/alpha/intr-msg.h
fsf/trunk/ports/sysdeps/mach/hurd/alpha/longjmp-ts.c
fsf/trunk/ports/sysdeps/mach/hurd/alpha/sigreturn.c
fsf/trunk/ports/sysdeps/mach/hurd/alpha/static-start.S
fsf/trunk/ports/sysdeps/mach/hurd/alpha/trampoline.c
fsf/trunk/ports/sysdeps/unix/alpha/
fsf/trunk/ports/sysdeps/unix/alpha/Makefile (with props)
fsf/trunk/ports/sysdeps/unix/alpha/pipe.S
fsf/trunk/ports/sysdeps/unix/alpha/rt-sysdep.S
fsf/trunk/ports/sysdeps/unix/alpha/sysdep.S
fsf/trunk/ports/sysdeps/unix/alpha/sysdep.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/Implies
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/Makefile (with props)
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/Versions
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/a.out.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/adjtime.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alpha/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alpha/ptrace.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alpha/regdef.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alphaev6/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alphaev6/fpu/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alphaev6/fpu/Implies
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alphaev67/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alphaev67/fpu/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/alphaev67/fpu/Implies
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/a.out.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/dirent.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/elfclass.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/errno.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/fcntl.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/ioctls.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/ipc.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/mman.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/msq.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/netdb.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/resource.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/sem.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/shm.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/sigaction.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/siginfo.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/signum.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/sigstack.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/stat.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/statfs.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/termios.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/typesizes.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/bits/wordsize.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/brk.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/clone.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/dl-auxv.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/dl-brk.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/dl-support.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/dl-sysdep.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/fpu/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/fpu/Implies
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/fraiseexcpt.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/fxstat.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/fxstatat.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getclktck.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getcontext.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getdents.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getdents64.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/gethostname.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getitimer.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getrusage.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/getsysstats.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/gettimeofday.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/glob.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/ieee_get_fp_control.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/ieee_set_fp_control.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/ioperm.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/ipc_priv.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/kernel_sigaction.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/kernel_stat.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/kernel_sysinfo.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/kernel_termios.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/lxstat.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/makecontext.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/msgctl.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nldbl-abi.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/Makefile
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/Versions
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/aio_cancel.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/bits/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/bits/local_lim.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/bits/pthreadtypes.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/bits/semaphore.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/clone.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/createthread.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/fork.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/lowlevellock.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/pt-vfork.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/pthread_once.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/sem_post.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/sysdep-cancel.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/timer_create.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/timer_delete.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/timer_getoverr.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/timer_gettime.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/timer_settime.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/nptl/vfork.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/oldglob.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/pipe.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/register-dump.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/rt_sigaction.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/select.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/semctl.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setcontext.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setfpucw.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setitimer.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setregid.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setresgid.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setresuid.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/setreuid.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/settimeofday.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/shmctl.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sigaction.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sigcontextinfo.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sigprocmask.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sigsuspend.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sizes.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/swapcontext.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/acct.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/epoll.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/eventfd.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/inotify.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/io.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/procfs.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/signalfd.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/timerfd.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/ucontext.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sys/user.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/syscall.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/syscalls.list
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sysconf.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/sysdep.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/ucontext-offsets.sym
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/utimes.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/wait4.S
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/wordexp.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/xstat.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/xstatconv.c
fsf/trunk/ports/sysdeps/unix/sysv/linux/alpha/xstatconv.h
Removed:
fsf/trunk/ports/sysdeps/unix/bsd/osf/alpha/
Modified:
fsf/trunk/ports/ChangeLog.arm
fsf/trunk/ports/ChangeLog.mips
fsf/trunk/ports/sysdeps/unix/sysv/linux/arm/sysdep.h
fsf/trunk/ports/sysdeps/unix/sysv/linux/mips/sys/signalfd.h
Added: fsf/trunk/ports/ChangeLog.alpha
==============================================================================
--- fsf/trunk/ports/ChangeLog.alpha (added)
+++ fsf/trunk/ports/ChangeLog.alpha Wed Nov 26 00:02:02 2008
@@ -1,0 +1,18 @@
+2008-11-25 Roland McGrath <roland@xxxxxxxxxx>
+
+ * ChangeLog.alpha: New file (this one).
+ * sysdeps/alpha, sysdeps/unix/bsd/osf/alpha,
+ sysdeps/unix/bsd/Attic/osf1/alpha, sysdeps/unix/sysv/linux/alpha,
+ sysdeps/unix/sysv/linux/alpha/alpha, sysdeps/unix/alpha,
+ sysdeps/mach/alpha, sysdeps/mach/hurd/alpha:
+ Subdirectories moved here from main repository.
+ * sysdeps/alpha/nptl, sysdeps/unix/sysv/linux/alpha/nptl:
+ Subdirectories moved here from main repository's nptl/ subdirectory.
+ * sysdeps/alpha/preconfigure: New file.
+ * sysdeps/alpha/shlib-versions: New file.
+
+Local Variables:
+mode: change-log
+left-margin: 8
+fill-column: 74
+End:
Modified: fsf/trunk/ports/ChangeLog.arm
==============================================================================
--- fsf/trunk/ports/ChangeLog.arm (original)
+++ fsf/trunk/ports/ChangeLog.arm Wed Nov 26 00:02:02 2008
@@ -1,3 +1,10 @@
+2008-11-25 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
+
+ * sysdeps/unix/sysv/linux/arm/sysdep.h (LOAD_ARGS_1, LOAD_ARGS_2,
+ LOAD_ARGS_3, LOAD_ARGS_4, LOAD_ARGS_5, LOAD_ARGS_6, LOAD_ARGS_7):
+ Load all arguments into temporary variables before loading into
+ registers.
+
2008-08-19 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
* data/c++-types-arm-linux-gnueabi.data: New.
Modified: fsf/trunk/ports/ChangeLog.mips
==============================================================================
--- fsf/trunk/ports/ChangeLog.mips (original)
+++ fsf/trunk/ports/ChangeLog.mips Wed Nov 26 00:02:02 2008
@@ -1,3 +1,8 @@
+2008-11-25 Joseph Myers <joseph@xxxxxxxxxxxxxxxx>
+
+ * sysdeps/unix/sysv/linux/mips/sys/signalfd.h (signalfd): Fix
+ __THROW vs. __nonnull order for C++.
+
2008-10-15 Daniel Jacobowitz <dan@xxxxxxxxxxxxxxxx>
* sysdeps/mips/dl-dtprocnum.h (DT_MIPS_NUM): Do not redefine.
Added: fsf/trunk/ports/sysdeps/alpha/Implies
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/Implies (added)
+++ fsf/trunk/ports/sysdeps/alpha/Implies Wed Nov 26 00:02:02 2008
@@ -1,0 +1,6 @@
+wordsize-64
+# Alpha uses IEEE 754 single, double and quad precision floating point.
+ieee754/ldbl-128
+ieee754/dbl-64
+ieee754/flt-32
+alpha/soft-fp
Added: fsf/trunk/ports/sysdeps/alpha/Makefile
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/Makefile (added)
+++ fsf/trunk/ports/sysdeps/alpha/Makefile Wed Nov 26 00:02:02 2008
@@ -1,0 +1,48 @@
+# Copyright (C) 1993, 94, 95, 96, 97, 99 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+# Contributed by Brendan Kehoe (brendan@xxxxxxx).
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307 USA.
+
+ifeq ($(subdir),db2)
+CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_ALPHA=1
+endif
+
+ifeq ($(subdir),gmon)
+sysdep_routines += _mcount
+endif
+
+ifeq ($(subdir),gnulib)
+sysdep_routines += divl divlu divq divqu reml remlu remq remqu
+endif
+
+ifeq ($(subdir),string)
+sysdep_routines += stxcpy stxncpy
+endif
+
+ifeq ($(subdir),elf)
+# The ld.so startup code cannot use literals until it self-relocates.
+CFLAGS-rtld.c = -mbuild-constants
+endif
+
+# Build everything with full IEEE math support, and with dynamic rounding;
+# there are a number of math routines that are defined to work with the
+# "current" rounding mode, and it's easiest to set this with all of them.
+sysdep-CFLAGS += -mieee -mfp-rounding-mode=d
+
+# libc.so requires about 16k for the small data area, which is well
+# below the 64k maximum.
+pic-ccflag = -fpic
Added: fsf/trunk/ports/sysdeps/alpha/Subdirs
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/Subdirs (added)
+++ fsf/trunk/ports/sysdeps/alpha/Subdirs Wed Nov 26 00:02:02 2008
@@ -1,0 +1,1 @@
+soft-fp
Added: fsf/trunk/ports/sysdeps/alpha/Versions
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/Versions (added)
+++ fsf/trunk/ports/sysdeps/alpha/Versions Wed Nov 26 00:02:02 2008
@@ -1,0 +1,13 @@
+libc {
+ GLIBC_2.0 {
+ # functions with special/multiple interfaces
+ __divqu; __remqu; __divqs; __remqs; __divlu; __remlu; __divls;
+ __remls; __divl; __reml; __divq; __remq; __divqu; __remqu;
+ }
+}
+libm {
+ GLIBC_2.0 {
+ # used in inline functions.
+ __atan2;
+ }
+}
Added: fsf/trunk/ports/sysdeps/alpha/__longjmp.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/__longjmp.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/__longjmp.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,64 @@
+/* Copyright (C) 1992, 1994, 1997, 2006 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#define __ASSEMBLY__
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+
+
+ENTRY(__longjmp)
+#ifdef PROF
+ ldgp gp, 0(pv)
+ .set noat
+ lda AT, _mcount
+ jsr AT, (AT), _mcount
+ .set at
+ .prologue 1
+#else
+ .prologue 0
+#endif
+
+ mov a1, v0
+ ldq s0, JB_S0*8(a0)
+ ldq s1, JB_S1*8(a0)
+ ldq s2, JB_S2*8(a0)
+ ldq s3, JB_S3*8(a0)
+ ldq s4, JB_S4*8(a0)
+ ldq s5, JB_S5*8(a0)
+ ldq ra, JB_PC*8(a0)
+ ldq fp, JB_FP*8(a0)
+ ldq t0, JB_SP*8(a0)
+ ldt $f2, JB_F2*8(a0)
+ ldt $f3, JB_F3*8(a0)
+ ldt $f4, JB_F4*8(a0)
+ ldt $f5, JB_F5*8(a0)
+ ldt $f6, JB_F6*8(a0)
+ ldt $f7, JB_F7*8(a0)
+ ldt $f8, JB_F8*8(a0)
+ ldt $f9, JB_F9*8(a0)
+#ifdef PTR_DEMANGLE
+ PTR_DEMANGLE(ra, t1)
+ PTR_DEMANGLE2(t0, t1)
+ PTR_DEMANGLE2(fp, t1)
+#endif
+ cmoveq v0, 1, v0
+ mov t0, sp
+ ret
+
+END(__longjmp)
Added: fsf/trunk/ports/sysdeps/alpha/_mcount.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/_mcount.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/_mcount.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,107 @@
+/* Machine-specific calling sequence for `mcount' profiling function. alpha
+ Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
+ Contributed by David Mosberger (davidm@xxxxxxxxxxxxxx).
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Assembly stub to invoke _mcount(). Compiler generated code calls
+ this stub after executing a function's prologue and without saving any
+ registers. It is therefore necessary to preserve a0..a5 as they may
+ contain function arguments. To work correctly with frame- less
+ functions, it is also necessary to preserve ra. Finally, division
+ routines are invoked with a special calling convention and the
+ compiler treats those calls as if they were instructions. In
+ particular, it doesn't save any of the temporary registers (caller
+ saved registers). It is therefore necessary to preserve all
+ caller-saved registers as well.
+
+ Upon entering _mcount, register $at holds the return address and ra
+ holds the return address of the function's caller (selfpc and frompc,
+ respectively in gmon.c language...). */
+
+#include <sysdep.h>
+
+ .set noat
+ .set noreorder
+
+LEAF(_mcount, 0xb0)
+ .prologue 0
+
+ subq sp, 0xb0, sp
+ stq a0, 0x00(sp)
+ mov ra, a0 # a0 = caller-pc
+ stq a1, 0x08(sp)
+ mov $at, a1 # a1 = self-pc
+ stq $at, 0x10(sp)
+
+ stq a2, 0x18(sp)
+ stq a3, 0x20(sp)
+ stq a4, 0x28(sp)
+ stq a5, 0x30(sp)
+ stq ra, 0x38(sp)
+ stq gp, 0x40(sp)
+
+ br gp, 1f
+1: ldgp gp, 0(gp)
+
+ stq t0, 0x48(sp)
+ stq t1, 0x50(sp)
+ stq t2, 0x58(sp)
+ stq t3, 0x60(sp)
+ stq t4, 0x68(sp)
+ stq t5, 0x70(sp)
+ stq t6, 0x78(sp)
+
+ stq t7, 0x80(sp)
+ stq t8, 0x88(sp)
+ stq t9, 0x90(sp)
+ stq t10, 0x98(sp)
+ stq t11, 0xa0(sp)
+ stq v0, 0xa8(sp)
+
+ jsr ra, __mcount
+
+ ldq a0, 0x00(sp)
+ ldq a1, 0x08(sp)
+ ldq $at, 0x10(sp) # restore self-pc
+ ldq a2, 0x18(sp)
+ ldq a3, 0x20(sp)
+ ldq a4, 0x28(sp)
+ ldq a5, 0x30(sp)
+ ldq ra, 0x38(sp)
+ ldq gp, 0x40(sp)
+ mov $at, pv # make pv point to return address
+ ldq t0, 0x48(sp) # this is important under OSF/1 to
+ ldq t1, 0x50(sp) # ensure that the code that we return
+ ldq t2, 0x58(sp) # can correctly compute its gp
+ ldq t3, 0x60(sp)
+ ldq t4, 0x68(sp)
+ ldq t5, 0x70(sp)
+ ldq t6, 0x78(sp)
+ ldq t7, 0x80(sp)
+ ldq t8, 0x88(sp)
+ ldq t9, 0x90(sp)
+ ldq t10, 0x98(sp)
+ ldq t11, 0xa0(sp)
+ ldq v0, 0xa8(sp)
+
+ addq sp, 0xb0, sp
+ ret zero,($at),1
+
+ END(_mcount)
+
+weak_alias (_mcount, mcount)
Added: fsf/trunk/ports/sysdeps/alpha/add_n.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/add_n.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/add_n.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,120 @@
+ # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_add_n
+ .ent __mpn_add_n
+__mpn_add_n:
+ .frame $30,0,$26,0
+
+ ldq $3,0($17)
+ ldq $4,0($18)
+
+ subq $19,1,$19
+ and $19,4-1,$2 # number of limbs in first loop
+ bis $31,$31,$0
+ beq $2,.L0 # if multiple of 4 limbs, skip first loop
+
+ subq $19,$2,$19
+
+.Loop0: subq $2,1,$2
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ addq $17,8,$17
+ addq $18,8,$18
+ bis $5,$5,$3
+ bis $6,$6,$4
+ addq $16,8,$16
+ bne $2,.Loop0
+
+.L0: beq $19,.Lend
+
+ .align 3
+.Loop: subq $19,4,$19
+
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ ldq $3,16($17)
+ addq $6,$0,$6
+ ldq $4,16($18)
+ cmpult $6,$0,$1
+ addq $5,$6,$6
+ cmpult $6,$5,$0
+ stq $6,8($16)
+ or $0,$1,$0
+
+ ldq $5,24($17)
+ addq $4,$0,$4
+ ldq $6,24($18)
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,16($16)
+ or $0,$1,$0
+
+ ldq $3,32($17)
+ addq $6,$0,$6
+ ldq $4,32($18)
+ cmpult $6,$0,$1
+ addq $5,$6,$6
+ cmpult $6,$5,$0
+ stq $6,24($16)
+ or $0,$1,$0
+
+ addq $17,32,$17
+ addq $18,32,$18
+ addq $16,32,$16
+ bne $19,.Loop
+
+.Lend: addq $4,$0,$4
+ cmpult $4,$0,$1
+ addq $3,$4,$4
+ cmpult $4,$3,$0
+ stq $4,0($16)
+ or $0,$1,$0
+ ret $31,($26),1
+
+ .end __mpn_add_n
Added: fsf/trunk/ports/sysdeps/alpha/addmul_1.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/addmul_1.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/addmul_1.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,92 @@
+ # Alpha 21064 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ # the result to a second limb vector.
+
+ # Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # s2_limb r19
+
+ # This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_addmul_1
+ .ent __mpn_addmul_1 2
+__mpn_addmul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ addq $5,$3,$3
+ cmpult $3,$5,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end __mpn_addmul_1
Added: fsf/trunk/ports/sysdeps/alpha/alphaev5/add_n.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev5/add_n.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev5/add_n.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,148 @@
+ # Alpha __mpn_add_n -- Add two limb vectors of the same length > 0 and
+ # store sum in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_add_n
+ .ent __mpn_add_n
+__mpn_add_n:
+ .frame $30,0,$26,0
+
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
+ ldq $5,8($17)
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ addq $0,$4,$20 # 1st main add
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $20,$0,$25 # compute cy from last add
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ addq $5,$28,$21 # 2nd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+ .align 4
+.Loop: cmpult $21,$28,$25 # compute cy from last add
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the two adds
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ addq $28,$6,$22 # 3rd main add
+ ldq $5,8($17)
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ addq $4,$28,$20 # 1st main add
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $20,$28,$25 # compute cy from last add
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the two adds
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ addq $5,$28,$21 # 2nd main add
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $21,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $2,$25,$28 # cy add
+ addq $28,$6,$22 # 3rd main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $22,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ addq $28,$7,$23 # 4th main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $23,$28,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ addq $4,$28,$20 # main add
+ ldq $4,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $20,$28,$25 # compute cy from last add
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,8,$16
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ addq $4,$28,$20 # main add
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $20,$28,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+
+.Lret: or $25,$31,$0 # return cy
+ ret $31,($26),1
+ .end __mpn_add_n
Added: fsf/trunk/ports/sysdeps/alpha/alphaev5/lshift.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev5/lshift.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev5/lshift.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,174 @@
+ # Alpha EV5 __mpn_lshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # cnt r19
+
+ # This code runs at 3.25 cycles/limb on the EV5.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_lshift
+ .ent __mpn_lshift
+__mpn_lshift:
+ .frame $30,0,$26,0
+
+ s8addq $18,$17,$17 # make r17 point at end of s1
+ ldq $4,-8($17) # load first limb
+ subq $31,$19,$20
+ s8addq $18,$16,$16 # make r16 point at end of RES
+ subq $18,1,$18
+ and $18,4-1,$28 # number of limbs in first loop
+ srl $4,$20,$0 # compute function result
+
+ beq $28,.L0
+ subq $18,$28,$18
+
+ .align 3
+.Loop0: ldq $3,-16($17)
+ subq $16,8,$16
+ sll $4,$19,$5
+ subq $17,8,$17
+ subq $28,1,$28
+ srl $3,$20,$6
+ or $3,$3,$4
+ or $5,$6,$8
+ stq $8,0($16)
+ bne $28,.Loop0
+
+.L0: sll $4,$19,$24
+ beq $18,.Lend
+ # warm up phase 1
+ ldq $1,-16($17)
+ subq $18,4,$18
+ ldq $2,-24($17)
+ ldq $3,-32($17)
+ ldq $4,-40($17)
+ beq $18,.Lend1
+ # warm up phase 2
+ srl $1,$20,$7
+ sll $1,$19,$21
+ srl $2,$20,$8
+ ldq $1,-48($17)
+ sll $2,$19,$22
+ ldq $2,-56($17)
+ srl $3,$20,$5
+ or $7,$24,$7
+ sll $3,$19,$23
+ or $8,$21,$8
+ srl $4,$20,$6
+ ldq $3,-64($17)
+ sll $4,$19,$24
+ ldq $4,-72($17)
+ subq $18,4,$18
+ beq $18,.Lend2
+ .align 4
+ # main loop
+.Loop: stq $7,-8($16)
+ or $5,$22,$5
+ stq $8,-16($16)
+ or $6,$23,$6
+
+ srl $1,$20,$7
+ subq $18,4,$18
+ sll $1,$19,$21
+ unop # ldq $31,-96($17)
+
+ srl $2,$20,$8
+ ldq $1,-80($17)
+ sll $2,$19,$22
+ ldq $2,-88($17)
+
+ stq $5,-24($16)
+ or $7,$24,$7
+ stq $6,-32($16)
+ or $8,$21,$8
+
+ srl $3,$20,$5
+ unop # ldq $31,-96($17)
+ sll $3,$19,$23
+ subq $16,32,$16
+
+ srl $4,$20,$6
+ ldq $3,-96($17)
+ sll $4,$19,$24
+ ldq $4,-104($17)
+
+ subq $17,32,$17
+ bne $18,.Loop
+ # cool down phase 2/1
+.Lend2: stq $7,-8($16)
+ or $5,$22,$5
+ stq $8,-16($16)
+ or $6,$23,$6
+ srl $1,$20,$7
+ sll $1,$19,$21
+ srl $2,$20,$8
+ sll $2,$19,$22
+ stq $5,-24($16)
+ or $7,$24,$7
+ stq $6,-32($16)
+ or $8,$21,$8
+ srl $3,$20,$5
+ sll $3,$19,$23
+ srl $4,$20,$6
+ sll $4,$19,$24
+ # cool down phase 2/2
+ stq $7,-40($16)
+ or $5,$22,$5
+ stq $8,-48($16)
+ or $6,$23,$6
+ stq $5,-56($16)
+ stq $6,-64($16)
+ # cool down phase 2/3
+ stq $24,-72($16)
+ ret $31,($26),1
+
+ # cool down phase 1/1
+.Lend1: srl $1,$20,$7
+ sll $1,$19,$21
+ srl $2,$20,$8
+ sll $2,$19,$22
+ srl $3,$20,$5
+ or $7,$24,$7
+ sll $3,$19,$23
+ or $8,$21,$8
+ srl $4,$20,$6
+ sll $4,$19,$24
+ # cool down phase 1/2
+ stq $7,-8($16)
+ or $5,$22,$5
+ stq $8,-16($16)
+ or $6,$23,$6
+ stq $5,-24($16)
+ stq $6,-32($16)
+ stq $24,-40($16)
+ ret $31,($26),1
+
+.Lend: stq $24,-8($16)
+ ret $31,($26),1
+ .end __mpn_lshift
Added: fsf/trunk/ports/sysdeps/alpha/alphaev5/rshift.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev5/rshift.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev5/rshift.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,172 @@
+ # Alpha EV5 __mpn_rshift --
+
+ # Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr r16
+ # s1_ptr r17
+ # size r18
+ # cnt r19
+
+ # This code runs at 3.25 cycles/limb on the EV5.
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_rshift
+ .ent __mpn_rshift
+__mpn_rshift:
+ .frame $30,0,$26,0
+
+ ldq $4,0($17) # load first limb
+ subq $31,$19,$20
+ subq $18,1,$18
+ and $18,4-1,$28 # number of limbs in first loop
+ sll $4,$20,$0 # compute function result
+
+ beq $28,.L0
+ subq $18,$28,$18
+
+ .align 3
+.Loop0: ldq $3,8($17)
+ addq $16,8,$16
+ srl $4,$19,$5
+ addq $17,8,$17
+ subq $28,1,$28
+ sll $3,$20,$6
+ or $3,$3,$4
+ or $5,$6,$8
+ stq $8,-8($16)
+ bne $28,.Loop0
+
+.L0: srl $4,$19,$24
+ beq $18,.Lend
+ # warm up phase 1
+ ldq $1,8($17)
+ subq $18,4,$18
+ ldq $2,16($17)
+ ldq $3,24($17)
+ ldq $4,32($17)
+ beq $18,.Lend1
+ # warm up phase 2
+ sll $1,$20,$7
+ srl $1,$19,$21
+ sll $2,$20,$8
+ ldq $1,40($17)
+ srl $2,$19,$22
+ ldq $2,48($17)
+ sll $3,$20,$5
+ or $7,$24,$7
+ srl $3,$19,$23
+ or $8,$21,$8
+ sll $4,$20,$6
+ ldq $3,56($17)
+ srl $4,$19,$24
+ ldq $4,64($17)
+ subq $18,4,$18
+ beq $18,.Lend2
+ .align 4
+ # main loop
+.Loop: stq $7,0($16)
+ or $5,$22,$5
+ stq $8,8($16)
+ or $6,$23,$6
+
+ sll $1,$20,$7
+ subq $18,4,$18
+ srl $1,$19,$21
+ unop # ldq $31,-96($17)
+
+ sll $2,$20,$8
+ ldq $1,72($17)
+ srl $2,$19,$22
+ ldq $2,80($17)
+
+ stq $5,16($16)
+ or $7,$24,$7
+ stq $6,24($16)
+ or $8,$21,$8
+
+ sll $3,$20,$5
+ unop # ldq $31,-96($17)
+ srl $3,$19,$23
+ addq $16,32,$16
+
+ sll $4,$20,$6
+ ldq $3,88($17)
+ srl $4,$19,$24
+ ldq $4,96($17)
+
+ addq $17,32,$17
+ bne $18,.Loop
+ # cool down phase 2/1
+.Lend2: stq $7,0($16)
+ or $5,$22,$5
+ stq $8,8($16)
+ or $6,$23,$6
+ sll $1,$20,$7
+ srl $1,$19,$21
+ sll $2,$20,$8
+ srl $2,$19,$22
+ stq $5,16($16)
+ or $7,$24,$7
+ stq $6,24($16)
+ or $8,$21,$8
+ sll $3,$20,$5
+ srl $3,$19,$23
+ sll $4,$20,$6
+ srl $4,$19,$24
+ # cool down phase 2/2
+ stq $7,32($16)
+ or $5,$22,$5
+ stq $8,40($16)
+ or $6,$23,$6
+ stq $5,48($16)
+ stq $6,56($16)
+ # cool down phase 2/3
+ stq $24,64($16)
+ ret $31,($26),1
+
+ # cool down phase 1/1
+.Lend1: sll $1,$20,$7
+ srl $1,$19,$21
+ sll $2,$20,$8
+ srl $2,$19,$22
+ sll $3,$20,$5
+ or $7,$24,$7
+ srl $3,$19,$23
+ or $8,$21,$8
+ sll $4,$20,$6
+ srl $4,$19,$24
+ # cool down phase 1/2
+ stq $7,0($16)
+ or $5,$22,$5
+ stq $8,8($16)
+ or $6,$23,$6
+ stq $5,16($16)
+ stq $6,24($16)
+ stq $24,32($16)
+ ret $31,($26),1
+
+.Lend: stq $24,0($16)
+ ret $31,($26),1
+ .end __mpn_rshift
Added: fsf/trunk/ports/sysdeps/alpha/alphaev5/sub_n.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev5/sub_n.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev5/sub_n.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,149 @@
+ # Alpha __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+ # store difference in a third limb vector.
+
+ # Copyright (C) 1995 Free Software Foundation, Inc.
+
+ # This file is part of the GNU MP Library.
+
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published by
+ # the Free Software Foundation; either version 2.1 of the License, or (at your
+ # option) any later version.
+
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # s2_ptr $18
+ # size $19
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __mpn_sub_n
+ .ent __mpn_sub_n
+__mpn_sub_n:
+ .frame $30,0,$26,0
+
+ or $31,$31,$25 # clear cy
+ subq $19,4,$19 # decr loop cnt
+ blt $19,.Lend2 # if less than 4 limbs, goto 2nd loop
+ # Start software pipeline for 1st loop
+ ldq $0,0($18)
+ ldq $1,8($18)
+ ldq $4,0($17)
+ ldq $5,8($17)
+ addq $17,32,$17 # update s1_ptr
+ ldq $2,16($18)
+ subq $4,$0,$20 # 1st main sub
+ ldq $3,24($18)
+ subq $19,4,$19 # decr loop cnt
+ ldq $6,-16($17)
+ cmpult $4,$20,$25 # compute cy from last sub
+ ldq $7,-8($17)
+ addq $1,$25,$28 # cy add
+ addq $18,32,$18 # update s2_ptr
+ subq $5,$28,$21 # 2nd main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ blt $19,.Lend1 # if less than 4 limbs remain, jump
+ # 1st loop handles groups of 4 limbs in a software pipeline
+ .align 4
+.Loop: cmpult $5,$21,$25 # compute cy from last add
+ ldq $0,0($18)
+ or $8,$25,$25 # combine cy from the two adds
+ ldq $1,8($18)
+ addq $2,$25,$28 # cy add
+ ldq $4,0($17)
+ subq $6,$28,$22 # 3rd main sub
+ ldq $5,8($17)
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $6,$22,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ subq $7,$28,$23 # 4th main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $7,$23,$25 # compute cy from last add
+ addq $17,32,$17 # update s1_ptr
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ addq $0,$25,$28 # cy add
+ ldq $2,16($18)
+ subq $4,$28,$20 # 1st main sub
+ ldq $3,24($18)
+ cmpult $28,$25,$8 # compute cy from last add
+ ldq $6,-16($17)
+ cmpult $4,$20,$25 # compute cy from last add
+ ldq $7,-8($17)
+ or $8,$25,$25 # combine cy from the two adds
+ subq $19,4,$19 # decr loop cnt
+ stq $22,-16($16)
+ addq $1,$25,$28 # cy add
+ stq $23,-8($16)
+ subq $5,$28,$21 # 2nd main sub
+ addq $18,32,$18 # update s2_ptr
+ cmpult $28,$25,$8 # compute cy from last add
+ bge $19,.Loop
+ # Finish software pipeline for 1st loop
+.Lend1: cmpult $5,$21,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $2,$25,$28 # cy add
+ subq $6,$28,$22 # 3rd main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $6,$22,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+ stq $21,8($16)
+ addq $3,$25,$28 # cy add
+ subq $7,$28,$23 # 4th main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $7,$23,$25 # compute cy from last add
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,32,$16 # update res_ptr
+ stq $22,-16($16)
+ stq $23,-8($16)
+.Lend2: addq $19,4,$19 # restore loop cnt
+ beq $19,.Lret
+ # Start software pipeline for 2nd loop
+ ldq $0,0($18)
+ ldq $4,0($17)
+ subq $19,1,$19
+ beq $19,.Lend0
+ # 2nd loop handles remaining 1-3 limbs
+ .align 4
+.Loop0: addq $0,$25,$28 # cy add
+ ldq $0,8($18)
+ subq $4,$28,$20 # main sub
+ ldq $1,8($17)
+ addq $18,8,$18
+ cmpult $28,$25,$8 # compute cy from last add
+ addq $17,8,$17
+ stq $20,0($16)
+ cmpult $4,$20,$25 # compute cy from last add
+ subq $19,1,$19 # decr loop cnt
+ or $8,$25,$25 # combine cy from the two adds
+ addq $16,8,$16
+ or $1,$31,$4
+ bne $19,.Loop0
+.Lend0: addq $0,$25,$28 # cy add
+ subq $4,$28,$20 # main sub
+ cmpult $28,$25,$8 # compute cy from last add
+ cmpult $4,$20,$25 # compute cy from last add
+ stq $20,0($16)
+ or $8,$25,$25 # combine cy from the two adds
+
+.Lret: or $25,$31,$0 # return cy
+ ret $31,($26),1
+ .end __mpn_sub_n
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/Implies
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/Implies (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/Implies Wed Nov 26 00:02:02 2008
@@ -1,0 +1,1 @@
+alpha/alphaev5
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/addmul_1.s
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/addmul_1.s (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/addmul_1.s Wed Nov 26 00:02:02 2008
@@ -1,0 +1,479 @@
+ # Alpha ev6 mpn_addmul_1 -- Multiply a limb vector with a limb and add
+ # the result to a second limb vector.
+ #
+ # Copyright (C) 2000 Free Software Foundation, Inc.
+ #
+ # This file is part of the GNU MP Library.
+ #
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation; either version 2.1 of the License, or (at
+ # your option) any later version.
+ #
+ # The GNU MP Library is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ # License for more details.
+ #
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+ # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+
+ # INPUT PARAMETERS
+ # res_ptr $16
+ # s1_ptr $17
+ # size $18
+ # s2_limb $19
+ #
+ # This code runs at 42 cycles/limb on EV4, 18 cycles/limb on EV5, and
+ # exactly 3.625 cycles/limb on EV6...
+ #
+ # This code was written in close cooperation with ev6 pipeline expert
+ # Steve Root (root@xxxxxxxxxxxxxxxxxx). Any errors are tege's fault, though.
+ #
+ # Register usages for unrolled loop:
+ # 0-3 mul's
+ # 4-7 acc's
+ # 8-15 mul results
+ # 20,21 carry's
+ # 22,23 save for stores
+ #
+ # Sustains 8 mul-adds in 29 cycles in the unrolled inner loop.
+ #
+ # The stores can issue a cycle late so we have paired no-op's to 'catch'
+ # them, so that further disturbance to the schedule is damped.
+ #
+ # We couldn't pair the loads, because the entangled schedule of the
+ # carry's has to happen on one side {0} of the machine. Note, the total
+ # use of U0, and the total use of L0 (after attending to the stores).
+ # which is part of the reason why....
+ #
+ # This is a great schedule for the d_cache, a poor schedule for the
+ # b_cache. The lockup on U0 means that any stall can't be recovered
+ # from. Consider a ldq in L1. say that load gets stalled because it
+ # collides with a fill from the b_Cache. On the next cycle, this load
+ # gets priority. If first looks at L0, and goes there. The instruction
+ # we intended for L0 gets to look at L1, which is NOT where we want
+ # it. It either stalls 1, because it can't go in L0, or goes there, and
+ # causes a further instruction to stall.
+ #
+ # So for b_cache, we're likely going to want to put one or more cycles
+ # back into the code! And, of course, put in prefetches. For the
+ # accumulator, lds, intent to modify. For the multiplier, you might
+ # want ldq, evict next, if you're not wanting to use it again soon. Use
+ # 256 ahead of present pointer value. At a place where we have an mt
+ # followed by a bookkeeping, put the bookkeeping in upper, and the
+ # prefetch into lower.
+ #
+ # Note, the usage of physical registers per cycle is smoothed off, as
+ # much as possible.
+ #
+ # Note, the ldq's and stq's are at the end of the quadpacks. note, we'd
+ # like not to have a ldq or stq to preceded a conditional branch in a
+ # quadpack. The conditional branch moves the retire pointer one cycle
+ # later.
+ #
+ # Optimization notes:
+ # Callee-saves regs: $9 $10 $11 $12 $13 $14 $15 $26 ?$27?
+ # Reserved regs: $29 $30 $31
+ # Free caller-saves regs in unrolled code: $24 $25 $28
+ # We should swap some of the callee-saves regs for some of the free
+ # caller-saves regs, saving some overhead cycles.
+ # Most importantly, we should write fast code for the 0-7 case.
+ # The code we use there are for the 21164, and runs at 7 cycles/limb
+ # on the 21264. Should not be hard, if we write specialized code for
+ # 1-7 limbs (the one for 0 limbs should be straightforward). We then just
+ # need a jump table indexed by the low 3 bits of the count argument.
+
+ .set noreorder
+ .set noat
+ .text
+
+ .globl __mpn_addmul_1
+ .ent __mpn_addmul_1
+__mpn_addmul_1:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ cmpult $18, 8, $1
+ beq $1, $Large
+
+ ldq $2, 0($17) # $2 = s1_limb
+ addq $17, 8, $17 # s1_ptr++
+ subq $18, 1, $18 # size--
+ mulq $2, $19, $3 # $3 = prod_low
+ ldq $5, 0($16) # $5 = *res_ptr
+ umulh $2, $19, $0 # $0 = prod_high
+ beq $18, $Lend0b # jump if size was == 1
+ ldq $2, 0($17) # $2 = s1_limb
+ addq $17, 8, $17 # s1_ptr++
+ subq $18, 1, $18 # size--
+ addq $5, $3, $3
+ cmpult $3, $5, $4
+ stq $3, 0($16)
+ addq $16, 8, $16 # res_ptr++
+ beq $18, $Lend0a # jump if size was == 2
+
+ .align 3
+$Loop0: mulq $2, $19, $3 # $3 = prod_low
+ ldq $5, 0($16) # $5 = *res_ptr
+ addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
+ subq $18, 1, $18 # size--
+ umulh $2, $19, $4 # $4 = cy_limb
+ ldq $2, 0($17) # $2 = s1_limb
+ addq $17, 8, $17 # s1_ptr++
+ addq $3, $0, $3 # $3 = cy_limb + prod_low
+ cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
+ addq $5, $3, $3
+ cmpult $3, $5, $5
+ stq $3, 0($16)
+ addq $16, 8, $16 # res_ptr++
+ addq $5, $0, $0 # combine carries
+ bne $18, $Loop0
+$Lend0a:
+ mulq $2, $19, $3 # $3 = prod_low
+ ldq $5, 0($16) # $5 = *res_ptr
+ addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
+ umulh $2, $19, $4 # $4 = cy_limb
+ addq $3, $0, $3 # $3 = cy_limb + prod_low
+ cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
+ addq $5, $3, $3
+ cmpult $3, $5, $5
+ stq $3, 0($16)
+ addq $5, $0, $0 # combine carries
+ addq $4, $0, $0 # cy_limb = prod_high + cy
+ ret $31, ($26), 1
+$Lend0b:
+ addq $5, $3, $3
+ cmpult $3, $5, $5
+ stq $3, 0($16)
+ addq $0, $5, $0
+ ret $31, ($26), 1
+
+$Large:
+ lda $30, -240($30)
+ stq $9, 8($30)
+ stq $10, 16($30)
+ stq $11, 24($30)
+ stq $12, 32($30)
+ stq $13, 40($30)
+ stq $14, 48($30)
+ stq $15, 56($30)
+
+ and $18, 7, $20 # count for the first loop, 0-7
+ srl $18, 3, $18 # count for unrolled loop
+ bis $31, $31, $0
+ beq $20, $Lunroll
+ ldq $2, 0($17) # $2 = s1_limb
+ addq $17, 8, $17 # s1_ptr++
+ subq $20, 1, $20 # size--
+ mulq $2, $19, $3 # $3 = prod_low
+ ldq $5, 0($16) # $5 = *res_ptr
+ umulh $2, $19, $0 # $0 = prod_high
+ beq $20, $Lend1b # jump if size was == 1
+ ldq $2, 0($17) # $2 = s1_limb
+ addq $17, 8, $17 # s1_ptr++
+ subq $20, 1, $20 # size--
+ addq $5, $3, $3
+ cmpult $3, $5, $4
+ stq $3, 0($16)
+ addq $16, 8, $16 # res_ptr++
+ beq $20, $Lend1a # jump if size was == 2
+
+ .align 3
+$Loop1: mulq $2, $19, $3 # $3 = prod_low
+ ldq $5, 0($16) # $5 = *res_ptr
+ addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
+ subq $20, 1, $20 # size--
+ umulh $2, $19, $4 # $4 = cy_limb
+ ldq $2, 0($17) # $2 = s1_limb
+ addq $17, 8, $17 # s1_ptr++
+ addq $3, $0, $3 # $3 = cy_limb + prod_low
+ cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
+ addq $5, $3, $3
+ cmpult $3, $5, $5
+ stq $3, 0($16)
+ addq $16, 8, $16 # res_ptr++
+ addq $5, $0, $0 # combine carries
+ bne $20, $Loop1
+
+$Lend1a:
+ mulq $2, $19, $3 # $3 = prod_low
+ ldq $5, 0($16) # $5 = *res_ptr
+ addq $4, $0, $0 # cy_limb = cy_limb + 'cy'
+ umulh $2, $19, $4 # $4 = cy_limb
+ addq $3, $0, $3 # $3 = cy_limb + prod_low
+ cmpult $3, $0, $0 # $0 = carry from (cy_limb + prod_low)
+ addq $5, $3, $3
+ cmpult $3, $5, $5
+ stq $3, 0($16)
+ addq $16, 8, $16 # res_ptr++
+ addq $5, $0, $0 # combine carries
+ addq $4, $0, $0 # cy_limb = prod_high + cy
+ br $31, $Lunroll
+$Lend1b:
+ addq $5, $3, $3
+ cmpult $3, $5, $5
+ stq $3, 0($16)
+ addq $16, 8, $16 # res_ptr++
+ addq $0, $5, $0
+
+$Lunroll:
+ lda $17, -16($17) # L1 bookkeeping
+ lda $16, -16($16) # L1 bookkeeping
+ bis $0, $31, $12
+
+ # ____ UNROLLED LOOP SOFTWARE PIPELINE STARTUP ____
+
+ ldq $2, 16($17) # L1
+ ldq $3, 24($17) # L1
+ lda $18, -1($18) # L1 bookkeeping
+ ldq $6, 16($16) # L1
+ ldq $7, 24($16) # L1
+ ldq $0, 32($17) # L1
+ mulq $19, $2, $13 # U1
+ ldq $1, 40($17) # L1
+ umulh $19, $2, $14 # U1
+ mulq $19, $3, $15 # U1
+ lda $17, 64($17) # L1 bookkeeping
+ ldq $4, 32($16) # L1
+ ldq $5, 40($16) # L1
+ umulh $19, $3, $8 # U1
+ ldq $2, -16($17) # L1
+ mulq $19, $0, $9 # U1
+ ldq $3, -8($17) # L1
+ umulh $19, $0, $10 # U1
+ addq $6, $13, $6 # L0 lo + acc
+ mulq $19, $1, $11 # U1
+ cmpult $6, $13, $20 # L0 lo add => carry
+ lda $16, 64($16) # L1 bookkeeping
+ addq $6, $12, $22 # U0 hi add => answer
+ cmpult $22, $12, $21 # L0 hi add => carry
+ addq $14, $20, $14 # U0 hi mul + carry
+ ldq $6, -16($16) # L1
+ addq $7, $15, $23 # L0 lo + acc
+ addq $14, $21, $14 # U0 hi mul + carry
+ ldq $7, -8($16) # L1
+ umulh $19, $1, $12 # U1
+ cmpult $23, $15, $20 # L0 lo add => carry
+ addq $23, $14, $23 # U0 hi add => answer
+ ldq $0, 0($17) # L1
+ mulq $19, $2, $13 # U1
+ cmpult $23, $14, $21 # L0 hi add => carry
+ addq $8, $20, $8 # U0 hi mul + carry
+ ldq $1, 8($17) # L1
+ umulh $19, $2, $14 # U1
+ addq $4, $9, $4 # L0 lo + acc
+ stq $22, -48($16) # L0
+ stq $23, -40($16) # L1
+ mulq $19, $3, $15 # U1
+ addq $8, $21, $8 # U0 hi mul + carry
+ cmpult $4, $9, $20 # L0 lo add => carry
+ addq $4, $8, $22 # U0 hi add => answer
+ ble $18, $Lend # U1 bookkeeping
+
+ # ____ MAIN UNROLLED LOOP ____
+ .align 4
+$Loop:
+ bis $31, $31, $31 # U1 mt
+ cmpult $22, $8, $21 # L0 hi add => carry
+ addq $10, $20, $10 # U0 hi mul + carry
+ ldq $4, 0($16) # L1
+
+ bis $31, $31, $31 # U1 mt
+ addq $5, $11, $23 # L0 lo + acc
+ addq $10, $21, $10 # L0 hi mul + carry
+ ldq $5, 8($16) # L1
+
+ umulh $19, $3, $8 # U1
+ cmpult $23, $11, $20 # L0 lo add => carry
+ addq $23, $10, $23 # U0 hi add => answer
+ ldq $2, 16($17) # L1
+
+ mulq $19, $0, $9 # U1
+ cmpult $23, $10, $21 # L0 hi add => carry
+ addq $12, $20, $12 # U0 hi mul + carry
+ ldq $3, 24($17) # L1
+
+ umulh $19, $0, $10 # U1
+ addq $6, $13, $6 # L0 lo + acc
+ stq $22, -32($16) # L0
+ stq $23, -24($16) # L1
+
+ bis $31, $31, $31 # L0 st slosh
+ mulq $19, $1, $11 # U1
+ bis $31, $31, $31 # L1 st slosh
+ addq $12, $21, $12 # U0 hi mul + carry
+
+ cmpult $6, $13, $20 # L0 lo add => carry
+ bis $31, $31, $31 # U1 mt
+ lda $18, -1($18) # L1 bookkeeping
+ addq $6, $12, $22 # U0 hi add => answer
+
+ bis $31, $31, $31 # U1 mt
+ cmpult $22, $12, $21 # L0 hi add => carry
+ addq $14, $20, $14 # U0 hi mul + carry
+ ldq $6, 16($16) # L1
+
+ bis $31, $31, $31 # U1 mt
+ addq $7, $15, $23 # L0 lo + acc
+ addq $14, $21, $14 # U0 hi mul + carry
+ ldq $7, 24($16) # L1
+
+ umulh $19, $1, $12 # U1
+ cmpult $23, $15, $20 # L0 lo add => carry
+ addq $23, $14, $23 # U0 hi add => answer
+ ldq $0, 32($17) # L1
+
+ mulq $19, $2, $13 # U1
+ cmpult $23, $14, $21 # L0 hi add => carry
+ addq $8, $20, $8 # U0 hi mul + carry
+ ldq $1, 40($17) # L1
+
+ umulh $19, $2, $14 # U1
+ addq $4, $9, $4 # U0 lo + acc
+ stq $22, -16($16) # L0
+ stq $23, -8($16) # L1
+
+ bis $31, $31, $31 # L0 st slosh
+ mulq $19, $3, $15 # U1
+ bis $31, $31, $31 # L1 st slosh
+ addq $8, $21, $8 # L0 hi mul + carry
+
+ cmpult $4, $9, $20 # L0 lo add => carry
+ bis $31, $31, $31 # U1 mt
+ lda $17, 64($17) # L1 bookkeeping
+ addq $4, $8, $22 # U0 hi add => answer
+
+ bis $31, $31, $31 # U1 mt
+ cmpult $22, $8, $21 # L0 hi add => carry
+ addq $10, $20, $10 # U0 hi mul + carry
+ ldq $4, 32($16) # L1
+
+ bis $31, $31, $31 # U1 mt
+ addq $5, $11, $23 # L0 lo + acc
+ addq $10, $21, $10 # L0 hi mul + carry
+ ldq $5, 40($16) # L1
+
+ umulh $19, $3, $8 # U1
+ cmpult $23, $11, $20 # L0 lo add => carry
+ addq $23, $10, $23 # U0 hi add => answer
+ ldq $2, -16($17) # L1
+
+ mulq $19, $0, $9 # U1
+ cmpult $23, $10, $21 # L0 hi add => carry
+ addq $12, $20, $12 # U0 hi mul + carry
+ ldq $3, -8($17) # L1
+
+ umulh $19, $0, $10 # U1
+ addq $6, $13, $6 # L0 lo + acc
+ stq $22, 0($16) # L0
+ stq $23, 8($16) # L1
+
+ bis $31, $31, $31 # L0 st slosh
+ mulq $19, $1, $11 # U1
+ bis $31, $31, $31 # L1 st slosh
+ addq $12, $21, $12 # U0 hi mul + carry
+
+ cmpult $6, $13, $20 # L0 lo add => carry
+ bis $31, $31, $31 # U1 mt
+ lda $16, 64($16) # L1 bookkeeping
+ addq $6, $12, $22 # U0 hi add => answer
+
+ bis $31, $31, $31 # U1 mt
+ cmpult $22, $12, $21 # L0 hi add => carry
+ addq $14, $20, $14 # U0 hi mul + carry
+ ldq $6, -16($16) # L1
+
+ bis $31, $31, $31 # U1 mt
+ addq $7, $15, $23 # L0 lo + acc
+ addq $14, $21, $14 # U0 hi mul + carry
+ ldq $7, -8($16) # L1
+
+ umulh $19, $1, $12 # U1
+ cmpult $23, $15, $20 # L0 lo add => carry
+ addq $23, $14, $23 # U0 hi add => answer
+ ldq $0, 0($17) # L1
+
+ mulq $19, $2, $13 # U1
+ cmpult $23, $14, $21 # L0 hi add => carry
+ addq $8, $20, $8 # U0 hi mul + carry
+ ldq $1, 8($17) # L1
+
+ umulh $19, $2, $14 # U1
+ addq $4, $9, $4 # L0 lo + acc
+ stq $22, -48($16) # L0
+ stq $23, -40($16) # L1
+
+ bis $31, $31, $31 # L0 st slosh
+ mulq $19, $3, $15 # U1
+ bis $31, $31, $31 # L1 st slosh
+ addq $8, $21, $8 # U0 hi mul + carry
+
+ cmpult $4, $9, $20 # L0 lo add => carry
+ addq $4, $8, $22 # U0 hi add => answer
+ bis $31, $31, $31 # L1 mt
+ bgt $18, $Loop # U1 bookkeeping
+
+# ____ UNROLLED LOOP SOFTWARE PIPELINE FINISH ____
+$Lend:
+ cmpult $22, $8, $21 # L0 hi add => carry
+ addq $10, $20, $10 # U0 hi mul + carry
+ ldq $4, 0($16) # L1
+ addq $5, $11, $23 # L0 lo + acc
+ addq $10, $21, $10 # L0 hi mul + carry
+ ldq $5, 8($16) # L1
+ umulh $19, $3, $8 # U1
+ cmpult $23, $11, $20 # L0 lo add => carry
+ addq $23, $10, $23 # U0 hi add => answer
+ mulq $19, $0, $9 # U1
+ cmpult $23, $10, $21 # L0 hi add => carry
+ addq $12, $20, $12 # U0 hi mul + carry
+ umulh $19, $0, $10 # U1
+ addq $6, $13, $6 # L0 lo + acc
+ stq $22, -32($16) # L0
+ stq $23, -24($16) # L1
+ mulq $19, $1, $11 # U1
+ addq $12, $21, $12 # U0 hi mul + carry
+ cmpult $6, $13, $20 # L0 lo add => carry
+ addq $6, $12, $22 # U0 hi add => answer
+ cmpult $22, $12, $21 # L0 hi add => carry
+ addq $14, $20, $14 # U0 hi mul + carry
+ addq $7, $15, $23 # L0 lo + acc
+ addq $14, $21, $14 # U0 hi mul + carry
+ umulh $19, $1, $12 # U1
+ cmpult $23, $15, $20 # L0 lo add => carry
+ addq $23, $14, $23 # U0 hi add => answer
+ cmpult $23, $14, $21 # L0 hi add => carry
+ addq $8, $20, $8 # U0 hi mul + carry
+ addq $4, $9, $4 # U0 lo + acc
+ stq $22, -16($16) # L0
+ stq $23, -8($16) # L1
+ bis $31, $31, $31 # L0 st slosh
+ addq $8, $21, $8 # L0 hi mul + carry
+ cmpult $4, $9, $20 # L0 lo add => carry
+ addq $4, $8, $22 # U0 hi add => answer
+ cmpult $22, $8, $21 # L0 hi add => carry
+ addq $10, $20, $10 # U0 hi mul + carry
+ addq $5, $11, $23 # L0 lo + acc
+ addq $10, $21, $10 # L0 hi mul + carry
+ cmpult $23, $11, $20 # L0 lo add => carry
+ addq $23, $10, $23 # U0 hi add => answer
+ cmpult $23, $10, $21 # L0 hi add => carry
+ addq $12, $20, $12 # U0 hi mul + carry
+ stq $22, 0($16) # L0
+ stq $23, 8($16) # L1
+ addq $12, $21, $0 # U0 hi mul + carry
+
+ ldq $9, 8($30)
+ ldq $10, 16($30)
+ ldq $11, 24($30)
+ ldq $12, 32($30)
+ ldq $13, 40($30)
+ ldq $14, 48($30)
+ ldq $15, 56($30)
+ lda $30, 240($30)
+ ret $31, ($26), 1
+
+ .end __mpn_addmul_1
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrt.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrt.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrt.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,45 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .arch ev6
+ .set noreorder
+ .set noat
+
+ENTRY(__ieee754_sqrt)
+#ifdef PROF
+ ldgp gp, 0(pv)
+ lda AT, _mcount
+ jsr AT, (AT), _mcount
+ .prologue 1
+#else
+ .prologue 0
+#endif
+
+ .align 4
+#ifdef _IEEE_FP_INEXACT
+ sqrtt/sui $f16, $f0
+#else
+ sqrtt/su $f16, $f0
+#endif
+ ret
+ nop
+ nop
+
+END(__ieee754_sqrt)
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/fpu/e_sqrtf.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,45 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .arch ev6
+ .set noreorder
+ .set noat
+
+ENTRY(__ieee754_sqrtf)
+#ifdef PROF
+ ldgp gp, 0(pv)
+ lda AT, _mcount
+ jsr AT, (AT), _mcount
+ .prologue 1
+#else
+ .prologue 0
+#endif
+
+ .align 4
+#ifdef _IEEE_FP_INEXACT
+ sqrts/sui $f16, $f0
+#else
+ sqrts/su $f16, $f0
+#endif
+ ret
+ nop
+ nop
+
+END(__ieee754_sqrtf)
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/memchr.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/memchr.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/memchr.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,193 @@
+/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by David Mosberger (davidm@xxxxxxxxxxxxxx).
+ EV6 optimized by Rick Gorton <rick.gorton@xxxxxxxxxxxxxxxxxxx>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .arch ev6
+ .set noreorder
+ .set noat
+
+ENTRY(__memchr)
+#ifdef PROF
+ ldgp gp, 0(pv)
+ lda AT, _mcount
+ jsr AT, (AT), _mcount
+ .prologue 1
+#else
+ .prologue 0
+#endif
+
+ # Hack -- if someone passes in (size_t)-1, hoping to just
+ # search til the end of the address space, we will overflow
+ # below when we find the address of the last byte. Given
+ # that we will never have a 56-bit address space, cropping
+ # the length is the easiest way to avoid trouble.
+ zap $18, 0x80, $5 # U : Bound length
+ beq $18, $not_found # U :
+ ldq_u $1, 0($16) # L : load first quadword Latency=3
+ and $17, 0xff, $17 # E : L L U U : 00000000000000ch
+
+ insbl $17, 1, $2 # U : 000000000000ch00
+ cmpult $18, 9, $4 # E : small (< 1 quad) string?
+ or $2, $17, $17 # E : 000000000000chch
+ lda $3, -1($31) # E : U L L U
+
+ sll $17, 16, $2 # U : 00000000chch0000
+ addq $16, $5, $5 # E : Max search address
+ or $2, $17, $17 # E : 00000000chchchch
+ sll $17, 32, $2 # U : U L L U : chchchch00000000
+
+ or $2, $17, $17 # E : chchchchchchchch
+ extql $1, $16, $7 # U : $7 is upper bits
+ beq $4, $first_quad # U :
+ ldq_u $6, -1($5) # L : L U U L : eight or less bytes to search Latency=3
+
+ extqh $6, $16, $6 # U : 2 cycle stall for $6
+ mov $16, $0 # E :
+ nop # E :
+ or $7, $6, $1 # E : L U L U $1 = quadword starting at $16
+
+ # Deal with the case where at most 8 bytes remain to be searched
+ # in $1. E.g.:
+ # $18 = 6
+ # $1 = ????c6c5c4c3c2c1
+$last_quad:
+ negq $18, $6 # E :
+ xor $17, $1, $1 # E :
+ srl $3, $6, $6 # U : $6 = mask of $18 bits set
+ cmpbge $31, $1, $2 # E : L U L U
+
+ nop
+ nop
+ and $2, $6, $2 # E :
+ beq $2, $not_found # U : U L U L
+
+$found_it:
+#if defined(__alpha_fix__) && defined(__alpha_cix__)
+ /*
+ * Since we are guaranteed to have set one of the bits, we don't
+ * have to worry about coming back with a 0x40 out of cttz...
+ */
+ cttz $2, $3 # U0 :
+ addq $0, $3, $0 # E : All done
+ nop # E :
+ ret # L0 : L U L U
+#else
+ /*
+ * Slow and clunky. It can probably be improved.
+ * An exercise left for others.
+ */
+ negq $2, $3 # E :
+ and $2, $3, $2 # E :
+ and $2, 0x0f, $1 # E :
+ addq $0, 4, $3 # E :
+
+ cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
+ nop # E : keep with cmov
+ and $2, 0x33, $1 # E :
+ addq $0, 2, $3 # E : U L U L : 2 cycle stall on $0
+
+ cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
+ nop # E : keep with cmov
+ and $2, 0x55, $1 # E :
+ addq $0, 1, $3 # E : U L U L : 2 cycle stall on $0
+
+ cmoveq $1, $3, $0 # E : Latency 2, extra map cycle
+ nop
+ nop
+ ret # L0 : L U L U
+#endif
+
+ # Deal with the case where $18 > 8 bytes remain to be
+ # searched. $16 may not be aligned.
+ .align 4
+$first_quad:
+ andnot $16, 0x7, $0 # E :
+ insqh $3, $16, $2 # U : $2 = 0000ffffffffffff ($16<0:2> ff)
+ xor $1, $17, $1 # E :
+ or $1, $2, $1 # E : U L U L $1 = ====ffffffffffff
+
+ cmpbge $31, $1, $2 # E :
+ bne $2, $found_it # U :
+ # At least one byte left to process.
+ ldq $1, 8($0) # L :
+ subq $5, 1, $18 # E : U L U L
+
+ addq $0, 8, $0 # E :
+ # Make $18 point to last quad to be accessed (the
+ # last quad may or may not be partial).
+ andnot $18, 0x7, $18 # E :
+ cmpult $0, $18, $2 # E :
+ beq $2, $final # U : U L U L
+
+ # At least two quads remain to be accessed.
+
+ subq $18, $0, $4 # E : $4 <- nr quads to be processed
+ and $4, 8, $4 # E : odd number of quads?
+ bne $4, $odd_quad_count # U :
+ # At least three quads remain to be accessed
+ mov $1, $4 # E : L U L U : move prefetched value to correct reg
+
+ .align 4
+$unrolled_loop:
+ ldq $1, 8($0) # L : prefetch $1
+ xor $17, $4, $2 # E :
+ cmpbge $31, $2, $2 # E :
+ bne $2, $found_it # U : U L U L
+
+ addq $0, 8, $0 # E :
+ nop # E :
+ nop # E :
+ nop # E :
+
+$odd_quad_count:
+ xor $17, $1, $2 # E :
+ ldq $4, 8($0) # L : prefetch $4
+ cmpbge $31, $2, $2 # E :
+ addq $0, 8, $6 # E :
+
+ bne $2, $found_it # U :
+ cmpult $6, $18, $6 # E :
+ addq $0, 8, $0 # E :
+ nop # E :
+
+ bne $6, $unrolled_loop # U :
+ mov $4, $1 # E : move prefetched value into $1
+ nop # E :
+ nop # E :
+
+$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do
+ nop # E :
+ nop # E :
+ bne $18, $last_quad # U :
+
+$not_found:
+ mov $31, $0 # E :
+ nop # E :
+ nop # E :
+ ret # L0 :
+
+ END(__memchr)
+
+weak_alias (__memchr, memchr)
+#if !__BOUNDED_POINTERS__
+weak_alias (__memchr, __ubp_memchr)
+#endif
+libc_hidden_builtin_def (memchr)
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/memcpy.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/memcpy.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/memcpy.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,256 @@
+/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ EV6 optimized by Rick Gorton <rick.gorton@xxxxxxxxxxxxxxxxxxx>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/*
+ * Much of the information about 21264 scheduling/coding comes from:
+ * Compiler Writer's Guide for the Alpha 21264
+ * abbreviated as 'CWG' in other comments here
+ * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
+ * Scheduling notation:
+ * E - either cluster
+ * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
+ * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
+ *
+ * Temp usage notes:
+ * $0 - destination address
+ * $1,$2, - scratch
+ */
+
+#include <sysdep.h>
+
+ .arch ev6
+ .set noreorder
+ .set noat
+
+ENTRY(memcpy)
+ .prologue 0
+
+ mov $16, $0 # E : copy dest to return
+ ble $18, $nomoredata # U : done with the copy?
+ xor $16, $17, $1 # E : are source and dest alignments the same?
+ and $1, 7, $1 # E : are they the same mod 8?
+
+ bne $1, $misaligned # U : Nope - gotta do this the slow way
+ /* source and dest are same mod 8 address */
+ and $16, 7, $1 # E : Are both 0mod8?
+ beq $1, $both_0mod8 # U : Yes
+ nop # E :
+
+ /*
+ * source and dest are same misalignment. move a byte at a time
+ * until a 0mod8 alignment for both is reached.
+ * At least one byte more to move
+ */
+
+$head_align:
+ ldbu $1, 0($17) # L : grab a byte
+ subq $18, 1, $18 # E : count--
+ addq $17, 1, $17 # E : src++
+ stb $1, 0($16) # L :
+ addq $16, 1, $16 # E : dest++
+ and $16, 7, $1 # E : Are we at 0mod8 yet?
+ ble $18, $nomoredata # U : done with the copy?
+ bne $1, $head_align # U :
+
+$both_0mod8:
+ cmple $18, 127, $1 # E : Can we unroll the loop?
+ bne $1, $no_unroll # U :
+ and $16, 63, $1 # E : get mod64 alignment
+ beq $1, $do_unroll # U : no single quads to fiddle
+
+$single_head_quad:
+ ldq $1, 0($17) # L : get 8 bytes
+ subq $18, 8, $18 # E : count -= 8
+ addq $17, 8, $17 # E : src += 8
+ nop # E :
+
+ stq $1, 0($16) # L : store
+ addq $16, 8, $16 # E : dest += 8
+ and $16, 63, $1 # E : get mod64 alignment
+ bne $1, $single_head_quad # U : still not fully aligned
+
+$do_unroll:
+ addq $16, 64, $7 # E : Initial (+1 trip) wh64 address
+ cmple $18, 127, $1 # E : Can we go through the unrolled loop?
+ bne $1, $tail_quads # U : Nope
+ nop # E :
+
+$unroll_body:
+ wh64 ($7) # L1 : memory subsystem hint: 64 bytes at
+ # ($7) are about to be over-written
+ ldq $6, 0($17) # L0 : bytes 0..7
+ nop # E :
+ nop # E :
+
+ ldq $4, 8($17) # L : bytes 8..15
+ ldq $5, 16($17) # L : bytes 16..23
+ addq $7, 64, $7 # E : Update next wh64 address
+ nop # E :
+
+ ldq $3, 24($17) # L : bytes 24..31
+ addq $16, 64, $1 # E : fallback value for wh64
+ nop # E :
+ nop # E :
+
+ addq $17, 32, $17 # E : src += 32 bytes
+ stq $6, 0($16) # L : bytes 0..7
+ nop # E :
+ nop # E :
+
+ stq $4, 8($16) # L : bytes 8..15
+ stq $5, 16($16) # L : bytes 16..23
+ subq $18, 192, $2 # E : At least two more trips to go?
+ nop # E :
+
+ stq $3, 24($16) # L : bytes 24..31
+ addq $16, 32, $16 # E : dest += 32 bytes
+ nop # E :
+ nop # E :
+
+ ldq $6, 0($17) # L : bytes 0..7
+ ldq $4, 8($17) # L : bytes 8..15
+ cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use
+ # fallback wh64 address if < 2 more trips
+ nop # E :
+
+ ldq $5, 16($17) # L : bytes 16..23
+ ldq $3, 24($17) # L : bytes 24..31
+ addq $16, 32, $16 # E : dest += 32
+ subq $18, 64, $18 # E : count -= 64
+
+ addq $17, 32, $17 # E : src += 32
+ stq $6, -32($16) # L : bytes 0..7
+ stq $4, -24($16) # L : bytes 8..15
+ cmple $18, 63, $1 # E : At least one more trip?
+
+ stq $5, -16($16) # L : bytes 16..23
+ stq $3, -8($16) # L : bytes 24..31
+ nop # E :
+ beq $1, $unroll_body
+
+$tail_quads:
+$no_unroll:
+ .align 4
+ subq $18, 8, $18 # E : At least a quad left?
+ blt $18, $less_than_8 # U : Nope
+ nop # E :
+ nop # E :
+
+$move_a_quad:
+ ldq $1, 0($17) # L : fetch 8
+ subq $18, 8, $18 # E : count -= 8
+ addq $17, 8, $17 # E : src += 8
+ nop # E :
+
+ stq $1, 0($16) # L : store 8
+ addq $16, 8, $16 # E : dest += 8
+ bge $18, $move_a_quad # U :
+ nop # E :
+
+$less_than_8:
+ .align 4
+ addq $18, 8, $18 # E : add back for trailing bytes
+ ble $18, $nomoredata # U : All-done
+ nop # E :
+ nop # E :
+
+ /* Trailing bytes */
+$tail_bytes:
+ subq $18, 1, $18 # E : count--
+ ldbu $1, 0($17) # L : fetch a byte
+ addq $17, 1, $17 # E : src++
+ nop # E :
+
+ stb $1, 0($16) # L : store a byte
+ addq $16, 1, $16 # E : dest++
+ bgt $18, $tail_bytes # U : more to be done?
+ nop # E :
+
+ /* branching to exit takes 3 extra cycles, so replicate exit here */
+ ret $31, ($26), 1 # L0 :
+ nop # E :
+ nop # E :
+ nop # E :
+
+$misaligned:
+ mov $0, $4 # E : dest temp
+ and $0, 7, $1 # E : dest alignment mod8
+ beq $1, $dest_0mod8 # U : life doesnt totally suck
+ nop
+
+$aligndest:
+ ble $18, $nomoredata # U :
+ ldbu $1, 0($17) # L : fetch a byte
+ subq $18, 1, $18 # E : count--
+ addq $17, 1, $17 # E : src++
+
+ stb $1, 0($4) # L : store it
+ addq $4, 1, $4 # E : dest++
+ and $4, 7, $1 # E : dest 0mod8 yet?
+ bne $1, $aligndest # U : go until we are aligned.
+
+ /* Source has unknown alignment, but dest is known to be 0mod8 */
+$dest_0mod8:
+ subq $18, 8, $18 # E : At least a quad left?
+ blt $18, $misalign_tail # U : Nope
+ ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes
+ nop # E :
+
+$mis_quad:
+ ldq_u $16, 8($17) # L : Fetch next 8
+ extql $3, $17, $3 # U : masking
+ extqh $16, $17, $1 # U : masking
+ bis $3, $1, $1 # E : merged bytes to store
+
+ subq $18, 8, $18 # E : count -= 8
+ addq $17, 8, $17 # E : src += 8
+ stq $1, 0($4) # L : store 8 (aligned)
+ mov $16, $3 # E : "rotate" source data
+
+ addq $4, 8, $4 # E : dest += 8
+ bge $18, $mis_quad # U : More quads to move
+ nop
+ nop
+
+$misalign_tail:
+ addq $18, 8, $18 # E : account for tail stuff
+ ble $18, $nomoredata # U :
+ nop
+ nop
+
+$misalign_byte:
+ ldbu $1, 0($17) # L : fetch 1
+ subq $18, 1, $18 # E : count--
+ addq $17, 1, $17 # E : src++
+ nop # E :
+
+ stb $1, 0($4) # L : store
+ addq $4, 1, $4 # E : dest++
+ bgt $18, $misalign_byte # U : more to go?
+ nop
+
+
+$nomoredata:
+ ret $31, ($26), 1 # L0 :
+ nop # E :
+ nop # E :
+ nop # E :
+
+END(memcpy)
+libc_hidden_builtin_def (memcpy)
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/memset.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/memset.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/memset.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,224 @@
+/* Copyright (C) 2000, 2003 Free Software Foundation, Inc.
+ Contributed by Richard Henderson (rth@xxxxxxxx)
+ EV6 optimized by Rick Gorton <rick.gorton@xxxxxxxxxxxxxxxxxxx>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <sysdep.h>
+
+ .arch ev6
+ .set noat
+ .set noreorder
+
+ENTRY(memset)
+#ifdef PROF
+ ldgp gp, 0(pv)
+ lda AT, _mcount
+ jsr AT, (AT), _mcount
+ .prologue 1
+#else
+ .prologue 0
+#endif
+
+ /*
+ * Serious stalling happens. The only way to mitigate this is to
+ * undertake a major re-write to interleave the constant materialization
+ * with other parts of the fall-through code. This is important, even
+ * though it makes maintenance tougher.
+ * Do this later.
+ */
+ and $17, 255, $1 # E : 00000000000000ch
+ insbl $17, 1, $2 # U : 000000000000ch00
+ mov $16, $0 # E : return value
+ ble $18, $end # U : zero length requested?
+
+ addq $18, $16, $6 # E : max address to write to
+ or $1, $2, $17 # E : 000000000000chch
+ insbl $1, 2, $3 # U : 0000000000ch0000
+ insbl $1, 3, $4 # U : 00000000ch000000
+
+ or $3, $4, $3 # E : 00000000chch0000
+ inswl $17, 4, $5 # U : 0000chch00000000
+ xor $16, $6, $1 # E : will complete write be within one quadword?
+ inswl $17, 6, $2 # U : chch000000000000
+
+ or $17, $3, $17 # E : 00000000chchchch
+ or $2, $5, $2 # E : chchchch00000000
+ bic $1, 7, $1 # E : fit within a single quadword?
+ and $16, 7, $3 # E : Target addr misalignment
+
+ or $17, $2, $17 # E : chchchchchchchch
+ beq $1, $within_quad # U :
+ nop # E :
+ beq $3, $aligned # U : target is 0mod8
+
+ /*
+ * Target address is misaligned, and won't fit within a quadword.
+ */
+ ldq_u $4, 0($16) # L : Fetch first partial
+ mov $16, $5 # E : Save the address
+ insql $17, $16, $2 # U : Insert new bytes
+ subq $3, 8, $3 # E : Invert (for addressing uses)
+
+ addq $18, $3, $18 # E : $18 is new count ($3 is negative)
+ mskql $4, $16, $4 # U : clear relevant parts of the quad
+ subq $16, $3, $16 # E : $16 is new aligned destination
+ or $2, $4, $1 # E : Final bytes
+
+ nop
+ stq_u $1,0($5) # L : Store result
+ nop
+ nop
+
+ .align 4
+$aligned:
+ /*
+ * We are now guaranteed to be quad aligned, with at least
+ * one partial quad to write.
+ */
+
+ sra $18, 3, $3 # U : Number of remaining quads to write
+ and $18, 7, $18 # E : Number of trailing bytes to write
+ mov $16, $5 # E : Save dest address
+ beq $3, $no_quad # U : tail stuff only
+
+ /*
+ * It's worth the effort to unroll this and use wh64 if possible.
+ * At this point, entry values are:
+ * $16 Current destination address
+ * $5 A copy of $16
+ * $6 The max quadword address to write to
+ * $18 Number trailer bytes
+ * $3 Number quads to write
+ */
+
+ and $16, 0x3f, $2 # E : Forward work (only useful for unrolled loop)
+ subq $3, 16, $4 # E : Only try to unroll if > 128 bytes
+ subq $2, 0x40, $1 # E : bias counter (aligning stuff 0mod64)
+ blt $4, $loop # U :
+
+ /*
+ * We know we've got at least 16 quads, minimum of one trip
+ * through unrolled loop. Do a quad at a time to get us 0mod64
+ * aligned.
+ */
+
+ nop # E :
+ nop # E :
+ nop # E :
+ beq $1, $bigalign # U :
+
+$alignmod64:
+ stq $17, 0($5) # L :
+ subq $3, 1, $3 # E : For consistency later
+ addq $1, 8, $1 # E : Increment towards zero for alignment
+ addq $5, 8, $4 # E : Initial wh64 address (filler instruction)
+
+ nop
+ nop
+ addq $5, 8, $5 # E : Inc address
+ blt $1, $alignmod64 # U :
+
+$bigalign:
+ /*
+ * $3 - number quads left to go
+ * $5 - target address (aligned 0mod64)
+ * $17 - mask of stuff to store
+ * Scratch registers available: $7, $2, $4, $1
+ * We know that we'll be taking a minimum of one trip through.
+ * CWG Section 3.7.6: do not expect a sustained store rate of > 1/cycle
+ * Assumes the wh64 needs to be for 2 trips through the loop in the future.
+ * The wh64 is issued on for the starting destination address for trip +2
+ * through the loop, and if there are less than two trips left, the target
+ * address will be for the current trip.
+ */
+
+$do_wh64:
+ wh64 ($4) # L1 : memory subsystem write hint
+ subq $3, 24, $2 # E : For determining future wh64 addresses
+ stq $17, 0($5) # L :
+ nop # E :
+
+ addq $5, 128, $4 # E : speculative target of next wh64
+ stq $17, 8($5) # L :
+ stq $17, 16($5) # L :
+ addq $5, 64, $7 # E : Fallback address for wh64 (== next trip addr)
+
+ stq $17, 24($5) # L :
+ stq $17, 32($5) # L :
+ cmovlt $2, $7, $4 # E : Latency 2, extra mapping cycle
+ nop
+
+ stq $17, 40($5) # L :
+ stq $17, 48($5) # L :
+ subq $3, 16, $2 # E : Repeat the loop at least once more?
+ nop
+
+ stq $17, 56($5) # L :
+ addq $5, 64, $5 # E :
+ subq $3, 8, $3 # E :
+ bge $2, $do_wh64 # U :
+
+ nop
+ nop
+ nop
+ beq $3, $no_quad # U : Might have finished already
+
+ .align 4
+ /*
+ * Simple loop for trailing quadwords, or for small amounts
+ * of data (where we can't use an unrolled loop and wh64)
+ */
+$loop:
+ stq $17, 0($5) # L :
+ subq $3, 1, $3 # E : Decrement number quads left
+ addq $5, 8, $5 # E : Inc address
+ bne $3, $loop # U : more?
+
+$no_quad:
+ /*
+ * Write 0..7 trailing bytes.
+ */
+ nop # E :
+ beq $18, $end # U : All done?
+ ldq $7, 0($5) # L :
+ mskqh $7, $6, $2 # U : Mask final quad
+
+ insqh $17, $6, $4 # U : New bits
+ or $2, $4, $1 # E : Put it all together
+ stq $1, 0($5) # L : And back to memory
+ ret $31,($26),1 # L0 :
+
+$within_quad:
+ ldq_u $1, 0($16) # L :
+ insql $17, $16, $2 # U : New bits
+ mskql $1, $16, $4 # U : Clear old
+ or $2, $4, $2 # E : New result
+
+ mskql $2, $6, $4 # U :
+ mskqh $1, $6, $2 # U :
+ or $2, $4, $1 # E :
+ stq_u $1, 0($16) # L :
+
+$end:
+ nop
+ nop
+ nop
+ ret $31,($26),1 # L0 :
+
+ END(memset)
+libc_hidden_builtin_def (memset)
Added: fsf/trunk/ports/sysdeps/alpha/alphaev6/stxcpy.S
==============================================================================
--- fsf/trunk/ports/sysdeps/alpha/alphaev6/stxcpy.S (added)
+++ fsf/trunk/ports/sysdeps/alpha/alphaev6/stxcpy.S Wed Nov 26 00:02:02 2008
@@ -1,0 +1,328 @@
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+ Contributed by Richard Henderson (rth@xxxxxxxx)
+ EV6 optimized by Rick Gorton <rick.gorton@xxxxxxxxxxxxxxxxxxx>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Copy a null-terminated string from SRC to DST.
+
+ This is an internal routine used by strcpy, stpcpy, and strcat.
+ As such, it uses special linkage conventions to make implementation
+ of these public functions more efficient.
+
+ On input:
+ t9 = return address
+ a0 = DST
+ a1 = SRC
+
+ On output:
+ t8 = bitmask (with one bit set) indicating the last byte written
+ a0 = unaligned address of the last *word* written
+
+ Furthermore, v0, a3-a5, t11, and t12 are untouched.
+*/
+
+
+#include <sysdep.h>
+
+ .arch ev6
+ .set noat
+ .set noreorder
+ .text
+
+/* There is a problem with either gdb (as of 4.16) or gas (as of 2.7) that
+ doesn't like putting the entry point for a procedure somewhere in the
+ middle of the procedure descriptor. Work around this by putting the
+ aligned copy in its own procedure descriptor */
+
+
+ .ent stxcpy_aligned
+ .align 4
+stxcpy_aligned:
+ .frame sp, 0, t9
+ .prologue 0
+
+ /* On entry to this basic block:
+ t0 == the first destination word for masking back in
+ t1 == the first source word. */
+
+ /* Create the 1st output word and detect 0's in the 1st input word. */
+ lda t2, -1 # E : build a mask against false zero
+ mskqh t2, a1, t2 # U : detection in the src word (stall)
+ mskqh t1, a1, t3 # U :
+ ornot t1, t2, t2 # E : (stall)
+
+ mskql t0, a1, t0 # U : assemble the first output word
+ cmpbge zero, t2, t10 # E : bits set iff null found
+ or t0, t3, t1 # E : (stall)
+ bne t10, $a_eos # U : (stall)
+
+ /* On entry to this basic block:
+ t0 == the first destination word for masking back in
+ t1 == a source word not containing a null. */
+ /* Nops here to separate store quads from load quads */
+
+$a_loop:
+ stq_u t1, 0(a0) # L :
+ addq a0, 8, a0 # E :
+ nop
+ nop
+
+ ldq_u t1, 0(a1) # L : Latency=3
+ addq a1, 8, a1 # E :
+ cmpbge zero, t1, t10 # E : (3 cycle stall)
+ beq t10, $a_loop # U : (stall for t10)
+
+ /* Take care of the final (partial) word store.
+ On entry to this basic block we have:
+ t1 == the source word containing the null
+ t10 == the cmpbge mask that found it. */
+$a_eos:
+ negq t10, t6 # E : find low bit set
+ and t10, t6, t8 # E : (stall)
+ /* For the sake of the cache, don't read a destination word
+ if we're not going to need it. */
+ and t8, 0x80, t6 # E : (stall)
+ bne t6, 1f # U : (stall)
+
+ /* We're doing a partial word store and so need to combine
+ our source and original destination words. */
+ ldq_u t0, 0(a0) # L : Latency=3
+ subq t8, 1, t6 # E :
+ zapnot t1, t6, t1 # U : clear src bytes >= null (stall)
+ or t8, t6, t10 # E : (stall)
+
+ zap t0, t10, t0 # E : clear dst bytes <= null
+ or t0, t1, t1 # E : (stall)
+ nop
+ nop
+
+1: stq_u t1, 0(a0) # L :
+ ret (t9) # L0 : Latency=3
+ nop
+ nop
+
+ .end stxcpy_aligned
+
+ .align 4
+ .ent __stxcpy
+ .globl __stxcpy
+__stxcpy:
+ .frame sp, 0, t9
+ .prologue 0
+
+ /* Are source and destination co-aligned? */
+ xor a0, a1, t0 # E :
+ unop # E :
+ and t0, 7, t0 # E : (stall)
+ bne t0, $unaligned # U : (stall)
+
+ /* We are co-aligned; take care of a partial first word. */
+ ldq_u t1, 0(a1) # L : load first src word
+ and a0, 7, t0 # E : take care not to load a word ...
+ addq a1, 8, a1 # E :
+ beq t0, stxcpy_aligned # U : ... if we wont need it (stall)
+
+ ldq_u t0, 0(a0) # L :
+ br stxcpy_aligned # L0 : Latency=3
+ nop
+ nop
+
+
+/* The source and destination are not co-aligned. Align the destination
+ and cope. We have to be very careful about not reading too much and
+ causing a SEGV. */
+
+ .align 4
+$u_head:
+ /* We know just enough now to be able to assemble the first
+ full source word. We can still find a zero at the end of it
+ that prevents us from outputting the whole thing.
+
+ On entry to this basic block:
+ t0 == the first dest word, for masking back in, if needed else 0
+ t1 == the low bits of the first source word
+ t6 == bytemask that is -1 in dest word bytes */
+
+ ldq_u t2, 8(a1) # L :
+ addq a1, 8, a1 # E :
+ extql t1, a1, t1 # U : (stall on a1)
+ extqh t2, a1, t4 # U : (stall on a1)
+
+ mskql t0, a0, t0 # U :
+ or t1, t4, t1 # E :
+ mskqh t1, a0, t1 # U : (stall on t1)
+ or t0, t1, t1 # E : (stall on t1)
+
+ or t1, t6, t6 # E :
+ cmpbge zero, t6, t10 # E : (stall)
+ lda t6, -1 # E : for masking just below
+ bne t10, $u_final # U : (stall)
+
+ mskql t6, a1, t6 # U : mask out the bits we have
+ or t6, t2, t2 # E : already extracted before (stall)
+ cmpbge zero, t2, t10 # E : testing eos (stall)
+ bne t10, $u_late_head_exit # U : (stall)
+
+ /* Finally, we've got all the stupid leading edge cases taken care
+ of and we can set up to enter the main loop. */
+
+ stq_u t1, 0(a0) # L : store first output word
+ addq a0, 8, a0 # E :
+ extql t2, a1, t0 # U : position ho-bits of lo word
+ ldq_u t2, 8(a1) # U : read next high-order source word
+
+ addq a1, 8, a1 # E :
+ cmpbge zero, t2, t10 # E : (stall for t2)
+ nop # E :
+ bne t10, $u_eos # U : (stall)
+
+ /* Unaligned copy main loop. In order to avoid reading too much,
+ the loop is structured to detect zeros in aligned source words.
+ This has, unfortunately, effectively pulled half of a loop
+ iteration out into the head and half into the tail, but it does
+ prevent nastiness from accumulating in the very thing we want
+ to run as fast as possible.
+
+ On entry to this basic block:
+ t0 == the shifted high-order bits from the previous source word
[... 28821 lines stripped ...]