diff -Niru yafu-1.28.5/Makefile yafu-1.28.5-edit/Makefile
--- yafu-1.28.5/Makefile	2011-09-07 12:16:12.000000000 +0200
+++ yafu-1.28.5-edit/Makefile	2011-09-14 13:13:04.689000000 +0200
@@ -19,8 +19,8 @@
 CC = gcc
 #CC = x86_64-w64-mingw32-gcc-4.5.1
-#CFLAGS = -march=core2 -mtune=core2
-CFLAGS = -g
+CFLAGS = -march=core2 -mtune=core2
+#CFLAGS = -g
 
 WARN_FLAGS = -Wall #-W -Wconversion
 OPT_FLAGS = -O3
 INC = -I. -Iinclude
@@ -53,12 +53,12 @@
 	CFLAGS += -DHAVE_GMP
 	INC += -I../gmp/include
 	INC += -I../gmp-ecm/include
-	LIBS += -L../gmp/lib/linux -L../gmp-ecm/lib/linux -lecm -lgmp
+	LIBS += -L../gmp/lib/linux -L../gmp-ecm/lib/linux -lecm -lgmp -L/usr/local/lib
 endif
 
 ifeq ($(NFS),1)
 	CFLAGS += -DUSE_NFS
-	LIBS += -L../msieve/lib/linux
+	LIBS += -L../msieve
 
 	# NFS builds require GMP
 	CFLAGS += -DHAVE_GMP
@@ -70,7 +70,7 @@
 	CFLAGS += -DHAVE_GMP_ECM
 	INC += -I../gmp-ecm/include
 #	INC += -I/sppdg/scratch/buhrow/ecm-6.2.3/install/include/
-	LIBS += -L../gmp-ecm/lib/linux -lecm -lmsieve -lgmp
+	LIBS += -L../gmp-ecm/lib/linux -lecm -lmsieve -lgmp -lz
 #	LIBS += -L/sppdg/scratch/buhrow/ecm-6.2.3/install/lib/ -lecm -lmsieve -lgmp
 endif
diff -Niru yafu-1.28.5/factor/gmp-ecm/ecm.c yafu-1.28.5-edit/factor/gmp-ecm/ecm.c
--- yafu-1.28.5/factor/gmp-ecm/ecm.c	2011-07-24 21:07:54.000000000 +0200
+++ yafu-1.28.5-edit/factor/gmp-ecm/ecm.c	2011-09-14 13:20:25.322000000 +0200
@@ -28,7 +28,7 @@
 
 void *malloc_shared(size_t bytes, int *save_shmid)
 {
-	int shmid = shmget(IPC_PRIVATE, bytes, SHM_R | SHM_W);
+	int shmid = shmget(IPC_PRIVATE, bytes, IPC_CREAT | 0666);
 	if (shmid == -1) {
 		printf("Couldn't allocated shared memory segment in ECM\n");
 		exit(1);
diff -Niru yafu-1.28.5/factor/qs/smallmpqs.c yafu-1.28.5-edit/factor/qs/smallmpqs.c
--- yafu-1.28.5/factor/qs/smallmpqs.c	2011-08-11 08:02:34.000000000 +0200
+++ yafu-1.28.5-edit/factor/qs/smallmpqs.c	2011-09-14 02:47:23.465000000 +0200
@@ -130,72 +130,7 @@
 void sm_get_params(int bits, uint32 *B, uint32 *M, uint32 *BL);
 int qcomp_smpqs(const void *x, const void *y);
 
-#if defined(GCC_ASM32X) || defined(GCC_ASM64X) || defined(__MINGW32__)
-	//these compilers support SIMD
-	#define SM_SIMD_SIEVE_SCAN 1
-	#define SM_SCAN_CLEAN asm volatile("emms");
-
-	#if defined(HAS_SSE2)
-		#define SM_SIMD_SIEVE_SCAN_VEC 1
-
-		//top level sieve scanning with SSE2
-		#define SM_SIEVE_SCAN_64 \
-			asm volatile ( \
-				"movdqa (%1), %%xmm0 \n\t" \
-				"por 16(%1), %%xmm0 \n\t" \
-				"por 32(%1), %%xmm0 \n\t" \
-				"por 48(%1), %%xmm0 \n\t" \
-				"pmovmskb %%xmm0, %0 \n\t" \
-				: "=r"(result) \
-				: "r"(sieveblock + j) \
-				: "%xmm0");
-
-		#define SIEVE_SCAN_64_VEC \
-			asm volatile ( \
-				"movdqa (%1), %%xmm0 \n\t" \
-				"por 16(%1), %%xmm0 \n\t" \
-				"por 32(%1), %%xmm0 \n\t" \
-				"por 48(%1), %%xmm0 \n\t" \
-				"pmovmskb %%xmm0, %%r11 \n\t" /* output results to 64 bit register */ \
-				"testq %%r11, %%r11 \n\t" /* AND, and set ZF */ \
-				"jz 2f \n\t" /* jump out if zero (no hits). high percentage. */ \
-				"movdqa (%1), %%xmm0 \n\t" /* else, we had hits, move sections of sieveblock back in */ \
-				"movdqa 16(%1), %%xmm1 \n\t" /* there are 16 bytes in each section */ \
-				"movdqa 32(%1), %%xmm2 \n\t" /* extract high bit masks from each byte */ \
-				"movdqa 48(%1), %%xmm3 \n\t" /* and combine into one 64 bit register */ \
-				"pmovmskb %%xmm1, %%r9d \n\t" /* */ \
-				"pmovmskb %%xmm3, %%r11d \n\t" /* */ \
-				"salq $16, %%r9 \n\t" /* */ \
-				"pmovmskb %%xmm2, %%r10d \n\t" /* */ \
-				"salq $48, %%r11 \n\t" /* */ \
-				"pmovmskb %%xmm0, %%r8d \n\t" /* */ \
-				"salq $32, %%r10 \n\t" /* */ \
-				"orq %%r11,%%r9 \n\t" /* */ \
-				"orq %%r10,%%r8 \n\t" /* */ \
-				"orq %%r9,%%r8 \n\t" /* r8 now holds 64 byte mask results, in order, from sieveblock */ \
-				"xorq %%r11,%%r11 \n\t" /* initialize count of set bits */ \
-				"xorq %%r10,%%r10 \n\t" /* initialize bit scan offset */ \
-				"1: \n\t" /* top of bit scan loop */ \
-				"bsfq %%r8,%%rcx \n\t" /* put least significant set bit index into rcx */ \
-				"addq %%rcx,%%r10 \n\t" /* add in the offset of this index */ \
-				"movb %%r10b, (%2, %%r11, 1) \n\t" /* put the bit index into the output buffer */ \
-				"shrq %%cl,%%r8 \n\t" /* shift the bit scan register up to the bit we just processed */ \
-				"incq %%r11 \n\t" /* increment the count of set bits */ \
-				"shrq $1, %%r8 \n\t" /* clear the bit */ \
-				"testq %%r8,%%r8 \n\t" /* check if there are any more set bits */ \
-				"jnz 1b \n\t" /* loop if so */ \
-				"2: \n\t" /* */ \
-				"movl %%r11d, %0 \n\t" /* return the count of set bits */ \
-				: "=r"(result) \
-				: "r"(sieveblock + j), "r"(buffer) \
-				: "xmm0", "xmm1", "xmm2", "xmm3", "r8", "r9", "r10", "r11", "rcx", "cc", "memory");
-
-	#else
-
-		#undef SM_SIMD_SIEVE_SCAN
-	#endif
-
-#elif defined(MSC_ASM32A)
+#if defined(MSC_ASM32A)
 	#define SM_SIMD_SIEVE_SCAN 1
 	#define SM_SCAN_CLEAN ASM_M {emms};
 
diff -Niru yafu-1.28.5/factor/qs/tdiv_scan.c yafu-1.28.5-edit/factor/qs/tdiv_scan.c
--- yafu-1.28.5/factor/qs/tdiv_scan.c	2011-09-07 10:47:30.000000000 +0200
+++ yafu-1.28.5-edit/factor/qs/tdiv_scan.c	2011-09-14 02:46:26.298000000 +0200
@@ -55,81 +55,7 @@
 
 */
 
-#if defined(GCC_ASM32X) || defined(GCC_ASM64X) || defined(__MINGW32__)
-	#define SCAN_CLEAN asm volatile("emms");
-
-	//top level sieve scanning with SSE2
-	#define SIEVE_SCAN_32_VEC \
-		asm volatile ( \
-			"movdqa (%1), %%xmm0 \n\t" \
-			"por 16(%1), %%xmm0 \n\t" \
-			"pmovmskb %%xmm0, %%r11 \n\t" /* output results to 64 bit register */ \
-			"testq %%r11, %%r11 \n\t" /* AND, and set ZF */ \
-			"jz 2f \n\t" /* jump out if zero (no hits). high percentage. */ \
-			"movdqa (%1), %%xmm0 \n\t" /* else, we had hits, move sections of sieveblock back in */ \
-			"movdqa 16(%1), %%xmm1 \n\t" /* there are 16 bytes in each section */ \
-			"pmovmskb %%xmm1, %%r9d \n\t" /* */ \
-			"salq $16, %%r9 \n\t" /* */ \
-			"pmovmskb %%xmm0, %%r8d \n\t" /* */ \
-			"orq %%r9,%%r8 \n\t" /* r8 now holds 64 byte mask results, in order, from sieveblock */ \
-			"xorq %%r11,%%r11 \n\t" /* initialize count of set bits */ \
-			"xorq %%r10,%%r10 \n\t" /* initialize bit scan offset */ \
-			"1: \n\t" /* top of bit scan loop */ \
-			"bsfq %%r8,%%rcx \n\t" /* put least significant set bit index into rcx */ \
-			"addq %%rcx,%%r10 \n\t" /* add in the offset of this index */ \
-			"movb %%r10b, (%2, %%r11, 1) \n\t" /* put the bit index into the output buffer */ \
-			"shrq %%cl,%%r8 \n\t" /* shift the bit scan register up to the bit we just processed */ \
-			"incq %%r11 \n\t" /* increment the count of set bits */ \
-			"shrq $1, %%r8 \n\t" /* clear the bit */ \
-			"testq %%r8,%%r8 \n\t" /* check if there are any more set bits */ \
-			"jnz 1b \n\t" /* loop if so */ \
-			"2: \n\t" /* */ \
-			"movl %%r11d, %0 \n\t" /* return the count of set bits */ \
-			: "=r"(result) \
-			: "r"(sieveblock + j), "r"(buffer) \
-			: "xmm0", "xmm1", "xmm2", "xmm3", "r8", "r9", "r10", "r11", "rcx", "cc", "memory");
-
-	#define SIEVE_SCAN_64_VEC \
-		asm volatile ( \
-			"movdqa (%1), %%xmm0 \n\t" \
-			"por 16(%1), %%xmm0 \n\t" \
-			"por 32(%1), %%xmm0 \n\t" \
-			"por 48(%1), %%xmm0 \n\t" \
-			"pmovmskb %%xmm0, %%r11 \n\t" /* output results to 64 bit register */ \
-			"testq %%r11, %%r11 \n\t" /* AND, and set ZF */ \
-			"jz 2f \n\t" /* jump out if zero (no hits). high percentage. */ \
-			"movdqa (%1), %%xmm0 \n\t" /* else, we had hits, move sections of sieveblock back in */ \
-			"movdqa 16(%1), %%xmm1 \n\t" /* there are 16 bytes in each section */ \
-			"movdqa 32(%1), %%xmm2 \n\t" /* extract high bit masks from each byte */ \
-			"movdqa 48(%1), %%xmm3 \n\t" /* and combine into one 64 bit register */ \
-			"pmovmskb %%xmm1, %%r9d \n\t" /* */ \
-			"pmovmskb %%xmm3, %%r11d \n\t" /* */ \
-			"salq $16, %%r9 \n\t" /* */ \
-			"pmovmskb %%xmm2, %%r10d \n\t" /* */ \
-			"salq $48, %%r11 \n\t" /* */ \
-			"pmovmskb %%xmm0, %%r8d \n\t" /* */ \
-			"salq $32, %%r10 \n\t" /* */ \
-			"orq %%r11,%%r9 \n\t" /* */ \
-			"orq %%r10,%%r8 \n\t" /* */ \
-			"xorq %%r11,%%r11 \n\t" /* initialize count of set bits */ \
-			"orq %%r9,%%r8 \n\t" /* r8 now holds 64 byte mask results, in order, from sieveblock */ \
-			"xorq %%r10,%%r10 \n\t" /* initialize bit scan offset */ \
-			"1: \n\t" /* top of bit scan loop */ \
-			"bsfq %%r8,%%rcx \n\t" /* put least significant set bit index into rcx */ \
-			"addq %%rcx,%%r10 \n\t" /* add in the offset of this index */ \
-			"movb %%r10b, (%2, %%r11, 1) \n\t" /* put the bit index into the output buffer */ \
-			"shrq %%cl,%%r8 \n\t" /* shift the bit scan register up to the bit we just processed */ \
-			"incq %%r11 \n\t" /* increment the count of set bits */ \
-			"shrq $1, %%r8 \n\t" /* clear the bit */ \
-			"testq %%r8,%%r8 \n\t" /* check if there are any more set bits */ \
-			"jnz 1b \n\t" /* loop if so */ \
-			"2: \n\t" /* */ \
-			"movl %%r11d, %0 \n\t" /* return the count of set bits */ \
-			: "=r"(result) \
-			: "r"(sieveblock + j), "r"(buffer) \
-			: "xmm0", "xmm1", "xmm2", "xmm3", "r8", "r9", "r10", "r11", "rcx", "cc", "memory");
-
-#elif defined(MSC_ASM32A)
+#if defined(MSC_ASM32A)
 	#define SCAN_CLEAN ASM_M {emms};
 
 	//top level sieve scanning with SSE2
diff -Niru yafu-1.28.5/include/types.h yafu-1.28.5-edit/include/types.h
--- yafu-1.28.5/include/types.h	2011-07-20 08:31:44.000000000 +0200
+++ yafu-1.28.5-edit/include/types.h	2011-09-14 02:57:59.066000000 +0200
@@ -331,6 +331,7 @@
 	#define strto_fpdigit strtoul
 	#define strto_uint64 strtoull
+	#define align_free free
 
 	typedef unsigned char uint8;
 	typedef unsigned short uint16;
 	typedef uint32_t uint32;
diff -Niru yafu-1.28.5/top/driver.c yafu-1.28.5-edit/top/driver.c
--- yafu-1.28.5/top/driver.c	2011-09-07 12:16:12.000000000 +0200
+++ yafu-1.28.5-edit/top/driver.c	2011-09-14 02:40:21.127000000 +0200
@@ -33,7 +33,7 @@
 #if defined(HAVE_GMP_ECM) && defined(_MSC_VER)
 	#include <config.h>
 #elif defined(HAVE_GMP_ECM)
-	#include "config.h"
+	#include "../../ecm-6.3/config.h"
 #endif
 
 // the number of recognized command line options
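
A note on the ecm.c hunk: the original shmget() call passed only SHM_R | SHM_W
(owner read/write) as the third argument, while the patched call passes
IPC_CREAT | 0666, explicitly requesting creation of the segment and granting
read/write access to all users. The sketch below shows the patched allocation
pattern in self-contained form; malloc_shared_sketch and its error handling are
illustrative, not yafu's exact code.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    /* Minimal sketch of the System V shared-memory pattern used by the
       patched malloc_shared(): create an anonymous segment, attach it,
       and hand the id back so the caller can remove it later. */
    void *malloc_shared_sketch(size_t bytes, int *save_shmid)
    {
        /* IPC_CREAT | 0666: request creation, readable/writable by all */
        int shmid = shmget(IPC_PRIVATE, bytes, IPC_CREAT | 0666);
        if (shmid == -1) {
            perror("shmget");
            exit(1);
        }

        void *ptr = shmat(shmid, NULL, 0);   /* map into this process */
        if (ptr == (void *)-1) {
            perror("shmat");
            exit(1);
        }

        *save_shmid = shmid;                 /* needed for later cleanup */
        return ptr;
    }

A matching cleanup would shmdt() the returned pointer and then call
shmctl(shmid, IPC_RMID, NULL) to remove the segment.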
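
The blocks deleted from smallmpqs.c and tdiv_scan.c are GCC inline-assembly
fast paths, presumably dropped because they would not build in this
environment; with them gone, the code falls back to the non-SIMD scan.
Functionally, SIEVE_SCAN_64_VEC collects the index of every byte in a 64-byte
block of the sieve array whose top bit is set. A portable C sketch of that
computation, assuming GCC's __builtin_ctzll in place of bsfq (scan_block_64 is
a hypothetical name, not a function in yafu):

    #include <stdint.h>

    /* Portable sketch of what the deleted SIEVE_SCAN_64_VEC macro computes:
       for a 64-byte block, record the offset of every byte whose high bit is
       set (a sieve location promising enough to trial-divide) and return how
       many there were. */
    static int scan_block_64(const uint8_t *block, uint8_t *buffer)
    {
        uint64_t mask = 0;
        int count = 0;

        /* the pmovmskb steps: gather the high bit of each byte into a
           64-bit mask, bit i corresponding to block[i] */
        for (int i = 0; i < 64; i++)
            if (block[i] & 0x80)
                mask |= (uint64_t)1 << i;

        /* the bsfq loop: emit set-bit indices in ascending order */
        while (mask != 0) {
            buffer[count++] = (uint8_t)__builtin_ctzll(mask);
            mask &= mask - 1;           /* clear the lowest set bit */
        }
        return count;
    }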
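
Finally, the types.h hunk defines align_free as plain free(). That is only
sound when the matching allocations really come from plain malloc(); an
allocator that aligns by over-allocating must release memory through its own
free routine. For contrast, a generic sketch of that over-allocation scheme
(aligned_malloc/aligned_free are illustrative, not taken from yafu):

    #include <stdint.h>
    #include <stdlib.h>

    /* Classic manual alignment: over-allocate, round up to the requested
       boundary, and stash the original pointer just below the aligned
       address.  align must be a power of two, at least sizeof(void *). */
    static void *aligned_malloc(size_t bytes, size_t align)
    {
        void *raw = malloc(bytes + align + sizeof(void *));
        if (raw == NULL)
            return NULL;
        uintptr_t base = (uintptr_t)raw + sizeof(void *);
        uintptr_t aligned = (base + align - 1) & ~(uintptr_t)(align - 1);
        ((void **)aligned)[-1] = raw;   /* remember the real block */
        return (void *)aligned;
    }

    static void aligned_free(void *ptr)
    {
        /* plain free(ptr) here would corrupt the heap, which is why
           align_free can only alias free() for plain-malloc builds */
        free(((void **)ptr)[-1]);
    }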