#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <math.h>
#include "sequences.h"
static
UV isqrt(UV n) {
UV root;
#if BITS_PER_WORD == 32
if
(n >= W_CONST(4294836225))
return
W_CONST(65535);
#else
if
(n >= W_CONST(18446744065119617025))
return
W_CONST(4294967295);
#endif
root = (UV)
sqrt
((
double
)n);
while
(root*root > n) root--;
while
((root+1)*(root+1) <= n) root++;
return
root;
}
unsigned
char
* sieve_erat30(WTYPE end);
static
unsigned
char
nextwheel30[30] = {
1, 7, 7, 7, 7, 7, 7, 11, 11, 11, 11, 13, 13, 17, 17,
17, 17, 19, 19, 23, 23, 23, 23, 29, 29, 29, 29, 29, 29, 1 };
static
unsigned
char
prevwheel30[30] = {
29, 29, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 11, 11, 13,
13, 13, 13, 17, 17, 19, 19, 19, 19, 23, 23, 23, 23, 23, 23 };
static
unsigned
char
masktab30[30] = {
0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 4, 0, 8, 0,
0, 0, 16, 0, 32, 0, 0, 0, 64, 0, 0, 0, 0, 0,128 };
static
unsigned
char
distancewheel30[30] = {
1, 0, 5, 4, 3, 2, 1, 0, 3, 2, 1, 0, 1, 0, 3,
2, 1, 0, 1, 0, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0 };
#if 0
static
int
is_prime_in_sieve(
const
unsigned
char
* sieve, WTYPE p) {
WTYPE d = p/30;
WTYPE m = p - d*30;
return
( (masktab30[m] != 0) && ((sieve[d] & masktab30[m]) == 0) );
}
#endif
static
WTYPE next_prime_in_sieve(
const
unsigned
char
* sieve, WTYPE p) {
WTYPE d, m;
if
(p < 7)
return
(p < 2) ? 2 : (p < 3) ? 3 : (p < 5) ? 5 : 7;
d = p/30;
m = p - d*30;
do
{
if
(m==29) { d++; m = 1;
while
(sieve[d] == 0xFF) d++; }
else
{ m = nextwheel30[m]; }
}
while
(sieve[d] & masktab30[m]);
return
(d*30+m);
}
static
WTYPE prev_prime_in_sieve(
const
unsigned
char
* sieve, WTYPE p) {
WTYPE d, m;
if
(p <= 7)
return
(p <= 2) ? 0 : (p <= 3) ? 2 : (p <= 5) ? 3 : 5;
d = p/30;
m = p - d*30;
do
{
m = prevwheel30[m];
if
(m==29) {
if
(d == 0)
return
0; d--; }
}
while
(sieve[d] & masktab30[m]);
return
(d*30+m);
}
#define START_DO_FOR_EACH_SIEVE_PRIME(sieve, a, b) \
{ \
WTYPE p = a; \
WTYPE l_ = b; \
WTYPE d_ = p/30; \
WTYPE m_ = p-d_*30; \
m_ += distancewheel30[m_]; \
p = d_*30 + m_; \
while
( p <= l_ ) { \
if
((sieve[d_] & masktab30[m_]) == 0)
#define END_DO_FOR_EACH_SIEVE_PRIME \
m_ = nextwheel30[m_];
if
(m_ == 1) { d_++; } \
p = d_*30+m_; \
} \
}
#if 0
static
__inline__ uint64_t rdtsc(
void
)
{
unsigned a, d;
asm
volatile
(
"rdtsc"
:
"=a"
(a),
"=d"
(d));
return
((uint64_t)a) | (((uint64_t)d) << 32);
}
#endif
#if BITS_PER_WORD == 64
#if defined(__POPCNT__) && defined(__GNUC__) && (__GNUC__> 4 || (__GNUC__== 4 && __GNUC_MINOR__> 1))
#define popcnt(b) __builtin_popcountll(b)
#else
static
UV popcnt(UV b) {
b -= (b >> 1) & 0x5555555555555555;
b = (b & 0x3333333333333333) + ((b >> 2) & 0x3333333333333333);
b = (b + (b >> 4)) & 0x0f0f0f0f0f0f0f0f;
return
(b * 0x0101010101010101) >> 56;
}
#endif
#endif
#if defined(__GNUC__)
#define word_unaligned(m,wordsize) ((uintptr_t)m & (wordsize-1))
#else /* uintptr_t is part of C99 */
#define word_unaligned(m,wordsize) ((unsigned int)m & (wordsize-1))
#endif
static
const
unsigned
char
byte_zeros[256] =
{8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,
5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0};
static
WTYPE count_zero_bits(
const
unsigned
char
* m, WTYPE nbytes)
{
WTYPE count = 0;
#if BITS_PER_WORD == 64
if
(nbytes >= 16) {
while
( word_unaligned(m,
sizeof
(UV)) && nbytes--)
count += byte_zeros[*m++];
if
(nbytes >= 8) {
UV* wordptr = (UV*)m;
UV nwords = nbytes / 8;
UV nzeros = nwords * 64;
m += nwords * 8;
nbytes %= 8;
while
(nwords--)
nzeros -= popcnt(*wordptr++);
count += nzeros;
}
}
#endif
while
(nbytes--)
count += byte_zeros[*m++];
return
count;
}
static
unsigned
char
* prime_cache_sieve = 0;
static
WTYPE prime_cache_size = 0;
static
WTYPE get_prime_cache(WTYPE n,
const
unsigned
char
** sieve)
{
if
(prime_cache_size < n) {
if
(prime_cache_sieve != 0)
Safefree(prime_cache_sieve);
prime_cache_size = 0;
if
(n < (W_FFFF-3840))
n += 3840;
prime_cache_sieve = sieve_erat30(n);
if
(prime_cache_sieve != 0)
prime_cache_size = n;
}
if
(sieve != 0)
*sieve = prime_cache_sieve;
return
prime_cache_size;
}
static
const
unsigned
char
prime_is_small[] =
{0xac,0x28,0x8a,0xa0,0x20,0x8a,0x20,0x28,0x88,0x82,0x08,0x02,0xa2,0x28,0x02,
0x80,0x08,0x0a,0xa0,0x20,0x88,0x20,0x28,0x80,0xa2,0x00,0x08,0x80,0x28,0x82,
0x02,0x08,0x82,0xa0,0x20,0x0a,0x20,0x00,0x88,0x22,0x00,0x08,0x02,0x28,0x82,
0x80,0x20,0x88,0x20,0x20,0x02,0x02,0x28,0x80,0x82,0x08,0x02,0xa2,0x08,0x80,
0x80,0x08,0x88,0x20,0x00,0x0a,0x00,0x20,0x08,0x20,0x08,0x0a,0x02,0x08,0x82,
0x82,0x20,0x0a,0x80,0x00,0x8a,0x20,0x28,0x00,0x22,0x08,0x08,0x20,0x20,0x80,
0x80,0x20,0x88,0x80,0x20,0x02,0x22,0x00,0x08,0x20,0x00,0x0a,0xa0,0x28,0x80,
0x00,0x20,0x8a,0x00,0x20,0x8a,0x00,0x00,0x88,0x80,0x00,0x02,0x22,0x08,0x02};
#define NPRIME_IS_SMALL (sizeof(prime_is_small)/sizeof(prime_is_small[0]))
int
is_prime(WTYPE n)
{
WTYPE d;
unsigned
char
mtab;
if
( n < (NPRIME_IS_SMALL*8))
return
((prime_is_small[n/8] >> (n%8)) & 1);
d = n/30;
mtab = masktab30[ n - d*30 ];
if
(mtab == 0)
return
0;
if
(n <= prime_cache_size)
return
((prime_cache_sieve[d] & mtab) == 0);
if
(!(n%7) || !(n%11) || !(n%13) || !(n%17) || !(n%23) || !(n%29) || !(n%31))
return
0;
{
UV limit = isqrt(n);
UV i = 37;
while
(1) {
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 4;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 2;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 4;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 2;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 4;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 6;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 2;
if
(i > limit)
break
;
if
((n % i) == 0)
return
0; i += 6;
}
}
return
1;
}
static
const
unsigned
char
prime_count_small[] =
{0,0,1,2,2,3,3,4,4,4,4,5,5,6,6,6,6,7,7,8,8,8,8,9,9,9,9,9,9,10,10,
11,11,11,11,11,11,12,12,12,12,13,13,14,14,14,14,15,15,15,15,15,15,
16,16,16,16,16,16,17,17,18,18,18,18,18,18,19};
#define NPRIME_COUNT_SMALL (sizeof(prime_count_small)/sizeof(prime_count_small[0]))
static
const
unsigned
char
primes_small[] =
{0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71};
#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0]))
static
UV nth_prime_upper(WTYPE n)
{
double
fn, flogn, flog2n, upper;
if
(n < NPRIMES_SMALL)
return
primes_small[n];
fn = (
double
) n;
flogn =
log
(n);
flog2n =
log
(flogn);
if
(n >= 688383)
upper = fn * (flogn + flog2n - 1.0 + ((flog2n-2.00)/flogn));
else
if
(n >= 178974)
upper = fn * (flogn + flog2n - 1.0 + ((flog2n-1.95)/flogn));
else
if
(n >= 39017)
upper = fn * (flogn + flog2n - 0.9484);
else
if
(n >= 6)
upper = fn * ( flogn + 0.6000 * flog2n );
else
upper = fn * ( flogn + flog2n );
if
(upper >= (
double
)UV_MAX) {
#if BITS_PER_WORD == 32
if
(n <= W_CONST(203280221))
return
W_CONST(4294967291);
#else
if
(n <= W_CONST(425656284035217743))
return
W_CONST(18446744073709551557);
#endif
croak(
"nth_prime_upper(%"
UVuf
") overflow"
, n);
}
return
(WTYPE)
ceil
(upper);
}
UV nth_prime(WTYPE n)
{
const
unsigned
char
* sieve;
UV upper_limit, start, count, s, bytes_left;
if
(n < NPRIMES_SMALL)
return
primes_small[n];
upper_limit = nth_prime_upper(n);
if
(upper_limit == 0) {
croak(
"nth_prime(%lu) would overflow"
, (unsigned
long
)n);
return
0;
}
if
(get_prime_cache(upper_limit, &sieve) < upper_limit) {
croak(
"Couldn't generate sieve for nth(%lu) [sieve to %lu]"
, (unsigned
long
)n, (unsigned
long
)upper_limit);
return
0;
}
count = 3;
start = 7;
s = 0;
bytes_left = (n-count) / ((n<24000)?8:(n<3000000)?4:3);
while
( bytes_left > 0 ) {
count += count_zero_bits(sieve+s, bytes_left);
assert
(count <= n);
s += bytes_left;
bytes_left = (n-count) / 8;
}
if
(s > 0)
start = s * 30;
START_DO_FOR_EACH_SIEVE_PRIME(sieve, start, upper_limit)
if
(++count == n)
return
p;
END_DO_FOR_EACH_SIEVE_PRIME;
croak(
"nth_prime failed for %lu, not found in range %lu - %lu"
, (unsigned
long
)n, (unsigned
long
) start, (unsigned
long
)upper_limit);
return
0;
}
void
prime_init(WTYPE n)
{
if
( (n == 0) && (prime_cache_sieve == 0) ) {
size_t
initial_primes_to = 30 * (1024-8);
prime_cache_sieve = sieve_erat30(initial_primes_to);
if
(prime_cache_sieve != 0)
prime_cache_size = initial_primes_to;
return
;
}
get_prime_cache(n, 0);
}
UV prime_count(WTYPE n)
{
const
unsigned
char
* sieve;
static
WTYPE last_bytes = 0;
static
UV last_count = 3;
WTYPE s, bytes;
UV count = 3;
if
(n < NPRIME_COUNT_SMALL)
return
prime_count_small[n];
if
(get_prime_cache(n, &sieve) < n) {
croak(
"Couldn't generate sieve for prime_count"
);
return
0;
}
#if 0
START_DO_FOR_EACH_SIEVE_PRIME(sieve, 7, n)
count++;
END_DO_FOR_EACH_SIEVE_PRIME;
#else
bytes = n / 30;
s = 0;
if
(bytes >= last_bytes) {
s = last_bytes;
count = last_count;
}
count += count_zero_bits(sieve+s, bytes-s);
last_bytes = bytes;
last_count = count;
START_DO_FOR_EACH_SIEVE_PRIME(sieve, 30*bytes, n)
count++;
END_DO_FOR_EACH_SIEVE_PRIME;
#endif
return
count;
}
#define PRESIEVE_SIZE (7*11)
static
const
unsigned
char
presieve11[PRESIEVE_SIZE] =
{ 0x06,0x20,0x10,0x81,0x49,0x04,0xc2,0x02,0x28,0x10,0xa1,0x0c,0x04,0x50,0x02,0x61,0x10,0x83,0x08,0x0c,0x40,0x22,0x24,0x10,0x91,0x08,0x45,0x40,0x82,0x20,0x18,0x81,0x28,0x04,0x40,0x12,0x20,0x51,0x81,0x8a,0x04,0x48,0x02,0x20,0x14,0x81,0x18,0x04,0x41,0x02,0xa2,0x10,0x89,0x08,0x24,0x44,0x02,0x30,0x10,0xc1,0x08,0x86,0x40,0x0a,0x20,0x30,0x85,0x08,0x14,0x40,0x43,0x20,0x92,0x81,0x08,0x04,0x60 };
static
void
memtile(unsigned
char
* src, UV from, UV to) {
while
(from < to) {
UV bytes = (2*from > to) ? to-from : from;
memcpy
(src+from, src, bytes);
from += bytes;
}
}
static
UV sieve_prefill(unsigned
char
* mem, UV startd, UV endd)
{
UV nbytes = endd - startd + 1;
if
(nbytes > 0) {
memcpy
(mem, presieve11, (nbytes < PRESIEVE_SIZE) ? nbytes : PRESIEVE_SIZE);
memtile(mem, PRESIEVE_SIZE, nbytes);
if
(startd == 0) mem[0] = 0x01;
}
return
13;
}
unsigned
char
* sieve_erat30(WTYPE end)
{
unsigned
char
* mem;
WTYPE max_buf, limit, prime;
max_buf = (end/30) + ((end%30) != 0);
max_buf = ((max_buf +
sizeof
(UV) - 1) /
sizeof
(UV)) *
sizeof
(UV);
New(0, mem, max_buf, unsigned
char
);
prime = sieve_prefill(mem, 0, max_buf-1);
limit = isqrt(end);
for
( ; prime <= limit; prime = next_prime_in_sieve(mem,prime)) {
WTYPE d = (prime*prime)/30;
WTYPE m = (prime*prime) - d*30;
WTYPE dinc = (2*prime)/30;
WTYPE minc = (2*prime) - dinc*30;
WTYPE wdinc[8];
unsigned
char
wmask[8];
int
i;
for
(i = 1; i <= 8; i++) {
WTYPE dlast = d;
do
{
d += dinc;
m += minc;
if
(m >= 30) { d++; m -= 30; }
}
while
( masktab30[m] == 0 );
wdinc[i-1] = d - dlast;
wmask[i%8] = masktab30[m];
}
d -= prime;
#if 0
assert
(d == ((prime*prime)/30));
assert
(d < max_buf);
assert
(prime = (wdinc[0]+wdinc[1]+wdinc[2]+wdinc[3]+wdinc[4]+wdinc[5]+wdinc[6]+wdinc[7]));
#endif
i = 0;
do
{
mem[d] |= wmask[i];
d += wdinc[i];
i = (i+1) & 7;
}
while
(d < max_buf);
}
return
mem;
}
static
int
gamma_length(WTYPE n)
{
#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#if BITS_PER_WORD == 64
WTYPE log2 = 63 - __builtin_clzll(n+1);
#else
WTYPE log2 = 31 - __builtin_clzl(n+1);
#endif
#else
WTYPE log2 = 0;
while
(n >= ((2 << log2)-1)) log2++;
#endif
return
((2*log2)+1);
}
int
find_best_pair(WTYPE* basis,
int
basislen, WTYPE val,
int
adder,
int
* a,
int
* b)
{
int
maxbasis;
int
bestlen = INT_MAX;
int
i, j;
assert
( (basis != 0) && (a != 0) && (b != 0) && (basislen >= 1) );
if
((basislen > 15) && (val > basis[15])) {
i = 0;
j = basislen-1;
while
(i < j) {
int
mid = (i+j)/2;
if
(basis[mid] < val) i = mid+1;
else
j = mid;
}
maxbasis = i-1;
}
else
{
maxbasis = 0;
while
( ((maxbasis+1) < basislen) && (basis[maxbasis+1] < val) )
maxbasis++;
}
assert
(maxbasis < basislen);
assert
(basis[maxbasis] <= val);
assert
( ((maxbasis+1) == basislen) || (basis[maxbasis+1] >= val) );
i = 0;
j = maxbasis;
while
(i <= j) {
WTYPE sum = basis[i] + basis[j];
if
(sum > val) {
j--;
}
else
{
if
(sum == val) {
int
p1 = i + adder;
int
p2 = j - i + adder;
int
glen = gamma_length(p1) + gamma_length(p2);
if
(glen < bestlen) {
*a = p1;
*b = p2;
bestlen = glen;
}
}
i++;
}
}
return
(bestlen < INT_MAX);
}
int
find_best_prime_pair(WTYPE val,
int
adder,
int
* a,
int
* b)
{
int
bestlen = INT_MAX;
int
i, j;
WTYPE pi, pj;
const
unsigned
char
* sieve;
assert
( (a != 0) && (b != 0) );
if
(get_prime_cache(val, &sieve) < val) {
croak(
"Couldn't generate sieve for find_best_prime_pair"
);
return
0;
}
pi = 1;
pj = prev_prime_in_sieve(sieve,val+1);
i = 0;
j = (val <= 2) ? 1 : prime_count(pj)-1;
while
(i <= j) {
WTYPE sum = pi + pj;
if
(sum > val) {
j--;
pj = (j == 0) ? 1 : prev_prime_in_sieve(sieve,pj);
}
else
{
if
(sum == val) {
int
p1 = i + adder;
int
p2 = j - i + adder;
int
glen = gamma_length(p1) + gamma_length(p2);
if
(glen <= bestlen) {
*a = p1;
*b = p2;
bestlen = glen;
}
}
i++;
pi = (i == 1) ? 3 : next_prime_in_sieve(sieve,pi);
}
}
return
(bestlen < INT_MAX);
}