#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#define USE_PHI_CACHE 1
#define FUNC_isqrt 1
#include "lehmer.h"
#include "util.h"
#include "cache.h"
#include "sieve.h"
#include "prime_counts.h"
#include "prime_count_cache.h"
#include "legendre_phi.h"
/*****************************************************************************
*
* Counting primes with Legendre, Meissel, and Lehmer methods.
*
* Since we have a reasonable extended LMO, this is just for demonstration.
*
* The first versions of this were in 2012 and 2013, and included a novel
* phi calculation using iterative list merging, which greatly sped up
* the calculations compared to recursive phi calculations, even when caching
* was added.
*
* Kim Walisch started his primecount project in mid-2013, which quickly
* surpassed this in speed. Currently (2021) his project is substantially
* faster, as well as having support for the Deleglise-Rivat and
* Gourdon algorithms, efficient parallelization, and big number support.
*
* Reference: Hans Riesel, "Prime Numbers and Computer Methods for
* Factorization", 2nd edition, 1994.
*/
/* Below this size, just get primecount using standard methods */
#define SIEVE_LIMIT 60000000
/* Bigger prime count cache in Lehmer loop */
#define SIEVE_MULT 1
static int const verbose = 0;
#define STAGE_TIMING 0
#if STAGE_TIMING
#include <sys/time.h>
#define DECLARE_TIMING_VARIABLES struct timeval t0, t1;
#define TIMING_START gettimeofday(&t0, 0);
#define TIMING_END_PRINT(text) \
{ unsigned long long t; \
gettimeofday(&t1, 0); \
t = (t1.tv_sec-t0.tv_sec) * 1000000 + (t1.tv_usec - t0.tv_usec); \
printf("%s: %10.5f\n", text, ((double)t) / 1000000); }
#else
#define DECLARE_TIMING_VARIABLES
#define TIMING_START
#define TIMING_END_PRINT(text)
#endif
static UV P2_with_primes(UV n, UV a, UV b, const uint32_t *primes, uint32_t lastidx)
{
UV P2, lastw, lastwpc, i;
UV lastpc = 6 * primes[lastidx];
void* pcache = prime_count_cache_create(lastpc);
/* Ensure we have a large enough base sieve */
prime_precalc(isqrt(n / primes[a+1]));
P2 = lastw = lastwpc = 0;
for (i = b; i > a; i--) {
UV w = n / primes[i];
lastwpc = (w <= lastpc) ? prime_count_cache_lookup(pcache, w)
: lastwpc + segment_prime_count(lastw+1, w);
lastw = w;
P2 += lastwpc;
}
P2 -= ((b+a-2) * (b-a+1) / 2) - a + 1;
prime_count_cache_destroy(pcache);
return P2;
}
/* b = prime_count(isqrt(n)) */
static UV P2(UV n, UV a, UV b)
{
uint32_t lastidx, *primes;
UV maxn, P2;
maxn = nth_prime_upper( b );
if (maxn > 4294967291U) maxn = 4294967291U;
lastidx = range_prime_sieve_32(&primes, maxn, 1);
MPUassert(lastidx >= b, "failed to generate enough primes\n");
P2 = P2_with_primes(n, a, b, primes, lastidx);
Safefree(primes);
return P2;
}
/* Legendre's method. Interesting and a good test for phi(x,a), but Lehmer's
* method is much faster (Legendre: a = pi(n^.5), Lehmer: a = pi(n^.25)) */
UV legendre_prime_count(UV n)
{
UV a, phina;
if (n < SIEVE_LIMIT)
return segment_prime_count(2, n);
a = legendre_prime_count(isqrt(n));
phina = legendre_phi(n, a);
return phina + a - 1;
}
/* Meissel's method. */
UV meissel_prime_count(UV n)
{
UV a, b, sum;
if (n < SIEVE_LIMIT)
return segment_prime_count(2, n);
a = meissel_prime_count(icbrt(n)); /* a = Pi(floor(n^1/3)) [max 192725] */
b = meissel_prime_count(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
sum = legendre_phi(n, a) + a - 1 - P2(n, a, b);
return sum;
}
/* Lehmer's method. This is basically Riesel's Lehmer function (page 22),
* with some additional code to help optimize it. */
UV lehmer_prime_count(UV n)
{
UV z, a, b, c, sum, i, j, lastidx, lastpc, lastw, lastwpc;
uint32_t* primes = 0; /* small prime cache, first b=pi(z)=pi(sqrt(n)) */
void* pcache; /* Prime count cache */
DECLARE_TIMING_VARIABLES;
if (n < SIEVE_LIMIT)
return segment_prime_count(2, n);
/* Protect against overflow. 2^32-1 and 2^64-1 are both divisible by 3. */
if (n == UV_MAX) {
if ( (n%3) == 0 || (n%5) == 0 || (n%7) == 0 || (n%31) == 0 )
n--;
else
return segment_prime_count(2,n);
}
if (verbose > 0) printf("lehmer %lu stage 1: calculate a,b,c \n", n);
TIMING_START;
z = isqrt(n);
a = lehmer_prime_count(isqrt(z)); /* a = Pi(floor(n^1/4)) [max 6542] */
b = lehmer_prime_count(z); /* b = Pi(floor(n^1/2)) [max 203280221] */
c = lehmer_prime_count(icbrt(n)); /* c = Pi(floor(n^1/3)) [max 192725] */
TIMING_END_PRINT("stage 1")
if (verbose > 0) printf("lehmer %lu stage 2: phi(x,a) (z=%lu a=%lu b=%lu c=%lu)\n", n, z, a, b, c);
TIMING_START;
sum = legendre_phi(n, a) + ((b+a-2) * (b-a+1) / 2);
TIMING_END_PRINT("phi(x,a)")
/* The first b primes are used in stage 4. Hence, primes to isqrt(n). */
TIMING_START;
lastidx = range_prime_sieve_32(&primes, isqrt(n), 1);
MPUassert(lastidx >= b, "failed to generate enough primes\n");
TIMING_END_PRINT("small primes")
if (verbose > 0) printf("lehmer %lu stage 3: %lu small primes\n", n, lastidx);
TIMING_START;
lastpc = SIEVE_MULT * primes[lastidx];
if (SIEVE_MULT == 1)
pcache = prime_count_cache_create_with_primes(primes, lastidx);
else
pcache = prime_count_cache_create(lastpc);
TIMING_END_PRINT("prime count cache")
TIMING_START;
/* Speed up all the prime counts by doing a big base sieve */
prime_precalc( (UV) pow(n, 3.0/5.0) );
/* Ensure we have the base sieve for big prime_count ( n/primes[i] ). */
/* This is about 75k for n=10^13, 421k for n=10^15, 2.4M for n=10^17 */
prime_precalc(isqrt(n / primes[a+1]));
TIMING_END_PRINT("sieve precalc")
if (verbose > 0) printf("lehmer %lu stage 4: loop %lu to %lu, pc to %lu\n", n, a+1, b, n/primes[a+1]);
TIMING_START;
/* Reverse the i loop so w increases. Count w in segments. */
lastw = 0;
lastwpc = 0;
for (i = b; i >= a+1; i--) {
UV w = n / primes[i];
lastwpc = (w <= lastpc) ? prime_count_cache_lookup(pcache, w)
: lastwpc + segment_prime_count(lastw+1, w);
lastw = w;
sum = sum - lastwpc;
if (i <= c) {
UV bi = prime_count_cache_lookup(pcache, isqrt(w));
for (j = i; j <= bi; j++) {
sum = sum - prime_count_cache_lookup(pcache, w / primes[j]) + j - 1;
}
/* We could wrap the +j-1 in: sum += ((bi+1-i)*(bi+i))/2 - (bi-i+1); */
}
}
TIMING_END_PRINT("stage 4")
prime_count_cache_destroy(pcache);
Safefree(primes);
return sum;
}
/* The Lagarias-Miller-Odlyzko method.
* Naive implementation without optimizations.
* About the same speed as Lehmer, a bit less memory.
* A better implementation can be 10-50x faster and much less memory.
*/
UV LMOS_prime_count(UV n)
{
UV n13, a, b, sum, i, j, k, c, lastidx, P2, S1, S2;
uint32_t primec;
uint32_t* primes = 0; /* small prime cache */
signed char* mu = 0; /* moebius to n^1/3 */
uint32_t* lpf = 0; /* least prime factor to n^1/3 */
void *pcache; /* Cache for recursive phi */
DECLARE_TIMING_VARIABLES;
if (n < SIEVE_LIMIT)
return segment_prime_count(2, n);
n13 = icbrt(n); /* n13 = floor(n^1/3) [max 2642245] */
a = lehmer_prime_count(n13); /* a = Pi(floor(n^1/3)) [max 192725] */
b = lehmer_prime_count(isqrt(n)); /* b = Pi(floor(n^1/2)) [max 203280221] */
TIMING_START;
lastidx = range_prime_sieve_32(&primes, isqrt(n), 1);
MPUassert(lastidx >= b, "failed to generate enough primes\n");
TIMING_END_PRINT("small primes")
TIMING_START;
New(0, mu, n13+1, signed char);
memset(mu, 1, sizeof(signed char) * (n13+1));
Newz(0, lpf, n13+1, uint32_t);
mu[0] = 0;
for (i = 1; i <= n13; i++) {
UV primei = primes[i];
for (j = primei; j <= n13; j += primei) {
mu[j] = -mu[j];
if (lpf[j] == 0) lpf[j] = primei;
}
k = primei * primei;
for (j = k; j <= n13; j += k)
mu[j] = 0;
}
lpf[1] = UVCONST(4294967295); /* Set lpf[1] to max */
/* Remove mu[i] == 0 using lpf */
for (i = 1; i <= n13; i++)
if (mu[i] == 0)
lpf[i] = 0;
TIMING_END_PRINT("mu and lpf")
/* Thanks to Kim Walisch for help with the S1+S2 calculations. */
c = (a < tiny_phi_max_a()) ? a : tiny_phi_max_a();
primec = primes[c];
S1 = 0;
S2 = 0;
pcache = prepare_cached_legendre_phi(n, a);
TIMING_START;
for (i = 1; i <= n13; i++)
if (lpf[i] > primec)
S1 += mu[i] * tiny_phi(n/i, c);
TIMING_END_PRINT("S1")
/* TODO: This is about 1.5x slower than the old way. Fix. */
TIMING_START;
for (i = c; i+1 < a; i++) {
uint32_t p = primes[i+1];
for (j = (n13/p)+1; j <= n13; j++)
if (lpf[j] > p)
S2 += -mu[j] * cached_legendre_phi(pcache, n / (j*p), i);
}
TIMING_END_PRINT("S2")
destroy_cached_legendre_phi(pcache);
Safefree(lpf);
Safefree(mu);
TIMING_START;
prime_precalc( (UV) pow(n, 2.9/5.0) );
P2 = P2_with_primes(n, a, b, primes, lastidx);
TIMING_END_PRINT("P2")
Safefree(primes);
/* printf("S1 = %lu\nS2 = %lu\na = %lu\nP2 = %lu\n", S1, S2, a, P2); */
sum = (S1 + S2) + a - 1 - P2;
return sum;
}