#########################################################
##
##  NOTE: Evaluate this cell first!
##
##  Click on it, then press Shift+Enter (or Ctrl+Enter)
##
##  Instructions for starting the slides should appear below.
##  If they don't, try evaluating this cell again.
##  
#########################################################

from helpers import init, timing_plot
init()


# A prime number (or a prime) is a natural number greater than 1
# that has no positive divisors other than 1 and itself.

def is_prime(number):
  if not number > 1:         # If it's not greater than 1
    return False             # It can't be a prime
  for d in range(2, number): # Let's check every possible divisor between 2 and number-1
    if number % d == 0:      # If the remainder when dividing number by d is zero
      return False           # It's not a prime
  return True                # If we get this far, it must be a prime


# All primes less than 20
[i for i in range(20) if is_prime(i)]

[2, 3, 5, 7, 11, 13, 17, 19]


# Primes less than 1000 (print it otherwise Jupyter only displays 1 number per line)
print([i for i in range(1000) if is_prime(i)])

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997]


from itertools import count

# For now let's just loop over all numbers and call is_prime on each one,
# we'll worry about optimising this later.
def first_try():
  for n in count(): # Loop over all positive integers
    if is_prime(n): # Check each one to see if it's prime
      yield n       # If so, yield it (return it and continue)


from itertools import islice

# First 20 primes:
[p for p in islice(first_try(), 20)]

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]


from itertools import takewhile

# Primes less than 50
[p for p in takewhile(lambda x: x < 50, first_try())]

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47]


timing_plot(first_try)


from itertools import count

def skip_even():
  def is_prime(number):           # Let's a local is_prime that skips even numbers
    for d in range(3, number, 2): # check odd divisors between 3 and n-1
      if number % d == 0:         # If there's no remainder, it divides,
        return False              # so we haven't got a prime
    return True                   # If we get this far, it must be prime
  yield 2                         # Make sure we start by yielding the only even prime, 2
  for n in count(3, 2):           # Then loop over odd integers from 3 upwards
    if is_prime(n):               # Checking each (odd) number to see if it's prime
      yield n


timing_plot(skip_even)


from itertools import count
from math import sqrt

def to_sqrt():
  def is_prime(number):
    limit = int(sqrt(number))        # The highest number we have to check
    for d in range(3, limit + 1, 2): # check odd divisors from 3 to limit
      if number % d == 0:            # If there's no remainder, it divides,
        return False                 # so we haven't got a prime
    return True                      # If we get this far, it must be prime
  yield 2                            # Start with 2
  for n in count(3, 2):              # Then do the odd numbers from 3 upwards
    if is_prime(n):                  # checking each to see if it's prime
      yield n


timing_plot(to_sqrt)


from itertools import count

def check_primes():
  yield 2                       # Initially yield 2, then we only consider odd numbers
  primes = []                   # Keep a list of all the primes seen so far
  for candidate in count(3, 2): # Let's check all odd number starting from 3
    isprime = True              # Start by assuming it is a prime
    for p in primes:            # Then start going through all our known primes
      if p * p > candidate:     # If the next prime is > sqrt(candidate)
        break                   # No need to continue looking at higher primes
      if candidate % p == 0:    # Else, if candidate is divisible by this prime
        isprime = False         # Our candidate was not a prime after all
        break                   # And stop looking at more primes
    if isprime:                 # If our candidate turned out to be a prime number
      yield candidate           # Yield it to the caller
      primes.append(candidate)  # And add it to the end of our list of primes


timing_plot(check_primes)


def simple_sieve(max_prime):
  sieve = [True] * max_prime             # Create the "sieve" (an array of booleans)
  for i in range(2, max_prime):          # Loop over the cells of the sieve from 2
    if sieve[i]:                         # If this cell is True
      yield i                            # It's a prime
      for j in range(2*i, max_prime, i): # So loop over all its multiples
        sieve[j] = False                 # and mark them as non-prime


timing_plot(simple_sieve)


def improved_sieve(max_prime):
    yield 2                                      # Yield the only even prime
    sieve = [True] * (max_prime // 2)            # Create sieve of only odd numbers (half the size)
    for i in range(3, max_prime, 2):             # Loop over only odd numbers from 3
        if sieve[i//2]:                          # If this cell is True
            yield i                              # It's a prime
            for j in range(i*i, max_prime, i*2): # Loop over odd multiples starting from its square
                sieve[j//2] = False              # and mark them as non-prime


timing_plot(improved_sieve)


from itertools import count

def unbounded_sieve():
  state = {}
  for candidate in count(2):
    if candidate in state:
      for factor in state[candidate]:
        if candidate + factor in state:
          state[candidate + factor].append(factor)
        else:
          state[candidate + factor] = [factor]
      del state[candidate]
    else:
      yield candidate
      state[2 * candidate] = [candidate]


timing_plot(unbounded_sieve)


from collections import defaultdict
from itertools import count

def unbounded_sieve2():
  yield 2
  state = defaultdict(list)
  for candidate in count(3, 2):
    if candidate in state:
      for inc in state[candidate]:
        state[candidate + inc].append(inc)
      del state[candidate]
    else:
      yield candidate
      state[candidate * candidate] = [2 * candidate]


timing_plot(unbounded_sieve2)


from pyprimesieve import primes

def library(n):
  return primes(n)

timing_plot(library)

Optimised Primes¶

Why?¶

What is a prime?¶

How would that look in code?¶

Let's generate a few¶

Let's make it a bit more flexible¶

But how fast is it?¶

Can we make it faster?¶

We know 2 is the only even prime, so why not skip even numbers (apart from 2)?¶

How much faster is it?¶

Can we reduce the number of checks further?¶

We could also skip multiples of 3¶

Yes!¶

How much faster is this?¶

Is this the best we can do?¶

What about storing a list of primes so far, and only checking those?¶

What next?¶

The sieve of Eratosthenes¶

Problems?¶

Memory use¶

Need to allocate storage upfront¶

What can we do about it?¶

But!¶