エラトステネスの篩 高速版

def _sieve(primes, min=3, size=1_0000_0000)
  table = "\xFF" * (size / 8 + 1)
  max = min + size * 2 - 1
  tmax = size
  mid = Math.sqrt(max).floor
  primes.each do |i|
    next if i > mid
    ti = (i - min) / 2
    ti += i while ti < 0
    ti.step(tmax, i) do |tj|
      table[tj >> 3] &= ~(1 << (tj & 7))
    end
  end
  (0..((mid-min)/2)).each do |ti|
    next unless table[ti >> 3][ti & 7].nonzero?
    i = (ti << 1) + min
    primes << i
    ti.step(tmax, i) do |tj|
      table[tj >> 3] &= ~(1 << (tj & 7))
    end
  end
  ((mid > min ? (mid - min) / 2 : 0)...tmax).each do |ti|
    primes << min + ti * 2 if table[ti >> 3][ti & 7].nonzero?
  end
  return primes
end

def sieve(max=100, size=1_0000_0000)
  max -= 1 unless max % 2 == 0
  primes = []
  3.step(max, size * 2) do |i|
    if max > i + size * 2
      primes = _sieve(primes, i, size)
    else
      size = (max - i) / 2 + 1
      primes = _sieve(primes, i, size)
    end
  end
  return [2]+primes
end

max = ARGV.size == 1 ? ARGV[0].to_i : 100
primes = sieve(max)
puts primes.length

さらに高速化してみました。以下がそのテスト結果です。比較としてsuminさんの primesupto.ar.rbを載せています。

% /usr/bin/time -l ruby primesupto.ar.rb 1000000
        2.12 real         2.12 user         0.00 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
       488  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
        28  involuntary context switches
% /usr/bin/time -l ruby primesupto.nr.rb 1000000
        1.88 real         1.86 user         0.01 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
       467  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
        27  involuntary context switches

% /usr/bin/time -l ruby primesupto.ar.rb 10000000
       24.46 real        24.40 user         0.02 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
      2507  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
       371  involuntary context switches
% /usr/bin/time -l ruby primesupto.nr.rb 10000000
       19.69 real        19.67 user         0.01 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
      2399  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
       281  involuntary context switches

% /usr/bin/time -l ruby primesupto.ar.rb 100000000
      427.89 real       426.75 user         0.22 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
     19023  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
      7298  involuntary context switches
% /usr/bin/time -l ruby primesupto.nr.rb 100000000
      206.52 real       206.26 user         0.06 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
     18000  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
      3021  involuntary context switches

メモリ容量にあたる、page reclaims は ar も nr も同クラスになっています。これは ar で ruby の Fixnum で利用可能な 31bit のうち 8bit しか利用していないからでしょう。31bit 利用するか、nr同様 String を使うようにすれば、大きな差がつくようになると思わ
速度的には ar が予想以上に速いのに驚きます。「改良版」の1千万26秒には勝ってるし。1億になると274秒なのでだいぶ差が付きますが。
考察するに、エラトステネスの篩のホットスポットはループの最も内側である、

if (mask_bit = mask_bit_idx[idx % 2310]) != 0 then
  byte_idx = idx / 2310 * 60 + (mask_bit-1 << -3)
  mask_bit = 255 - (1 << (mask_bit & 7))
  flags[byte_idx] = flags[byte_idx] & mask_bit
end
idx += 2 * n
table[tj >> 3] &= ~(1 << (tj & 7))

であり、この部分の速度が全体の差に大きく影響する。また、Rubyの場合、Rubyメソッドやブロックを呼び出すと大きな差がつくが、Cのメソッドを呼ぶ分には大きな差はつかない。とすると、ar と nr は大きな差はつかないだろうと予想できます。もっとも、そう考えると今度は1億で2倍の差がつくのが疑問となります。ここで、ar の

  flags = Array.new((upto + 2309) / 2310 * 60 + 60, 0xFF)
  # flags = "\377" * ((upto + 2309) / 2310 * 60 + 60)

コメントアウトを逆にしてみます。すると、あら不思議・・・。

% /usr/bin/time -l ruby primesupto.ar.rb 100000000
      334.33 real       333.54 user         0.25 sys
         0  maximum resident set size
         0  average shared memory size
         0  average unshared data size
         0  average unshared stack size
     17114  page reclaims
         0  page faults
         0  swaps
         0  block input operations
         0  block output operations
         0  messages sent
         0  messages received
         0  signals received
         0  voluntary context switches
      4913  involuntary context switches

大幅に高速化されました。page reclaims も減っています。Array/int よりも String/byte の方が高速なんですねぇ。