Раздел «Язык Ruby».CachedMethod:

Advanced caching of method results

Let's cache fibonacci number.

def fib(n)
  return 1 if (0..1) === n
  fib(n-1) + fib(n-2)
end
fib(50) # Yes, it's long to wait

We can cache calculated values in hash @fib :

def fib(n)
  return 1 if (0..1) === n
  (@fib||={})[n] ||= fib(n-1) + fib(n-2)
end
puts fib(100) # Wow! It's fast

Combo "(@fib||={})" is initialization on the fly (initialize variable @fib if not yet).

But it's better to write more general code:

def fib(n)
  return 1 if (0..1) === n
  fib(n-1) + fib(n-2)
end
alias :orig_fib :fib

def fib(n)
   (@fib||={})[n] ||= orig_fib(n)
end

puts fib(100) # Wow! It still works

General purpose make_cached method

Or even more general:

def make_cached(name)
  orig_name = "orig_#{name}"
  eval "alias :#{orig_name} :#{name}"
  cache = {}
  send(:define_method, name) do |*n|
    # nil values will be recalculated; maybe it's ok
    cache[n] ||= send(orig_name, *n)
  end
  cache
end

make_cached :fib

puts fib(100) # Yes, it works

Notice, that local variable cache gets long-life due to block that uses it.

But it is too bad of us to use eval method. Ty to get rid of it in favor of method_alias

Let's add memory concurrency

Now, let's consider we have low memory resources and need to restrict cache size.

It's worth to use HashHeap — data structure providing interface of associative array and priority queue simultaneously.

require 'hash_heap'

def make_cached(name, params={})
  old_name = "old_#{name}"
  eval "alias :#{old_name} :#{name}"
  cache = {}
  h = Heap.new(0, HeapItemValueMin)
  limit = params[:limit] || 1000000
  send(:define_method, name) do |n|
    if h.size > limit
      arg,freq = h.extract_max
      cache.delete(arg)
      # puts "n=#{n}, cache.size=#{cache.size}"
    end
    h[n] += 1
    cache[n] ||= send(old_name, n)
  end
  [cache,h]
end

c,h = make_cached :fib, :limit=>5

100.times {
  fib(rand(100))
}

puts h.inspect

But this heuristic is bad one: cache may be populated with (limit-1) values once frequently used, and there will be no way to drop it off out of cache.

Let's add more intelligence

Let's try to write weight function, that takes into account both — query frequency and last query time.

class HashItemMaxWeight < HeapItem
  def <=(a)
    self.v.weight <= a.v.weight
  end
end

module Cached
  def Cached.included(klass)
    klass.class_eval do
      class <<klass
        attr_accessor :max_cache_size, :decay_time

        def max_cache_size
          @max_cache_size || 10
        end

        def cache
          @@cache ||= Heap.new(nil, HashItemMaxWeight)
        end

        def get(i)
          i = i.to_i
          object = (cache[i] ||= find_and_enhance_object(i))
          cache.extract if cache.size > max_cache_size
          object.update_weight
          object
        end

        def find_and_enhance_object(i)
          object = self.find(i)
          if object
            class <<object
              attr_accessor :idx, :access_time, :gap, :weight, :decay_time
              def update_weight
                tdiff = (t=Time.now) - @access_time
                tdiff = 10 * decay_time if tdiff > 10*decay_time
                r = ( 0.4 + 0.6 * (2.0**(-tdiff/decay_time)) )
                @gap = @gap * r  + tdiff * (1 - r)
                @weight = t - @gap
                @access_time = t
                @weight
              end
            end
          else
            nil
          end
          object.access_time = Time.now
          object.gap = 0.0
          object.decay_time = 60.0*10 # 10 minutes
          object.weight = object.access_time + object.decay_time
          object
        end
      end
    end
  end
end

And, finally, we have something we can use

require 'heap'

class HashItemMinWeight < HeapItem
  def <=(a)
    self.v.weight >= a.v.weight
  end
end

def make_cached(cached_method, params = {})
  orig_method = params[:method] || cached_method
  limit = params[:limit] || 1000
  cache = Heap.new(nil, HashItemMinWeight)
  
  if orig_method == cached_method
    orig_method = "orig_#{cached_method}"
    eval "alias :#{orig_method} :#{cached_method}"
  end
  
 
  decay_time = params[:decay_time] || 3.0 # seconds
  
  eval_and_enhance_result = lambda do |o,*args|
    object = o.send(orig_method, *args)
    if object
      class <<object
        attr_accessor :idx, :access_time, :gap, :weight, :decay_time
        def update_weight
          tdiff = (t=Time.now) - @access_time
          tdiff = 10 * decay_time if tdiff > 10*decay_time
          r = ( 0.4 + 0.6 * (2.0**(-tdiff/decay_time)) )
          @gap = @gap * r  + tdiff * (1 - r) 
          @weight = t - 0.1*@gap
          @access_time = t
          @weight
        end
      end
    else
      nil
    end
    object.access_time = Time.now
    object.decay_time = decay_time 
    object.gap = 0.2*decay_time
    object.weight = object.access_time - 1.8*object.decay_time
    #~ puts "put  [#{args}, #{object.inspect}]"
    object
  end
  
  self.send(:define_method, cached_method) do |*args|
      args = args.first if args.size == 1
      object = (cache[args] ||= eval_and_enhance_result[self, *args])
      if cache.size > limit
        v=cache.extract 
        #~ puts "extr #{v.inspect}"
      end
      object.update_weight
      cache.update(arg)
      object
  end

  cache
end

if $0 == __FILE__
  # Let's test this class

  # emulate real and cpu time
  $time = 0.0
  $cpu_time = 0.0
  
  def sleep(n)
    $time += n
  end
  
  def work(n)
    $time += n
    $cpu_time += n
  end
  
  class Time
    def Time.now; $time; end
    def Time.now=(c); $time=c; end
  end
  
  class Track
    attr_accessor :id, :created_at
    def initialize(id)
      @id = id
      @created_at = Time.now
    end
    class << self
      def find(i)
        work 1.0
        Track.new(i)
      end
      @@cache = make_cached :get, :method=>:find, :limit=>20
    end
  end

  queries = []
  
  srand(12345)
  lookers = Array.new(1000) {|id| [id, rand(1000), (1+rand(15)*(1+rand)).to_i, 1+4*rand]} 
  
  lookers.each {|id, start_time, n, gap|
    n.times {|i|
      queries << [start_time + i*gap + gap*rand, id ]
    }
  }
  puts "Size = #{queries.size}"
  puts "start_time=#{$cpu_time}"
  Time.now = queries[0]
  queries.sort.each do |time, id|
    Time.now = time
    Track.get(id)
  end
  puts "end_time=#{$cpu_time}"
end

Cached value is enhanced with access_time and weight methods, so we can manipulate caching logic for some special values. Although this approach does not allow to cache Fixnum, Float, Symbol, true, false and nil values. It can be fixed. See gem MakeCached.

-- ArtemVoroztsov - 25 Oct 2007

Attachment sort Action Size Date Who Comment
cached.tar manage 50.0 K 03 Jul 2008 - 15:00 ArtemVoroztsov