Changeset 154

Show
Ignore:
Timestamp:
04/12/08 13:39:52 (7 months ago)
Author:
eric.dumin..@gmail.com
Message:

Not messy enough => adding multi-threads indexer!

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • branches/rewrote_indexer/lib/picolena/templates/app/models/finder.rb

    r153 r154  
    1 class Finder 
    2   #FIXME: Should not use all those class methods to access index. 
    3    
     1class Finder   
    42  attr_reader :query 
    53   
     
    1614    @per_page=results_per_page 
    1715    @offset=(page.to_i-1)*results_per_page 
    18     index.validate_that_has_documents 
     16    index.should_have_documents 
    1917  end 
    2018   
  • branches/rewrote_indexer/lib/picolena/templates/app/models/index_reader.rb

    r153 r154  
    11class IndexReader < Ferret::Index::Index 
    22  def initialize(params={}) 
     3    # TODO: Remove those debug lines! 
     4    # puts "##################################################################Creating Reader!!!!!" 
    35    # Add needed parameters 
    46    params.merge!(:path => IndexSavePath, :analyzer => Analyzer) 
     
    810   
    911  # Returns the number of times a file is present in the index. 
     12  # index_reader.doc_freq(field, term) → integer 
     13  # Return the number of documents in which the term term appears in the field field.  
    1014  def occurences_number(complete_path) 
    1115    # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path) 
     
    2125      delete(hit.doc) 
    2226    } 
     27    close 
    2328  end 
    2429   
     
    2631  # Validation methods. 
    2732   
    28   def validate_that_has_documents 
     33  def should_have_documents 
    2934     raise IndexError, "no document found" unless has_documents? 
    3035  end 
  • branches/rewrote_indexer/lib/picolena/templates/app/models/index_writer.rb

    r150 r154  
    11class IndexWriter < Ferret::Index::IndexWriter 
    22  def initialize(params={}) 
     3    # TODO: Remove those debug lines! 
     4    # puts "##################################################################Creating Writer!!!!!" 
     5     
    36    # Add needed parameters 
    4     params.merge!(:create_if_missing => true, :path => IndexSavePath, :analyzer => Analyzer) 
     7    params.merge!(:create_if_missing => true, 
     8                  :path              => IndexSavePath, 
     9                  :analyzer          => Analyzer 
     10                  # huge performance impact? 
     11                  # :auto_flush        => true 
     12                  ) 
    513    # Creates the IndexWriter 
    614    super(params) 
  • branches/rewrote_indexer/lib/picolena/templates/app/models/indexer.rb

    r153 r154  
    11class Indexer 
    2   Exclude = /(Thumbs\.db)/ 
     2  Exclude          = /(Thumbs\.db)/ 
     3  MaxThreadsNumber = 5 
    34   
    45  class << self 
     
    1314      }       
    1415    end     
    15          
     16     
    1617    def index_every_directory(update=true) 
    1718      log :debug => "Indexing every directory" 
     
    2122       
    2223      IndexedDirectories.each{|dir, alias_dir| 
    23         index_directory(dir) 
     24        index_directory_with_multithreads(dir) 
    2425      } 
     26      # FIXME: with those 2 lines, 
    2527      writer.optimize 
    2628      writer.close 
     29      # launching Indexer.index_every_directory twice in a row 
     30      # would raise a SEGFAULT: 
     31      # picolena/lib/picolena/templates/app/models/indexer.rb:27: [BUG] Segmentation fault 
     32      # ruby 1.8.6 (2007-06-07) [i486-linux] 
     33      # 
     34      # Aborted (core dumped) 
    2735    end 
    2836     
    29     def index_directory(dir) 
    30       log :debug => "Indexing #{dir}"
     37    def index_directory_with_multithreads(dir) 
     38      log :debug => "Indexing #{dir}, #{MaxThreadsNumber} multithreads"
    3139       
    32       Dir.glob(File.join(dir,"**/*")){|filename| 
    33         add_or_update_file(File.expand_path(filename)) if File.file?(filename) && filename !~ Exclude 
     40      @indexing_list=Dir[File.join(dir,"**/*")].select{|filename| 
     41        File.file?(filename) && filename !~ Exclude 
    3442      } 
     43       
     44      threads=(1..MaxThreadsNumber).collect{ 
     45        Thread.new { 
     46          launch_indexing_chain(@indexing_list) 
     47        } 
     48      } 
     49      threads.each { |aThread|  aThread.join } 
    3550    end 
    36      
     51 
    3752    def add_or_update_file(complete_path) 
    3853      should_be_added = true 
     
    98113    private 
    99114     
     115    def launch_indexing_chain(indexing_list) 
     116      return if indexing_list.empty? 
     117      add_or_update_file(indexing_list.shift) 
     118      launch_indexing_chain(indexing_list) 
     119    end 
     120     
    100121    def log(hash) 
    101122      hash.each{|level,message| 
  • branches/rewrote_indexer/lib/picolena/templates/config/environment.rb

    r148 r154  
    1 %w(rubygems paginator fileutils pathname logger).each{|lib| require lib} 
     1%w(rubygems paginator fileutils pathname logger thread).each{|lib| require lib} 
    22 
    33# Uncomment below to force Rails into production mode when