Changeset 154
- Timestamp:
- 04/12/08 13:39:52 (7 months ago)
- Files:
-
- branches/rewrote_indexer/lib/picolena/templates/app/models/finder.rb (modified) (2 diffs)
- branches/rewrote_indexer/lib/picolena/templates/app/models/index_reader.rb (modified) (4 diffs)
- branches/rewrote_indexer/lib/picolena/templates/app/models/index_writer.rb (modified) (1 diff)
- branches/rewrote_indexer/lib/picolena/templates/app/models/indexer.rb (modified) (4 diffs)
- branches/rewrote_indexer/lib/picolena/templates/config/environment.rb (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/rewrote_indexer/lib/picolena/templates/app/models/finder.rb
r153 r154 1 class Finder 2 #FIXME: Should not use all those class methods to access index. 3 1 class Finder 4 2 attr_reader :query 5 3 … … 16 14 @per_page=results_per_page 17 15 @offset=(page.to_i-1)*results_per_page 18 index. validate_that_has_documents16 index.should_have_documents 19 17 end 20 18 branches/rewrote_indexer/lib/picolena/templates/app/models/index_reader.rb
r153 r154 1 1 class IndexReader < Ferret::Index::Index 2 2 def initialize(params={}) 3 # TODO: Remove those debug lines! 4 # puts "##################################################################Creating Reader!!!!!" 3 5 # Add needed parameters 4 6 params.merge!(:path => IndexSavePath, :analyzer => Analyzer) … … 8 10 9 11 # Returns the number of times a file is present in the index. 12 # index_reader.doc_freq(field, term) → integer 13 # Return the number of documents in which the term term appears in the field field. 10 14 def occurences_number(complete_path) 11 15 # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path) … … 21 25 delete(hit.doc) 22 26 } 27 close 23 28 end 24 29 … … 26 31 # Validation methods. 27 32 28 def validate_that_has_documents33 def should_have_documents 29 34 raise IndexError, "no document found" unless has_documents? 30 35 end branches/rewrote_indexer/lib/picolena/templates/app/models/index_writer.rb
r150 r154 1 1 class IndexWriter < Ferret::Index::IndexWriter 2 2 def initialize(params={}) 3 # TODO: Remove those debug lines! 4 # puts "##################################################################Creating Writer!!!!!" 5 3 6 # Add needed parameters 4 params.merge!(:create_if_missing => true, :path => IndexSavePath, :analyzer => Analyzer) 7 params.merge!(:create_if_missing => true, 8 :path => IndexSavePath, 9 :analyzer => Analyzer 10 # huge performance impact? 11 # :auto_flush => true 12 ) 5 13 # Creates the IndexWriter 6 14 super(params) branches/rewrote_indexer/lib/picolena/templates/app/models/indexer.rb
r153 r154 1 1 class Indexer 2 Exclude = /(Thumbs\.db)/ 2 Exclude = /(Thumbs\.db)/ 3 MaxThreadsNumber = 5 3 4 4 5 class << self … … 13 14 } 14 15 end 15 16 16 17 def index_every_directory(update=true) 17 18 log :debug => "Indexing every directory" … … 21 22 22 23 IndexedDirectories.each{|dir, alias_dir| 23 index_directory (dir)24 index_directory_with_multithreads(dir) 24 25 } 26 # FIXME: with those 2 lines, 25 27 writer.optimize 26 28 writer.close 29 # launching Indexer.index_every_directory twice in a row 30 # would raise a SEGFAULT: 31 # picolena/lib/picolena/templates/app/models/indexer.rb:27: [BUG] Segmentation fault 32 # ruby 1.8.6 (2007-06-07) [i486-linux] 33 # 34 # Aborted (core dumped) 27 35 end 28 36 29 def index_directory (dir)30 log :debug => "Indexing #{dir} "37 def index_directory_with_multithreads(dir) 38 log :debug => "Indexing #{dir}, #{MaxThreadsNumber} multithreads" 31 39 32 Dir.glob(File.join(dir,"**/*")){|filename|33 add_or_update_file(File.expand_path(filename)) ifFile.file?(filename) && filename !~ Exclude40 @indexing_list=Dir[File.join(dir,"**/*")].select{|filename| 41 File.file?(filename) && filename !~ Exclude 34 42 } 43 44 threads=(1..MaxThreadsNumber).collect{ 45 Thread.new { 46 launch_indexing_chain(@indexing_list) 47 } 48 } 49 threads.each { |aThread| aThread.join } 35 50 end 36 51 37 52 def add_or_update_file(complete_path) 38 53 should_be_added = true … … 98 113 private 99 114 115 def launch_indexing_chain(indexing_list) 116 return if indexing_list.empty? 117 add_or_update_file(indexing_list.shift) 118 launch_indexing_chain(indexing_list) 119 end 120 100 121 def log(hash) 101 122 hash.each{|level,message| branches/rewrote_indexer/lib/picolena/templates/config/environment.rb
r148 r154 1 %w(rubygems paginator fileutils pathname logger ).each{|lib| require lib}1 %w(rubygems paginator fileutils pathname logger thread).each{|lib| require lib} 2 2 3 3 # Uncomment below to force Rails into production mode when
