Changeset 323
- Timestamp:
- 05/05/08 03:22:33 (7 months ago)
- Files:
-
- trunk/Manifest.txt (modified) (1 diff)
- trunk/lib/picolena/templates/app/models/indexer.rb (modified) (8 diffs)
- trunk/lib/picolena/templates/config/environment.rb (modified) (1 diff)
- trunk/lib/picolena/templates/config/environments/development.rb (modified) (1 diff)
- trunk/lib/picolena/templates/config/environments/production.rb (modified) (1 diff)
- trunk/lib/picolena/templates/config/environments/test.rb (modified) (1 diff)
- trunk/lib/picolena/templates/lib/core_exts.rb (modified) (1 diff)
- trunk/lib/picolena/templates/lib/indexer_logger.rb (added)
- trunk/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/Manifest.txt
r301 r323 51 51 lib/picolena/templates/lang/ui/fr.yml 52 52 lib/picolena/templates/lib/core_exts.rb 53 lib/picolena/templates/lib/indexer_logger.rb 53 54 lib/picolena/templates/lib/plain_text_extractor_DSL.rb 54 55 lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb trunk/lib/picolena/templates/app/models/indexer.rb
r322 r323 3 3 # one IndexWriter exists at any given time, even when used in a multi-threaded 4 4 # way. 5 require 'indexer_logger' 5 6 class Indexer 6 7 # This regexp defines which files should *not* be indexed. … … 23 24 # Forces Finder.searcher and Finder.index to be reloaded, by removing them from the cache. 24 25 Finder.reload! 25 log :debug => "Indexing every directory" 26 start=Time.now 26 logger.start_indexing 27 27 Picolena::IndexedDirectories.each{|dir, alias_dir| 28 28 index_directory_with_multithreads(dir) 29 29 } 30 log :debug =>"Now optimizing index"30 logger.debug "Now optimizing index" 31 31 index.optimize 32 32 index_time_dbm_file['last']=Time.now._dump 33 33 @@do_not_disturb_while_indexing=false 34 log :debug => "Indexing done in #{Time.now-start} s."34 logger.show_report 35 35 end 36 36 … … 39 39 # @@threads_number chunks, and create a new indexing thread for every chunk. 40 40 def index_directory_with_multithreads(dir) 41 log :debug =>"Indexing #{dir}, #{@@threads_number} threads"41 logger.debug "Indexing #{dir}, #{@@threads_number} threads" 42 42 43 43 indexing_list=Dir[File.join(dir,"**/*")].select{|filename| … … 54 54 add_or_update_file(complete_path) 55 55 else 56 log :debug =>"Identical : #{complete_path}"56 logger.debug "Identical : #{complete_path}" 57 57 end 58 58 index_time_dbm_file[complete_path] = Time.now._dump … … 73 73 document.merge! PlainTextExtractor.extract_content_and_language_from(complete_path) 74 74 raise "empty document #{complete_path}" if document[:content].strip.empty? 
75 log :debug => ["Added : #{complete_path}",document[:language] && " ("<<document[:language]<<")"].join75 logger.add_document document 76 76 rescue => e 77 log :debug => "\tindexing without content: #{e.message}"77 logger.reject_document document, e 78 78 end 79 79 index << document … … 101 101 index.writer.delete(:complete_path, filename) 102 102 index_time_dbm_file.delete(filename) 103 log :debug =>"Removed : #{filename}"103 logger.debug "Removed : #{filename}" 104 104 } 105 105 index.optimize … … 138 138 139 139 private 140 141 def logger 142 @@logger ||= IndexerLogger.new 143 end 140 144 141 145 # Copied from Ferret book, By David Balmain … … 150 154 def index_filename 151 155 Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first 152 end153 154 def log(hash)155 hash.each{|level,message|156 IndexerLogger.send(level,message)157 }158 156 end 159 157 trunk/lib/picolena/templates/config/environment.rb
r280 r323 8 8 RAILS_GEM_VERSION = '2.0.2' unless defined? RAILS_GEM_VERSION 9 9 10 IndexerLogger=Logger.new($stdout)11 12 10 # Bootstrap the Rails environment, frameworks, and default configuration 13 11 require File.join(File.dirname(__FILE__), 'boot') trunk/lib/picolena/templates/config/environments/development.rb
r148 r323 17 17 # Don't care if the mailer can't send 18 18 config.action_mailer.raise_delivery_errors = false 19 20 21 IndexerLogger.level = Logger::DEBUG
trunk/lib/picolena/templates/config/environments/production.rb
r148 r323 18 18 # Disable delivery errors, bad email addresses will be ignored 19 19 # config.action_mailer.raise_delivery_errors = false 20 21 IndexerLogger.level = Logger::INFO
trunk/lib/picolena/templates/config/environments/test.rb
r148 r323 21 21 # ActionMailer::Base.deliveries array. 22 22 config.action_mailer.delivery_method = :test 23 24 25 IndexerLogger.level = Logger::WARN
trunk/lib/picolena/templates/lib/core_exts.rb
r322 r323 41 41 # and removes added nils. 42 42 (self+[nil]*i).enum_slice(n).to_a.transpose.collect{|e| e.compact} 43 end 44 end 45 46 class Hash 47 def add(category) 48 self[category]||={:size=>0} 49 self[category][:size]+=1 43 50 end 44 51 end trunk/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb
r320 r323 4 4 aka "plain text file" 5 5 with {|source| 6 raise "binary file #{source}" unless File.plain_text?(source) 6 raise "binary file" unless File.plain_text?(source) 7 7 encoding=File.encoding(source) 8 8 if encoding.empty? then
trunk/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
r320 r323 31 31 it "should not extract content of binary files" do 32 32 bin_file="spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION" 33 lambda{PlainTextExtractor.extract_content_from(bin_file)}.should raise_error(RuntimeError, "binary file "<<bin_file) 33 lambda{PlainTextExtractor.extract_content_from(bin_file)}.should raise_error(RuntimeError, "binary file") 34 end 35 end
