Changeset 150
- Timestamp:
- 04/12/08 05:24:38 (7 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/rewrote_indexer/lib/picolena/templates/app/models/index_writer.rb
r149 r150 3 3 # Add needed parameters 4 4 params.merge!(:create_if_missing => true, :path => IndexSavePath, :analyzer => Analyzer) 5 # Ensure this directory exists 6 FileUtils.mkpath File.dirname(IndexSavePath) 7 5 # Creates the IndexWriter 8 6 super(params) … … 19 17 # No need to re-create any field. 20 18 return unless field_infos.fields.empty? 21 field_infos.add_field(:complete_path, :store => :yes, :index => :yes) 22 field_infos.add_field(:content, :store => :yes, :index => :yes) 23 field_infos.add_field(:basename, :store => :no,:index => :yes, :boost => 1.5) 24 field_infos.add_field(:file, :store => :no,:index => :yes, :boost => 1.5) 25 field_infos.add_field(:filetype, :store => :no,:index => :yes, :boost => 1.5) 26 field_infos.add_field(:date, :store=>:yes, :index=>:yes) 27 field_infos.add_field(:probably_unique_id, :store =>:no, :index=>:yes) 19 field_infos.add_field(:complete_path, :store => :yes, :index => :yes) 20 field_infos.add_field(:content, :store => :yes, :index => :yes) 21 field_infos.add_field(:basename, :store => :no, :index => :yes, :boost => 1.5) 22 field_infos.add_field(:file, :store => :no, :index => :yes, :boost => 1.5) 23 field_infos.add_field(:filetype, :store => :no, :index => :yes, :boost => 1.5) 24 field_infos.add_field(:date, :store => :yes, :index => :yes) 25 field_infos.add_field(:probably_unique_id, :store => :no, :index => :yes) 28 26 end 29 27 end branches/rewrote_indexer/lib/picolena/templates/app/models/indexer.rb
r149 r150 2 2 Exclude = /(Thumbs\.db)/ 3 3 4 def self.index_every_directory 5 log :debug => "Indexing every directory" 6 begin 4 class << self 5 def fields_for(filename) 6 { 7 :complete_path=> complete_path=File.expand_path(filename), 8 :probably_unique_id => complete_path.base26_hash, 9 :file => File.basename(filename), 10 :basename => File.basename(filename, File.extname(filename)).gsub(/_/,' '), 11 :filetype => File.extname(filename), 12 :date => File.mtime(filename).strftime("%Y%m%d%H%M") 13 } 14 end 15 16 def index_every_directory 17 log :debug => "Indexing every directory" 18 7 19 IndexedDirectories.each{|dir, alias_dir| 8 20 index_directory(dir) 9 21 } 10 22 writer.optimize 11 ensure 12 23 writer.close 13 24 end 14 end 15 16 def self.index_directory(dir) 17 log :debug => "Indexing #{dir}" 18 Dir.glob(File.join(dir,"**/*")){|filename| 19 if File.file?(filename) && filename !~ Exclude then 20 mime_type = File.mime(filename) 21 begin 22 index_file(filename, mime_type) 25 26 27 def index_directory(dir) 28 log :debug => "Indexing #{dir}" 29 30 Dir.glob(File.join(dir,"**/*")){|filename| 31 index_file(filename, File.mime(filename)) if File.file?(filename) && filename !~ Exclude 32 } 33 end 34 35 36 def index_file(filename,mime_type = nil) 37 log :debug => "Indexing #{filename}" 38 39 fields = fields_for(filename) 40 41 if mime_type then 42 begin 43 text = PlainText.extract_content_from(filename) 44 raise "empty document #{filename}" if text.strip.empty? 
45 fields[:content] = text 23 46 rescue => e 24 47 log :debug => "indexing without content: #{e.message}" 25 index_file(filename) 26 48 end 27 49 end 28 } 29 end 30 31 def self.index_file(filename,mime_type = nil) 32 log :debug => "Indexing #{filename}" 33 34 complete_path=File.expand_path(filename) 35 36 fields = { 37 :complete_path=> complete_path, 38 :probably_unique_id => complete_path.base26_hash, 39 :file => File.basename(filename), 40 :basename => File.basename(filename, File.extname(filename)).gsub(/_/,' '), 41 :filetype => File.extname(filename), 42 :date => File.mtime(filename).strftime("%Y%m%d%H%M") 43 } 44 45 if mime_type then 46 text = PlainText.extract_content_from(filename) 47 raise "empty document #{filename}" if text.strip.empty? 48 fields[:content] = text 50 51 writer << fields 49 52 end 50 53 51 writer << fields 52 end 53 54 def self.writer 55 @@writer ||= IndexWriter.new 56 end 57 58 def self.reset! 59 log :debug => "Resetting Index" 60 @@writer=nil 61 IndexWriter.remove 62 end 63 64 private 65 66 def self.log(hash) 67 hash.each{|level,message| 68 puts "#{level} -> #{message}" 69 #IndexerLogger.send(level,message) 70 } 54 def writer 55 @@writer ||= IndexWriter.new 56 end 57 58 def reset! 59 log :debug => "Resetting Index" 60 @@writer=nil 61 IndexWriter.remove 62 end 63 64 private 65 66 def log(hash) 67 hash.each{|level,message| 68 #puts "#{level} -> #{message}" 69 IndexerLogger.send(level,message) 70 } 71 end 71 72 end 72 73 end
