Changeset 151
- Timestamp: 04/12/08 08:43:50 (7 months ago)
- Files:
- branches/rewrote_indexer/lib/picolena/templates/app/models/document.rb (modified) (2 diffs)
- branches/rewrote_indexer/lib/picolena/templates/app/models/finder.rb (modified) (3 diffs)
- branches/rewrote_indexer/lib/picolena/templates/app/models/index_reader.rb (modified) (1 diff)
- branches/rewrote_indexer/lib/picolena/templates/app/models/indexer.rb (modified) (3 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/rewrote_indexer/lib/picolena/templates/app/models/document.rb
r136 r151 77 77 end 78 78 79 def mtime 80 get_index_id! unless index_id 81 Finder.index[index_id][:date].to_i 82 end 83 79 84 private 80 85 … … 85 90 def self.find_by_unique_id(some_id) 86 91 Finder.new("probably_unique_id:"<<some_id).matching_document 92 end 93 94 def self.find_by_complete_path(complete_path) 95 Finder.new('complete_path:"'<<complete_path<<'"').matching_document 87 96 end 88 97 branches/rewrote_indexer/lib/picolena/templates/app/models/finder.rb
r149 r151 1 #require 'ff'2 3 1 class Finder 4 2 #FIXME: Should not use all those class methods to access index. … … 77 75 end 78 76 79 # Returns matching document for any given query , if only77 # Returns matching document for any given query only if 80 78 # exactly one document is found. 81 79 # Raises otherwise. … … 118 116 119 117 def self.force_index_creation 120 #create_index(IndexedDirectories.keys) 121 Indexer.reset! 122 Indexer.index_every_directory 118 #Index every directory, without updating. 119 Indexer.index_every_directory(false) 123 120 end 124 121 branches/rewrote_indexer/lib/picolena/templates/app/models/index_reader.rb
r148 r151 1 class IndexReader 1 class IndexReader < Ferret::Index::Index 2 def initialize(params={}) 3 # Add needed parameters 4 params.merge!(:path => IndexSavePath, :analyzer => Analyzer) 5 # Creates the IndexReader 6 super(params) 7 end 2 8 9 # Returns the number of times a file is present in the index. 10 def occurences_number(complete_path) 11 # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path) 12 search_by_complete_path(complete_path).total_hits 13 end 14 15 def document(complete_path) 16 search_by_complete_path(path) 17 end 18 19 def cached_mtime(complete_path) 20 21 end 22 23 def search_by_complete_path(complete_path) 24 search('complete_path:"'<<complete_path<<'"') 25 end 26 27 def delete_by_complete_path(complete_path) 28 search_by_complete_path(complete_path).hits.each{|hit| 29 delete(hit.doc) 30 } 31 end 3 32 end branches/rewrote_indexer/lib/picolena/templates/app/models/indexer.rb
r150 r151 3 3 4 4 class << self 5 def fields_for( filename)5 def fields_for(complete_path) 6 6 { 7 :complete_path => complete_path=File.expand_path(filename),7 :complete_path => complete_path, 8 8 :probably_unique_id => complete_path.base26_hash, 9 :file => File.basename(filename),10 :basename => File.basename(filename, File.extname(filename)).gsub(/_/,' '),11 :filetype => File.extname(filename),12 :date => File.mtime(filename).strftime("%Y%m%d%H%M")9 :file => File.basename(complete_path), 10 :basename => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '), 11 :filetype => File.extname(complete_path), 12 :date => File.mtime(complete_path).strftime("%Y%m%d%H%M%S") 13 13 } 14 14 end 15 15 16 def index_every_directory 16 def index_every_directory(update=true) 17 17 log :debug => "Indexing every directory" 18 19 @update = update 20 reset! unless update 18 21 19 22 IndexedDirectories.each{|dir, alias_dir| … … 24 27 end 25 28 26 27 29 def index_directory(dir) 28 30 log :debug => "Indexing #{dir}" 29 31 30 32 Dir.glob(File.join(dir,"**/*")){|filename| 31 index_file(filename, File.mime(filename)) if File.file?(filename) && filename !~ Exclude33 add_or_update_file(File.expand_path(filename)) if File.file?(filename) && filename !~ Exclude 32 34 } 33 35 end 34 36 37 def add_or_update_file(complete_path) 38 should_be_added = true 39 if @update then 40 log :debug => "What to do with #{complete_path} ?" 41 occurences = reader.occurences_number(complete_path) 42 log :debug => "\tappears #{occurences} times in the index" 43 case occurences 44 when 0 45 #Nothing to do here, the file will be added. 46 when 1 47 d=Document.find_by_complete_path(complete_path) 48 if File.mtime(complete_path).strftime("%Y%m%d%H%M%S").to_i > d.mtime then 49 log :debug => "\thas been modified" 50 delete(complete_path) 51 else 52 should_be_added = false 53 log :debug => "\thas not been modified. 
leaving it" 54 end 55 else 56 delete_file(complete_path) 57 end 58 end 59 add_file(complete_path) if should_be_added 60 end 35 61 36 def index_file(filename,mime_type = nil) 37 log :debug => "Indexing #{filename}" 62 def add_file(complete_path) 63 log :debug => "Adding #{complete_path}" 64 mime_type=File.mime(complete_path) 65 fields = fields_for(complete_path) 38 66 39 fields = fields_for(filename) 40 41 if mime_type then 42 begin 43 text = PlainText.extract_content_from(filename) 44 raise "empty document #{filename}" if text.strip.empty? 45 fields[:content] = text 46 rescue => e 47 log :debug => "indexing without content: #{e.message}" 48 end 67 begin 68 text = PlainText.extract_content_from(complete_path) 69 raise "\tempty document #{complete_path}" if text.strip.empty? 70 fields[:content] = text 71 rescue => e 72 log :debug => "\tindexing without content: #{e.message}" 49 73 end 50 74 … … 56 80 end 57 81 82 def reader 83 @@reader ||= IndexReader.new 84 end 85 58 86 def reset! 59 87 log :debug => "Resetting Index" 60 88 @@writer=nil 89 @@reader=nil 61 90 IndexWriter.remove 91 end 92 93 def delete_file(complete_path) 94 log :debug => "\tRemoving from index" 95 reader.delete_by_complete_path(complete_path) 62 96 end 63 97
