Changeset 170
- Timestamp:
- 04/19/08 12:21:58 (7 months ago)
- Files:
-
- branches/oo_indexer/lib/picolena/templates/app/models/document.rb (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/app/models/filter.rb (modified) (4 diffs)
- branches/oo_indexer/lib/picolena/templates/app/models/indexer.rb (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/config/initializers/004_load_filters.rb (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/lib/filter_DSL.rb (modified) (3 diffs)
- branches/oo_indexer/lib/picolena/templates/lib/filters/adobe.pdf.rb (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/lib/tasks/install_dependencies.rake (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/spec/helpers/documents_helper_spec.rb (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/spec/models/filters_spec.rb (modified) (1 diff)
- branches/oo_indexer/lib/picolena/templates/spec/models/host_indexing_system_spec.rb (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
branches/oo_indexer/lib/picolena/templates/app/models/document.rb
r167 r170
 55  55   # Document.new("presentation.some_weird_extension").supported? => false
 56  56   def supported?
 57     - PlainText.supported_extensions.include?(self.ext_as_sym)
     57 + Filter.supported_extensions.include?(self.ext_as_sym)
 58  58   end
 59  59
 60  60   # Retrieves content as it is *now*.
 61  61   def content
 62     - PlainText.extract_content_from(complete_path)
     62 + Filter.extract_content_from(complete_path)
 63  63   end
 64  64
branches/oo_indexer/lib/picolena/templates/app/models/filter.rb
r168 r170
  1   1   require 'filter_DSL'
  2   2
  3     - module PlainText
      3 + class Filter
  4   4   @@filters=[]
  5   5
  6   6   #returns every defined filter
  7     - def self.filters
      7 + def self.all
  8   8   @@filters
      9 + end
     10 +
     11 + def self.each(&block)
     12 + all.each(&block)
  9  13   end
 10  14
 11  15   #returns every required dependency for every defined filter
 12     - def self.filter_dependencies
 13     - @@dependencies||=filters.collect{|filter| filter.dependencies}.flatten.compact.uniq.sort
     16 + def self.dependencies
     17 + @@dependencies||=all.collect{|filter| filter.dependencies}.flatten.compact.uniq.sort
 14  18   end
 15  19
 16  20   #returns every supported file extensions
 17  21   def self.supported_extensions
 18     - @@supported_exts||=filters.collect{|filter| filter.exts}.flatten.compact.uniq
     22 + @@supported_exts||=all.collect{|filter| filter.exts}.flatten.compact.uniq
 19  23   end
 20  24
  …   …
 22  26   def self.find_filter_for(filename)
 23  27   ext=File.ext_as_sym(filename)
 24     - filter=filters.find{|filter| filter.exts.include?(ext)} || raise(ArgumentError, "no convertor for #{filename}")
     28 + filter=all.find{|filter| filter.exts.include?(ext)} || raise(ArgumentError, "no convertor for #{filename}")
 25  29   filter.source=filename
 26  30   filter
  …   …
 33  37
 34  38
 35     - class Filter
 36  39   attr_accessor :source
 37  40
  …   …
 72  75   end
 73  76   end
 74     - end
 75  77   end
branches/oo_indexer/lib/picolena/templates/app/models/indexer.rb
r167 r170
100 100
101 101   begin
102     - text = PlainText.extract_content_from(complete_path)
    102 + text = Filter.extract_content_from(complete_path)
103 103   raise "\tempty document #{complete_path}" if text.strip.empty?
104 104   fields[:content] = text
branches/oo_indexer/lib/picolena/templates/config/initializers/004_load_filters.rb
r81 r170
  1   1   require 'core_exts'
  2     - require 'filter'
      2 + require 'filter_DSL'
  3   3
  4   4   Dir.glob(File.join(RAILS_ROOT,'lib/filters/*.rb')).each{|filter|
branches/oo_indexer/lib/picolena/templates/lib/filter_DSL.rb
r81 r170
  1   1   #Module used to define Filters with DSL
  2   2   #For example, to convert "Microsoft Office Word document" to plain text
  3     - # PlainText.extract{
      3 + # Filter.new {
  4   4   # from :doc, :dot
  5   5   # as "application/msword"
  …   …
  8   8   # which_should_for_example_extract 'district heating', :from => 'Types of malfunction in DH substations.doc'
  9   9   # }
 10     - module PlainText
 11     - #defines a new Filter with DSL
 12     - def self.extract(&block)
 13     - filter = Filter.new
 14     - filter.instance_eval(&block)
 15     - @@filters<<filter
 16     - MimeType.add(filter.exts,filter.mime_name)
 17     - end
 18     -
 19     - #defined by DSL described in PlainText
 20     - class Filter
     10 + require 'app/models/filter.rb'
     11 + class Filter
 21  12   attr_reader :exts, :mime_name, :description, :command, :content_and_file_examples
 22  13
 23     - def initialize
 24     - @content_and_file_examples=[]
     14 + def initialize(&block)
     15 + @content_and_file_examples=[]
     16 + self.instance_eval(&block)
     17 + @@filters<<self
     18 + MimeType.add(self.exts,self.mime_name)
 25  19   end
 26  20
  …   …
 74  68   @command<<' 2>/dev/null' if (@command.is_a?(String) && platform==:on_linux && !@command.include?('|'))
 75  69   end
 76     - end
 77  70   end
branches/oo_indexer/lib/picolena/templates/lib/filters/adobe.pdf.rb
r169 r170
  6   6
  7   7   Filter.new {
      8 + puts self.inspect
  8   9   from :pdf
  9  10   as "application/pdf"
branches/oo_indexer/lib/picolena/templates/lib/tasks/install_dependencies.rake
r84 r170
 30  30   task :deb_packages do
 31  31   root_privileges_required!
 32     - #TODO: Should load this list from defined PlainText.filters
     32 + #TODO: Should load this list from defined Filters
 33  33   packages=%w{antiword poppler-utils odt2txt html2text catdoc unrtf}.join(" ")
 34  34   puts "Installing "<<packages
branches/oo_indexer/lib/picolena/templates/spec/helpers/documents_helper_spec.rb
r81 r170
  4   4   it "shouldn't raise if matching not in content field"
  5   5
  6     - PlainText.supported_extensions.each{|ext|
      6 + Filter.supported_extensions.each{|ext|
  7   7   it "should have an icon for .#{ext} filetype" do
  8   8   icon_for(ext.to_s).should_not be_nil
branches/oo_indexer/lib/picolena/templates/spec/models/filters_spec.rb
r153 r170
  6   6   end
  7   7
  8     - PlainText.filters.each{|filter|
      8 + Filter.each{|filter|
  9   9   filter.exts.each{|ext|
 10  10   should_extract= "should be able to extract content from #{filter.description} (.#{ext})"
branches/oo_indexer/lib/picolena/templates/spec/models/host_indexing_system_spec.rb
r167 r170
  2   2
  3   3   describe "Host indexing system" do
  4     - PlainText.filter_dependencies.each do |dependency|
      4 + Filter.dependencies.each do |dependency|
  5   5   it "should have #{dependency} installed" do
  6   6   IO.popen("which #{dependency}"){|i| i.read.should_not be_empty}
