Changeset 310
- Timestamp:
- 05/02/08 06:30:44 (7 months ago)
- Files:
-
- trunk/lib/picolena/templates/lib/core_exts.rb (modified) (1 diff)
- trunk/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/finder_spec.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/lib/picolena/templates/lib/core_exts.rb
r309 r310
69 69   content
70 70   end
   71 +
   72 + def self.plain_text?(filename)
   73 + %x{file -i "#{filename}"} =~ /: text\//
   74 + end
71 75   end
trunk/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb
r263 r310
 4  4   aka "plain text file"
 5  5   with {|source|
 6    - encoding=File.encoding(source)
 7    - #TODO: Return "binary file" if binary
 8    - if encoding.empty? then
 9    - File.read(source)
    6 + if File.plain_text?(source) then
    7 + encoding=File.encoding(source)
    8 + #TODO: Return "binary file" if binary
    9 + if encoding.empty? then
   10 + File.read(source)
   11 + else
   12 + %x{iconv -f #{encoding} -t utf8 "#{source}" 2>/dev/null}
   13 + end
10 14   else
11    - %x{iconv -f #{encoding} -t utf8 "#{source}" 2>/dev/null}
   15 + "binary file!"
12 16   end
13 17   }
trunk/lib/picolena/templates/spec/models/finder_spec.rb
r301 r310
124 124   end
125 125
126     - it "should not index content of binary files"
127 126
128 127   # Ferret sometimes SEGFAULT crashed with '*.pdf' queries
trunk/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
r301 r310
28 28   }
29 29   }
   30 +
   31 + it "should not extract content of binary files" do
   32 + PlainTextExtractor.extract_content_from("spec/test_dirs/indexed/others/BIN_FILE_WITHOUT_EXTENSION").should == "binary file!"
   33 + end
30 34   end
