root/trunk/lib/picolena/templates/lib/plain_text_extractors/plain_text.rb

Revision 323, 1.6 kB (checked in by eric.dumin..@gmail.com, 5 months ago)

Moved log methods to IndexerLogger

  • Property svn:executable set to *
Line 
# Extractor definition for plain-text files, written with the
# PlainTextExtractor DSL (every/as/aka/with/which_requires/... are defined
# elsewhere in the project).
# NOTE(review): presumably `every` lists the handled file extensions, `as` the
# MIME type and `aka` a human-readable description -- confirm against the
# PlainTextExtractor class.
require 'shellwords'

PlainTextExtractor.new do
  every :txt, :text, :tex, :for, :cpp, :c, :rb, :ins, :vee, :java, :no_extension
  as "application/plain"
  aka "plain text file"

  # Extraction block: receives the path of the file to read and evaluates to
  # its content. Raises RuntimeError ("binary file") when File.plain_text?
  # rejects the file. File.plain_text? and File.encoding are not visible here
  # (presumably project extensions of File -- TODO confirm).
  with do |source|
    raise "binary file" unless File.plain_text?(source)

    encoding = File.encoding(source)
    if encoding.empty?
      # No encoding reported: hand back the raw file content.
      File.read(source)
    else
      # The path is shell-escaped instead of merely wrapped in double quotes:
      # inside "..." the shell still interprets ", $, ` and \, so such file
      # names used to break the command (or run arbitrary commands).
      # `encoding` is interpolated exactly as before; iconv's stderr is
      # discarded, so a failed conversion yields whatever iconv wrote to stdout.
      %x{iconv -f #{encoding} -t utf8 #{Shellwords.escape(source)} 2>/dev/null}
    end
  end

  # for dependencies spec
  which_requires 'iconv'

  # to check if the extractor is working with basic plain text files
  which_should_for_example_extract 'Hello world!', :from => 'hello.rb'
  or_extract 'text inside!', :from => 'crossed.txt'
  or_extract 'txt inside!', :from => 'crossed.text'
  or_extract 'should index LaTeX too', :from => 'basic.tex'
  or_extract 'public static void main', :from => 'myfirstjavaprog.java'
  or_extract '"void CMatrix::inv()"', :from => 'ccmatrix1.cpp'
  or_extract 'CALL TEST(BOARD,X,Y,POSSIBLE)', :from => 'queens.for'
  or_extract 'This program calculates greatest common divisors', :from => 'gcd.c'
  or_extract 'p 3 square.dat', :from => 'square.ins'
  or_extract 'Do loop (global)', :from => 'xor.vee'

  # to check if other charsets are supported
  # NOTE(review): the non-ASCII literals in the next three lines look
  # mis-encoded in this copy of the file; they are kept byte-for-byte here --
  # verify them against the fixture files before relying on them.
  or_extract 'pÌöÌökÀößß AND ßklÌöÌ', :from => 'utf-8.txt'
  or_extract 'Themenliste fÃŒr AdsorptionskÀlte', :from => 'iso-8859-1.txt'
  or_extract 'F€rnwÀrme', :from => 'iso-8859-15.txt'
  or_extract 'The previous line includes some weird chars. Will this file be indexed\?', :from => 'weird_chars.txt'
  or_extract 'Incidentally this file should get indexed as well', :from => 'README'
  or_extract 'OMG_thIs_Is_A_w3ird_fileN4mE!', :from => "'weird'filename.txt"
end
Note: See TracBrowser for help on using the browser.