Changeset 216

Show
Ignore:
Timestamp:
04/23/08 02:13:22 (7 months ago)
Author:
eric.dumin..@gmail.com
Message:

Ready for 0.1.4?

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/History.txt

    r205 r216  
    1 == 0.1.4  2008-04- 
    2  
    3  
     1 == 0.1.4  2008-04-23 
     2 * 1 minor enhancement: 
     3   * minimal MacOS support 
    4 4 
    5 5 == 0.1.3  2008-04-20 
  • trunk/lib/picolena/templates/lib/plain_text_extractor_DSL.rb

    r178 r216  
    55 55    platform=case RUBY_PLATFORM 
    56 56    when /linux/ 
    57       :on_linux 
     57      :linux 
    58 58    when /win/ 
    59       :on_windows 
     59      :windows 
     60    when /darwin/ 
     61      :mac_os 
    60 62    end 
    61 63    @command=case command_as_hash_or_string 
     
    63 65      command_as_hash_or_string 
    64 66    when Hash 
    65       #dup must be used, otherwise @command gets frozen. No idea why though.... 
    66       command_as_hash_or_string.invert[platform].dup 
     67      # Allows to write 
     68      #     with "pdftotext -enc UTF-8 SOURCE -" => :on_linux_and_mac_os, 
     69      #          "some other command" => :on_windows 
     70      # 
     71      # On linux and mac_os platforms, it returns "pdftotext -enc UTF-8 SOURCE -", 
     72      # on windows, it returns "some other command" 
     73      # 
     74      # If commands for linux & mac os were different :  
     75      #     with "some command"        => :on_linux, 
     76      #          "another command"     => :on_mac_os, 
     77      #          "yet another command" => :on_windows 
     78      # 
     79      #TODO: Make it clearer and more robust. 
     80      #NOTE: What to do when no command is defined for a given platform? 
     81      command_as_hash_or_string.invert.find{|platforms,command| 
     82        platforms.to_s.split(/_?and_?/i).collect{|on_platform| on_platform.sub(/on_/,'').to_sym}.include?(platform) 
     83      }.last.dup 
    67 84    else 
    68 85      block || raise("No command defined for this extractor: #{description}") 
    69 86    end 
    70     @command<<' 2>/dev/null' if (@command.is_a?(String) && platform==:on_linux && !@command.include?('|')) 
     87    # TODO, replace it with Open3 or something. 
     88    @command<<' 2>/dev/null' if (@command.is_a?(String) && platform.to_s=~/(linux|mac_os)/ && !@command.include?('|')) 
    71 89  end 
    72 90 end 
  • trunk/lib/picolena/templates/lib/plain_text_extractors/adobe.pdf.rb

    r178 r216  
    9 9  as "application/pdf" 
    10 10  aka "Adobe Portable Document Format" 
    11   with "pdftotext -enc UTF-8 SOURCE -" => :on_linux, "some other command" => :on_windows 
     11  with "pdftotext -enc UTF-8 SOURCE -" => :on_linux_and_mac_os, 
     12       "some other command" => :on_windows 
    12 13  which_should_for_example_extract 'in a pdf file', :from => 'basic.pdf' 
    13 14 } 
  • trunk/lib/picolena/templates/lib/plain_text_extractors/ms.excel.rb

    r178 r216  
    5 5  as "application/excel" 
    6 6  aka "Microsoft Office Excel document" 
    7   with "xls2csv SOURCE 2>/dev/null | grep -i [a-z] | sed -e 's/\"//g' -e 's/,*$//' -e 's/,/ /g'" => :on_linux, "some other command" => :on_windows 
     7  with "xls2csv SOURCE 2>/dev/null | grep -i [a-z] | sed -e 's/\"//g' -e 's/,*$//' -e 's/,/ /g'" => :on_linux_and_mac_os, 
     8       "some other command" => :on_windows 
    8 9  which_should_for_example_extract 'Some text (should be indexed!)', :from => 'table.xls' 
    9 10 } 
  • trunk/lib/picolena/templates/lib/plain_text_extractors/ms.powerpoint.rb

    r178 r216  
    5 5  as "application/powerpoint" 
    6 6  aka "Microsoft Office Powerpoint document" 
    7   with "catppt SOURCE" => :on_linux, "some other command" => :on_windows 
     7  with "catppt SOURCE" => :on_linux_and_mac_os, 
     8       "some other command" => :on_windows 
    8 9  which_should_for_example_extract 'unofficial written by OOo Impress', :from => 'one_page.ppt' 
    9 10  #FIXME: it seems that catppt cannot open .pps files. 
  • trunk/lib/picolena/templates/lib/plain_text_extractors/ms.rtf.rb

    r178 r216  
    9 9  as "application/rtf" 
    10 10  aka "Microsoft Rich Text Format" 
    11   with "unrtf  SOURCE -t text" => :on_linux, "some other command" => :on_windows 
     11  with "unrtf  SOURCE -t text" => :on_linux_and_mac_os, 
     12       "some other command" => :on_windows 
    12 13  which_should_for_example_extract 'Resampling when limiting', :from => 'ReadMe.rtf' 
    13 14 } 
  • trunk/lib/picolena/templates/lib/plain_text_extractors/ms.word.rb

    r178 r216  
    5 5  as "application/msword" 
    6 6  aka "Microsoft Office Word document" 
    7   with "antiword SOURCE" => :on_linux, "some other command" => :on_windows 
     7  with "antiword SOURCE" => :on_linux_and_mac_os, 
     8       "some other command" => :on_windows 
    8 9  which_should_for_example_extract 'district heating', :from => 'Types of malfunction in DH substations.doc' 
    9 10  or_extract 'Basic Word template for Picolena specs', :from => 'office2003-word-template.dot'