Changeset 213
- Timestamp:
- 04/22/08 04:17:50 (7 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/lib/picolena/templates/app/models/plain_text_extractor.rb
r212 r213 49 49 end 50 50 51 # Launches extractor on given file and outputs plain text result and language (if found) 51 52 def extract_content_and_language_from(source) 52 53 find_by_filename(source).extract_content_and_language 53 54 end 54 55 56 # Returns which language guesser should be used by the system. 57 # Returns nil if none is found. 55 58 def language_guesser 56 59 @@language_guesser||=('mguesser -n1' unless IO.popen("which mguesser"){|i| i.read}.empty?) … … 112 115 def extract_content_and_language 113 116 content=extract_content 114 # Language recognition is too unreliable for small files. 115 return [content, nil] unless Picolena::UseLanguageRecognition && PlainTextExtractor.language_guesser && content.size > 500 117 return [content, nil] unless [# Is LanguageRecognition turned on? (cf config/custom/picolena.rb) 118 Picolena::UseLanguageRecognition, 119 # Is a language guesser already installed? 120 PlainTextExtractor.language_guesser, 121 # Language recognition is too unreliable for small files. 122 content.size > 500].all? 116 123 language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser| 117 124 lang_guesser.write content
