root/trunk/lib/picolena/templates/lib/core_exts.rb

Revision 323, 3.2 kB (checked in by eric.dumin..@gmail.com, 7 months ago)

Moved log methods to IndexerLogger.

  • Property svn:executable set to *
Line 
require 'digest/md5'

class String
  # Creates a "probably unique" id with the desired length, composed only of
  # lowercase letters.
  #
  # The id is derived from the MD5 digest of the string: the digest is written
  # in base 26, its digits (0-9, a-p) are mapped onto the letters a-z, and the
  # last +length+ letters are kept.
  #
  # length:: number of letters wanted (defaults to Picolena::HashLength).
  #
  # Returns a String of exactly +length+ letters for any non-negative length.
  # When the base-26 representation has fewer digits than +length+, it is
  # left-padded with the zero digit (which maps to 'a') instead of returning
  # nil.
  def base26_hash(length=Picolena::HashLength)
    base26_digits = Digest::MD5.hexdigest(self).to_i(16).to_s(26)
    # Pad with leading zeros first, so that slicing the last +length+
    # characters can never fall off the left end of the string.
    padded_digits = base26_digits.rjust(length, '0')
    padded_digits.tr('0-9a-p', 'a-z')[-length, length]
  end
end
7
module Enumerable
  # Threaded counterpart of Enumerable#each: the block is called once per
  # element, each call running in its own thread.
  # Used by the indexer to make it multi-threaded.
  # Every thread is joined before the method returns, and the array of
  # (finished) threads is the return value.
  def each_with_thread(&block)
    workers = map do |item|
      # The element is handed over as a Thread.new argument, so each thread
      # receives its own reference to it.
      Thread.new(item) { |thread_item| block.call(thread_item) }
    end
    workers.each { |worker| worker.join }
  end
end
21
class Array
  # Returns a partition of n arrays.
  # Elements are dealt out round-robin (element i goes to slice i % n), to
  # avoid getting arrays that are too different.
  #   >> (0..17).to_a.in_transposed_slices(5)
  #   => [[0, 5, 10, 15], [1, 6, 11, 16], [2, 7, 12, 17], [3, 8, 13], [4, 9, 14]]
  # while
  #   >> (0..17).each_slice(5).to_a
  #   => [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14], [15, 16, 17]]
  #
  # If some folders contain big files and some others contain small ones,
  # every indexing thread will get some of both!
  #
  # n:: number of slices wanted; expected to be a positive Integer.
  #
  # Always returns n arrays (some may be empty when the receiver has fewer
  # than n elements). nil elements of the receiver are kept.
  def in_transposed_slices(n)
    # no need to compute anything if n==1
    return [self] if n==1
    # One bucket per slice; a block is used so that every bucket is a
    # distinct array.
    slices = Array.new(n) { [] }
    # Dealing by index gives the same result as padding, slicing and
    # transposing, without depending on Enumerable#enum_slice (gone since
    # Ruby 1.9) and without a compact step that would also drop real nils.
    each_with_index { |elem, index| slices[index % n] << elem }
    slices
  end
end
45
class Hash
  # Counts one more occurrence of +category+.
  # The first time a category is seen, an entry {:size=>0} is created for it;
  # the entry's :size is then incremented.
  # Returns the updated count for that category.
  def add(category)
    entry = (self[category] ||= {:size => 0})
    entry[:size] += 1
  end
end
52
require 'fileutils'
require 'shellwords'

class File
  # Returns the filetype of filename as a symbol.
  # Returns :no_extension unless an extension is found
  #  >> File.ext_as_sym("test.pdf")
  #  => :pdf
  #  >> File.ext_as_sym("test.tar.gz")
  #  => :gz
  #  >> File.ext_as_sym("test")
  #  => :no_extension
  def self.ext_as_sym(filename)
    # to_s keeps the method best-effort for nil (=> :no_extension).
    extension = File.extname(filename.to_s).sub(/^\./,'').downcase
    # An explicit check is needed: "".to_sym only raises on Ruby 1.8, newer
    # versions would silently return the empty symbol.
    extension.empty? ? :no_extension : extension.to_sym
  end

  # Returns a probable encoding for a given plain text file
  # If source is a html file, it parses for metadata to retrieve encoding,
  # and uses file -i otherwise.
  # Returns iso-8859-15 instead of iso-8859-1, to be sure € char can be
  # encoded
  # Returns an empty string when no charset is found or when it is reported
  # as "unknown".
  # It requires head, file, grep and sed commands.
  def self.encoding(source)
    # The pattern is single-quoted so the shell cannot glob-expand the
    # brackets before grep sees them.
    parse_for_charset="grep -io 'charset=[a-z0-9-]*' | sed 's/charset=//i'"
    # The filename is shell-escaped: it may contain quotes, $ or backticks.
    escaped_source=Shellwords.escape(source.to_s)
    if File.extname(source)[0,4]==".htm" then
      enc=%x{head -n20 #{escaped_source} | #{parse_for_charset}}.chomp
    else
      enc=%x{file -i #{escaped_source} | #{parse_for_charset}}.chomp
    end
    # grep -io keeps the case found in the file (e.g. "ISO-8859-1" in a html
    # meta tag), so the comparison is done on the downcased value.
    #iso-8859-15 should be used instead of iso-8859-1, for € char
    case enc.downcase
     when "iso-8859-1"
       "iso-8859-15"
     when "unknown"
       ""
     else
       enc
     end
  end

  # Returns the content of a file and removes it after.
  # Could be used to read temporary output file written by a PlainTextExtractor.
  def self.read_and_remove(filename)
    content=read(filename)
    # :force => true : errors while removing the file are ignored.
    FileUtils.rm filename, :force=>true
    content
  end

  # Returns nil unless filename is a plain text file
  # (otherwise returns the position of the match in the output of file -i).
  # It requires file command.
  # NOTE: What to use for Win32?
  def self.plain_text?(filename)
    # The filename is shell-escaped: it may contain quotes, $ or backticks.
    %x{file -i #{Shellwords.escape(filename.to_s)}} =~ /: text\//
  end
end
Note: See TracBrowser for help on using the browser.