Changeset 301
- Timestamp:
- 04/30/08 07:53:08 (7 months ago)
- Files:
-
- trunk/History.txt (modified) (1 diff)
- trunk/Manifest.txt (modified) (2 diffs)
- trunk/config/files_to_clean (modified) (1 diff)
- trunk/lib/picolena/config/basic.rb (modified) (1 diff)
- trunk/lib/picolena/config/indexing_performance.yml (copied) (copied from branches/log_indexing_time_with_ferret/lib/picolena/config/indexing_performance.yml)
- trunk/lib/picolena/picolena_generator.rb (modified) (2 diffs)
- trunk/lib/picolena/templates/app/controllers/documents_controller.rb (modified) (1 diff)
- trunk/lib/picolena/templates/app/helpers/documents_helper.rb (modified) (1 diff)
- trunk/lib/picolena/templates/app/models/document.rb (modified) (1 diff)
- trunk/lib/picolena/templates/app/models/indexer.rb (modified) (1 diff)
- trunk/lib/picolena/templates/app/views/documents/show.html.haml (modified) (1 diff)
- trunk/lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb (copied) (copied from branches/log_indexing_time_with_ferret/lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb)
- trunk/lib/picolena/templates/spec/models/document_spec.rb (modified) (3 diffs)
- trunk/lib/picolena/templates/spec/models/finder_spec.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/host_indexing_system_spec.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/indexer_spec.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb (modified) (1 diff)
- trunk/lib/picolena/templates/spec/models/query_spec.rb (modified) (2 diffs)
- trunk/website/index.html (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/History.txt
r284 r301 1 == 0.1.7 2008-04- 1 == 0.1.7 2008-04-30 2 2 3 * 4minor enhancements:3 * 5 minor enhancements: 4 4 * added cache highlighting à la Google 5 5 * rake index:update implemented as described in Ferret book by David Balmain 6 * rake index:prune removes missing files from indexer .6 * rake index:prune removes missing files from indexer 7 7 * possibility to sort results by relevance / by date 8 * one configuration file for performance tweaks 8 9 9 10 == 0.1.6 2008-04-25 trunk/Manifest.txt
r251 r301 12 12 lib/picolena/config/icons_and_filetypes.yml 13 13 lib/picolena/config/indexed_directories.yml 14 lib/picolena/config/indexing_performance.yml 14 15 lib/picolena/config/title_and_names_and_links.yml 15 16 lib/picolena/config/white_list_ip.yml … … 43 44 lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb 44 45 lib/picolena/templates/config/initializers/006_load_icons.rb 46 lib/picolena/templates/config/initializers/007_load_performance_tweaks.rb 45 47 lib/picolena/templates/config/routes.rb 46 48 lib/picolena/templates/lang/ui/de.yml trunk/config/files_to_clean
r166 r301 6 6 lib/picolena/templates/config/custom/title_and_names_and_links.yml 7 7 lib/picolena/templates/config/custom/icons_and_filetypes.yml 8 lib/picolena/templates/config/custom/indexing_performance.yml 8 9 lib/picolena/templates/log 9 10 lib/picolena/templates/spec/test_dirs/indexed/others/bÀñÌÃé.txt trunk/lib/picolena/config/basic.rb
r263 r301 43 43 # see http://ferret.davebalmain.com/api/classes/Ferret/QueryParser.html for more information. 44 44 Ferret::Search::FuzzyQuery.default_min_similarity=0.6 45 Analyzer=Ferret::Analysis::StandardAnalyzer.new 45 46 # PerFieldAnalyzer is used to prevent queries like "language:it" to be broken by StopFilter. 47 per_field_analyzer=Ferret::Analysis::PerFieldAnalyzer.new(Ferret::Analysis::StandardAnalyzer.new) 48 per_field_analyzer[:language]=Ferret::Analysis::WhiteSpaceAnalyzer.new 49 Analyzer=per_field_analyzer 46 50 end trunk/lib/picolena/picolena_generator.rb
r263 r301 17 17 @destination_root = options[:destination] 18 18 19 @directories_to_index=ARGV.collect{|relative_path| 20 abs_dir=Pathname.new(relative_path).realpath.to_s 21 "\"#{abs_dir}\" : \"#{abs_dir}\"" 22 }.join("\n ") 19 @directories_to_index=if options[:spec_only] then 20 "/whatever : /whatever" 21 else 22 ARGV.collect{|relative_path| 23 abs_dir=Pathname.new(relative_path).realpath.to_s 24 "\"#{abs_dir}\" : \"#{abs_dir}\"" 25 }.join("\n ") 26 end 23 27 24 28 extract_options … … 64 68 m.template '../config/title_and_names_and_links.yml', 'config/custom/title_and_names_and_links.yml', :assigns => {:version => Picolena::VERSION::STRING} 65 69 m.file '../config/icons_and_filetypes.yml', 'config/custom/icons_and_filetypes.yml' 70 m.file '../config/indexing_performance.yml', 'config/custom/indexing_performance.yml' 66 71 67 72 # README, License & Rakefile trunk/lib/picolena/templates/app/controllers/documents_controller.rb
r283 r301 23 23 start=Time.now 24 24 @query=[params[:id],params.delete(:format)].compact.join('.') 25 sort=params[:sort] 25 @sort=params[:sort] 26 26 page=params[:page]||1 27 finder=Finder.new(@query,sort,page) 27 finder=Finder.new(@query,@sort,page) 28 28 finder.execute! 29 29 pager=::Paginator.new(finder.total_hits, Picolena::ResultsPerPage) do trunk/lib/picolena/templates/app/helpers/documents_helper.rb
r283 r301 7 7 # Very basic pagination. 8 8 # Provides liks to Next, Prev and FirstPage when needed. 9 def should_paginate(page,query )10 [(link_to("←←", :action => :show, :id => query, : page => 1) if page.number>2),11 (link_to("←", :action => :show, :id => query, :page => page.prev.number ) if page.prev?),12 (link_to("→", :action => :show, :id => query, :page => page.next.number ) if page.next?)].compact.join(" | ")9 def should_paginate(page,query, sort) 10 [(link_to("←←", :action => :show, :id => query, :sort=>sort) if page.number>2), 11 (link_to("←", :action => :show, :id => query, :page => page.prev.number, :sort=>sort) if page.prev?), 12 (link_to("→", :action => :show, :id => query, :page => page.next.number, :sort=>sort) if page.next?)].compact.join(" | ") 13 13 end 14 14 trunk/lib/picolena/templates/app/models/document.rb
r279 r301 70 70 :field => :content, :excerpt_length => :all, 71 71 :pre_tag => "<<", :post_tag => ">>" 72 ) 72 ).first 73 73 end 74 74 trunk/lib/picolena/templates/app/models/indexer.rb
r283 r301 135 135 # Great way to ensure that no file is indexed twice! 136 136 :key => :probably_unique_id 137 } 137 }.merge Picolena::IndexingConfiguration 138 138 end 139 139 trunk/lib/picolena/templates/app/views/documents/show.html.haml
r283 r301 8 8 =show_time_needed(@time_needed) 9 9 -else 10 %span{:class=>'pagination'}=should_paginate(@matching_documents, @query) 10 %span{:class=>'pagination'}=should_paginate(@matching_documents, @query, @sort) 11 11 =describe_results(@matching_documents, @total_hits, @time_needed, h(@query)) 12 12 -unless nothing_found? trunk/lib/picolena/templates/spec/models/document_spec.rb
r270 r301 6 6 :complete_path=>File.join(RAILS_ROOT, '/spec/test_dirs/indexed/basic/basic.pdf'), 7 7 :extname=>'.pdf', 8 :filename=>'basic.pdf' 8 :ext_as_sym => :pdf, 9 :filename=>'basic.pdf', 10 :size => 9380 9 11 } 10 12 11 13 describe Document do 12 14 before(:each) do 13 @valid_ random_doc=Document.find(:random) rescueDocument.new("spec/test_dirs/indexed/basic/basic.pdf")15 @valid_document=Document.new("spec/test_dirs/indexed/basic/basic.pdf") 14 16 end 15 17 16 18 it "should be an existing file" do 17 19 lambda {Document.new("/patapouf.txt")}.should raise_error(Errno::ENOENT) 18 lambda {@valid_ random_doc}.should_not raise_error20 lambda {@valid_document}.should_not raise_error 19 21 lambda {Document.new("spec/test_dirs/not_indexed/Rakefile")}.should_not raise_error(Errno::ENOENT) 20 22 end 21 23 22 24 it "should belong to an indexed directory" do 23 lambda {@valid_ random_doc}.should_not raise_error25 lambda {@valid_document}.should_not raise_error 24 26 lambda {Document.new("spec/test_dirs/not_indexed/Rakefile")}.should raise_error(ArgumentError, "required document is not in indexed directory") 25 27 end … … 27 29 basic_pdf_attribute.each{|attribute,expected_value| 28 30 it "should know its #{attribute}" do 29 @valid_ random_doc.should respond_to(attribute)31 @valid_document.should respond_to(attribute) 30 32 @basic_pdf=Document.new('spec/test_dirs/indexed/basic/basic.pdf') 31 33 @basic_pdf.send(attribute).should == expected_value … … 37 39 another_doc.content.should == "just a content test\nin a txt file" 38 40 end 41 42 it "should know its cached content" do 43 another_doc=Document.new("spec/test_dirs/indexed/basic/plain.txt") 44 another_doc.cached.should == "just a content test\nin a txt file" 45 end 46 47 it "should know its highlighted cached content for a given query" do 48 another_doc=Document.new("spec/test_dirs/indexed/basic/plain.txt") 49 another_doc.highlighted_cache('a content test').should == "just a <<content>> <<test>>\nin a txt file" 50 end 39 
51 40 52 it "should know its alias_path" do 41 @valid_ random_doc.should respond_to(:alias_path)42 @valid_ random_doc.alias_path.starts_with?("http://picolena.devjavu.com/browser/trunk/lib/picolena/templates/spec/test_dirs/indexed").should be_true53 @valid_document.should respond_to(:alias_path) 54 @valid_document.alias_path.starts_with?("http://picolena.devjavu.com/browser/trunk/lib/picolena/templates/spec/test_dirs/indexed").should be_true 43 55 end 56 57 it "should know its probably_unique_id" do 58 @valid_document.should respond_to(:probably_unique_id) 59 @valid_document.probably_unique_id.should =~/^[a-z]+$/ 60 @valid_document.probably_unique_id.size.should == Picolena::HashLength 61 end 62 63 it "should know its modification date" do 64 @valid_document.pretty_date.class.should == String 65 @valid_document.pretty_date.should =~/^\d{4}\-\d{2}\-\d{2}$/ 66 end 67 68 it "should know its modification time and returns it in a pretty way" do 69 @valid_document.should respond_to(:mtime) 70 @valid_document.mtime.should be_kind_of(Integer) 71 @valid_document.should respond_to(:pretty_mtime) 72 @valid_document.pretty_mtime.class.should == String 73 @valid_document.pretty_mtime.should =~/^\d{4}\-\d{2}\-\d{2} \d{2}:\d{2}:\d{2}$/ 74 end 75 76 it "should know if its content can be extracted" do 77 @valid_document.should respond_to(:supported?) 
78 @valid_document.should be_supported 79 Document.new("spec/test_dirs/indexed/others/ghjopdfg.xyz").should_not be_supported 80 end 81 82 it "should know its language when enough content is available" do 83 Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de" 84 Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en" 85 Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es" 86 Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr" 87 end if Picolena::UseLanguageRecognition 88 89 it "should not try to guess language when file is too small" do 90 Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil 91 Document.new("spec/test_dirs/indexed/README").language.should be_nil 92 end if Picolena::UseLanguageRecognition 44 93 45 94 it "should let finder specify its score" do 46 @valid_ random_doc.should respond_to(:score)47 @valid_ random_doc.score.should be_nil48 @valid_ random_doc.score=2549 @valid_ random_doc.score.should == 2595 @valid_document.should respond_to(:score) 96 @valid_document.score.should be_nil 97 @valid_document.score=25 98 @valid_document.score.should == 25 50 99 end 51 100 52 101 it "should let finder specify its matching content" do 53 @valid_ random_doc.should respond_to(:matching_content)54 @valid_ random_doc.matching_content.should be_nil55 @valid_ random_doc.matching_content=["thermal cooling", "heat driven cooling"]56 @valid_ random_doc.matching_content.should include("thermal cooling")102 @valid_document.should respond_to(:matching_content) 103 @valid_document.matching_content.should be_nil 104 @valid_document.matching_content=["thermal cooling", "heat driven cooling"] 105 @valid_document.matching_content.should include("thermal cooling") 57 106 end 58 107 end trunk/lib/picolena/templates/spec/models/finder_spec.rb
r283 r301 20 20 describe Finder do 21 21 before(:all) do 22 Globalite.language = :en 22 23 # SVN doesn't like non-ascii filenames. 23 24 revert_changes!('spec/test_dirs/indexed/others/bÀñÌÃé.txt',"just to know if files are indexed with utf8 filenames") trunk/lib/picolena/templates/spec/models/host_indexing_system_spec.rb
r263 r301 14 14 it "should know which IP addresses are allowed (config/custom/white_list_ip.yml)" do 15 15 File.should be_readable('config/custom/white_list_ip.yml') 16 ip_conf=YAML.load_file('config/custom/white_list_ip.yml') 17 ip_conf.class.should == Hash 18 ip_conf['Allow'].should_not be_nil 16 19 end 17 20 18 21 it "should know which directories are to be indexed (config/custom/indexed_directories.yml)" do 19 22 File.should be_readable('config/custom/indexed_directories.yml') 23 dirs_conf=YAML.load_file('config/custom/indexed_directories.yml') 24 dirs_conf.class.should == Hash 25 %w(development test production).all?{|env| 26 dirs_conf[env].should_not be_nil 27 } 20 28 end 21 29 trunk/lib/picolena/templates/spec/models/indexer_spec.rb
r148 r301 2 2 3 3 describe Indexer do 4 before(:each) do 5 @indexer = Indexer.new 4 it "should have at least 32MB memory allocated" do 5 Indexer.index.writer.max_buffer_memory.should > 2**25-1 6 6 end 7 7 end trunk/lib/picolena/templates/spec/models/plain_text_extractor_spec.rb
r263 r301 28 28 } 29 29 } 30 31 it "should guess language when enough content is available" do32 Document.new("spec/test_dirs/indexed/lang/goethe").language.should == "de"33 Document.new("spec/test_dirs/indexed/lang/shakespeare").language.should == "en"34 Document.new("spec/test_dirs/indexed/lang/lorca").language.should == "es"35 Document.new("spec/test_dirs/indexed/lang/hugo").language.should == "fr"36 end if Picolena::UseLanguageRecognition37 38 it "should not try to guess language when file is too small" do39 Document.new("spec/test_dirs/indexed/basic/hello.rb").language.should be_nil40 Document.new("spec/test_dirs/indexed/README").language.should be_nil41 end if Picolena::UseLanguageRecognition42 30 end trunk/lib/picolena/templates/spec/models/query_spec.rb
r263 r301 2 2 3 3 describe Query do 4 it "should return a BooleanQuery " do4 it "should return a BooleanQuery, a TermQuery or a RangeQuery" do 5 5 Query.extract_from("whatever").class.should == Ferret::Search::BooleanQuery 6 Query.extract_from("lang:de").class.should == Ferret::Search::TermQuery 7 Query.extract_from("date:<1990").class.should == Ferret::Search::RangeQuery 8 end 9 10 it "should not remove stop-words from TermQuery" do 11 # it means "Italian language", but also is a stop-word. 12 Query.extract_from("lang:it").class.should == Ferret::Search::TermQuery 13 Query.extract_from("lang:it").to_s.should == "language:it" 6 14 end 7 15 … … 13 21 } 14 22 23 Globalite.language = :en 15 24 english_query_with_like_and_not=Query.extract_from("LIKE something NOT something") 16 25 english_query_with_or=Query.extract_from("test OR another") trunk/website/index.html
r273 r301 115 115 <p>Comments are welcome. Send an email to <a href="mailto:eric_duminil@rubyforge.org">Eric Duminil</a> email via the <a href="http://groups.google.com/group/picolena">forum</a></p> 116 116 <p class="coda"> 117 <a href="eric_duminil@rubyforge.org">Eric DUMINIL</a>, 2 6th April 2008<br>117 <a href="eric_duminil@rubyforge.org">Eric DUMINIL</a>, 25th April 2008<br> 118 118 Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>, 119 119 by Daniel Cadenas via <a href="http://depgraph.rubyforge.org/">DepGraph</a>
