Author: ben
Date: 2007-11-17 19:08:09 -0800 (Sat, 17 Nov 2007)
New Revision: 7309

Modified:
   sandbox/ben/ruby/linkchecker.rb
Log:
progress reporting results of link checker.

Modified: sandbox/ben/ruby/linkchecker.rb
===================================================================
--- sandbox/ben/ruby/linkchecker.rb     2007-11-18 02:39:12 UTC (rev 7308)
+++ sandbox/ben/ruby/linkchecker.rb     2007-11-18 03:08:09 UTC (rev 7309)
@@ -6,7 +6,7 @@
 # start at a particular server...
 LASZLO_SERVER = "localhost"
 LASZLO_PORT = 8080
-CHECK_EXTERNAL_LINKS = false
+CHECK_EXTERNAL_LINKS = true
 
 class HTMLPage
   @url
@@ -64,7 +64,7 @@
   
   def absolute_url
     if @target =~ %r{http:} then
-      return "BBBB" + @target
+      return  @target
     end 
     if @source =~ %r{http:} then
       # the source url is absolute, and the target is not, so let's assemble a new absolute url
@@ -90,7 +90,7 @@
         # puts "relative_url is #{relative_url}"
 
         # strip one directory off the end of the relative url so far
-        if relative_url =~ %r{(\S*)/[^\s\d.]+$} 
+        if relative_url =~ %r{(\S*)/\S+$} 
           relative_url = $1
         else
           puts "TROUBLE didn't match relative_url #{relative_url}"
@@ -137,11 +137,18 @@
     page = HTMLPage.new( url ) 
     if page.is_ok? then
       @@urls_that_exist.push( url ) 
-      filenames = page.find_urls 
-      @@files_to_check.concat( filenames )          
+      urls = page.find_urls 
+      # ignore any urls we've already checked, 
+      # and ignore any urls we're already planning to check
+      unique_new_urls = urls.uniq - @@files_to_check - @@files_weve_checked; 
+      @@files_to_check.concat( unique_new_urls )
     else
       @@missing_urls.push( url ) 
     end
+    
+    @@files_weve_checked.uniq!
+    @@urls_that_exist.uniq!
+    @@missing_urls.uniq!
   end
   
   def LinkChecker.num_files_checked 
@@ -156,6 +163,14 @@
     @@files_to_check.pop
   end
   
+  def LinkChecker.generate_report 
+    "LinkChecker report****\n" + 
+    "\n\nUnique urls we checked:  #{@@files_weve_checked.length}\n(?) " + @@files_weve_checked.join("\n(?) ") +
+    "\n\nGood urls we checked:  #{@@urls_that_exist.length} \n(OK) " + @@urls_that_exist.join("\n(OK) ")  +  
+    "\n\nBroken links found: #{@@missing_urls.length}\n(-) " + @@missing_urls.join("\n(-) ")  
+    
+  end 
+  
 end 
 
 class TestLink < Test::Unit::TestCase
@@ -223,6 +238,13 @@
 
   end
   
+  def test_implicit_page_name 
+    url = "http://localhost:8080/trunk/docs/";
+    p = HTMLPage.new( url )
+    assert( p.is_ok?, "docs root" )
+  end
+  
+  
   def test_something_simple 
     assert_equal( LinkChecker.num_files_to_check, 0 ) 
     assert_equal( LinkChecker.num_files_checked, 0 ) 
@@ -237,12 +259,13 @@
       assert_not_nil( next_file_to_check ) 
       LinkChecker.check_next_file
       num_checked_now = LinkChecker.num_files_checked
-      assert( num_checked_now > num_checked_so_far ) 
+      assert( num_checked_now >= num_checked_so_far ) 
       num_checked_so_far = num_checked_now       
       i += 1 
     end
 
     puts "done. Checked #{i} files."
+    puts LinkChecker.generate_report
   end 
   
   


_______________________________________________
Laszlo-checkins mailing list
[email protected]
http://www.openlaszlo.org/mailman/listinfo/laszlo-checkins

Reply via email to