license report should handle html documents more gracefully
-----------------------------------------------------------

         Key: MNG-903
         URL: http://jira.codehaus.org/browse/MNG-903
     Project: Maven 2
        Type: Improvement
  Components: maven-project-info-reports-plugin  
    Versions: 2.0-beta-1    
 Reporter: Matthew Pocock
    Priority: Minor


At the moment, the license report fetches whatever the license URL points to
and inserts it into the page verbatim. If the content is plain text,
everything is fine. If it is HTML, all the markup is escaped and you end
up with a very ugly page.

The attached patch checks whether the license looks like an HTML document.
If it does not, the old method is used to include it inline in the
license report page. If it does, the patch extracts the contents of the body
element, rewrites relative links to absolute ones, and includes that HTML.


Index: src/main/java/org/apache/maven/report/projectinfo/LicenseReport.java
===================================================================
--- src/main/java/org/apache/maven/report/projectinfo/LicenseReport.java       
(revision 280838)
+++ src/main/java/org/apache/maven/report/projectinfo/LicenseReport.java       
(working copy)
@@ -37,6 +37,8 @@
 import java.util.Locale;
 import java.util.MissingResourceException;
 import java.util.ResourceBundle;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 
 /**
  * Generates the Project License report.
@@ -210,7 +212,7 @@
                     catch ( MalformedURLException e )
                     {
                         throw new MissingResourceException( "The license url 
[" + url + "] seems to be invalid: "
-                            + e.getMessage(), null, null );
+                                                            + e.getMessage(), 
null, null );
                     }
                 }
                 else
@@ -219,7 +221,7 @@
                     if ( !licenseFile.exists() )
                     {
                         throw new MissingResourceException( "Maven can't find 
the file " + licenseFile
-                            + " on the system.", null, null );
+                                                            + " on the 
system.", null, null );
                     }
                     try
                     {
@@ -228,7 +230,7 @@
                     catch ( MalformedURLException e )
                     {
                         throw new MissingResourceException( "The license url 
[" + url + "] seems to be invalid: "
-                            + e.getMessage(), null, null );
+                                                            + e.getMessage(), 
null, null );
                     }
                 }
 
@@ -256,8 +258,26 @@
                     paragraph( comments );
                 }
 
-                verbatimText( licenseContent );
+                String licenseContentLC = licenseContent.toLowerCase();
+                int bodyStart = licenseContentLC.indexOf("<body");
+                int bodyEnd = licenseContentLC.indexOf("</body>");
+                if( (licenseContentLC.startsWith("<!doctype html") ||
+                     licenseContentLC.startsWith("<html>"))
+                    && bodyStart >= 0 && bodyEnd >= 0)
+                {
+                  bodyStart = licenseContentLC.indexOf(">", bodyStart) + 1;
+                  String body = licenseContent.substring(bodyStart, bodyEnd);
 
+                  link("[Original text]", licenseUrl.toExternalForm());
+                  paragraph ( "Copy of the license follows." );
+
+                  body = replaceRelativeLinks(body, 
baseURL(licenseUrl).toExternalForm());
+                  sink.rawText(body);
+                }
+                else
+                {
+                  verbatimText( licenseContent );
+                }
                 endSection();
             }
 
@@ -269,4 +289,79 @@
     {
         return ResourceBundle.getBundle( "project-info-report", locale, 
LicenseReport.class.getClassLoader() );
     }
+
+    private static URL baseURL(URL aUrl)
+    {
+      String urlTxt = aUrl.toExternalForm();
+      int lastSlash = urlTxt.lastIndexOf('/');
+      if(lastSlash > -1)
+      {
+        try
+        {
+          return new URL(urlTxt.substring(0, lastSlash + 1));
+        }
+        catch (MalformedURLException e)
+        {
+          throw new AssertionError(e);
+        }
+      }
+      else
+      {
+        return aUrl;
+      }
+    }
+
+  private static String replaceRelativeLinks(String html, String baseURL)
+  {
+    if(!baseURL.endsWith("/"))
+    {
+      baseURL += "/";
+    }
+
+    String serverURL = baseURL.substring(0, baseURL.indexOf('/', 
baseURL.indexOf("//") + 2));
+
+    html = replaceParts(html, baseURL, serverURL, "[aA]", "[hH][rR][eE][fF]");
+    html = replaceParts(html, baseURL, serverURL, "[iI][mM][gG]", 
"[sS][rR][cC]");
+    return html;
+  }
+
+  private static String replaceParts(String html, String baseURL, String 
serverURL, String tagPattern, String attributePattern)
+  {
+    Pattern anchor = Pattern
+            .compile("(<\\s*" + tagPattern + "\\s+[^>]*" + attributePattern + 
"\\s*=\\s*\")([^\"]*)\"([^>]*>)");
+    StringBuilder sb = new StringBuilder(html);
+
+    int indx = 0;
+    do
+    {
+      Matcher mAnchor = anchor.matcher(sb);
+      mAnchor.region(indx, sb.length());
+      if(!mAnchor.find())
+      {
+        System.err.println("No more matches");
+        break; // no more matches
+      }
+
+      indx = mAnchor.end(3);
+
+      if(mAnchor.group(2).startsWith("#"))
+      {
+        // relative link - don't want to alter this one!
+      }
+      if(mAnchor.group(2).startsWith("/"))
+      {
+        // root link
+        sb.insert(mAnchor.start(2), serverURL);
+        indx += serverURL.length();
+      }
+      else if(mAnchor.group(2).indexOf(':') < 0)
+      {
+        // relative link
+        sb.insert(mAnchor.start(2), baseURL);
+        indx += baseURL.length();
+      }
+    } while(true);
+
+    return sb.toString();
+  }
 }
\ No newline at end of file

-- 
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://jira.codehaus.org/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to