license report should handle html documents more gracefully
-----------------------------------------------------------
Key: MNG-903
URL: http://jira.codehaus.org/browse/MNG-903
Project: Maven 2
Type: Improvement
Components: maven-project-info-reports-plugin
Versions: 2.0-beta-1
Reporter: Matthew Pocock
Priority: Minor
At the moment, the license report sucks in whatever is on the end of the
license URL and dumps it into the page. If this happens to be raw text,
everything is fine. If it is HTML, then it escapes all the markup and you end
up with a very ugly page.
The attached patch checks to see if the license looks like it is HTML
formatted. If not, the old method is used for including it inline in the
license report page. If it is, then it extracts the body, rewrites the relative
links and includes that HTML.
Index: src/main/java/org/apache/maven/report/projectinfo/LicenseReport.java
===================================================================
--- src/main/java/org/apache/maven/report/projectinfo/LicenseReport.java
(revision 280838)
+++ src/main/java/org/apache/maven/report/projectinfo/LicenseReport.java
(working copy)
@@ -37,6 +37,8 @@
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
/**
* Generates the Project License report.
@@ -210,7 +212,7 @@
catch ( MalformedURLException e )
{
throw new MissingResourceException( "The license url
[" + url + "] seems to be invalid: "
- + e.getMessage(), null, null );
+ + e.getMessage(),
null, null );
}
}
else
@@ -219,7 +221,7 @@
if ( !licenseFile.exists() )
{
throw new MissingResourceException( "Maven can't find
the file " + licenseFile
- + " on the system.", null, null );
+ + " on the
system.", null, null );
}
try
{
@@ -228,7 +230,7 @@
catch ( MalformedURLException e )
{
throw new MissingResourceException( "The license url
[" + url + "] seems to be invalid: "
- + e.getMessage(), null, null );
+ + e.getMessage(),
null, null );
}
}
@@ -256,8 +258,26 @@
paragraph( comments );
}
- verbatimText( licenseContent );
+ String licenseContentLC = licenseContent.toLowerCase();
+ int bodyStart = licenseContentLC.indexOf("<body");
+ int bodyEnd = licenseContentLC.indexOf("</body>");
+ if( (licenseContentLC.startsWith("<!doctype html") ||
+ licenseContentLC.startsWith("<html>"))
+ && bodyStart >= 0 && bodyEnd >= 0)
+ {
+ bodyStart = licenseContentLC.indexOf(">", bodyStart) + 1;
+ String body = licenseContent.substring(bodyStart, bodyEnd);
+ link("[Original text]", licenseUrl.toExternalForm());
+ paragraph ( "Copy of the license follows." );
+
+ body = replaceRelativeLinks(body,
baseURL(licenseUrl).toExternalForm());
+ sink.rawText(body);
+ }
+ else
+ {
+ verbatimText( licenseContent );
+ }
endSection();
}
@@ -269,4 +289,79 @@
{
return ResourceBundle.getBundle( "project-info-report", locale,
LicenseReport.class.getClassLoader() );
}
+
+    /**
+     * Computes the URL that relative links found in the document at <code>aUrl</code>
+     * should be resolved against, i.e. the URL with its last path segment removed.
+     * <p>
+     * The result is obtained by resolving <code>"."</code> against <code>aUrl</code>, so
+     * only the path is inspected: a <code>/</code> appearing in the query string or in the
+     * fragment is not mistaken for a path separator, and a URL without any path
+     * (<code>http://host</code>) yields <code>http://host/</code> rather than losing its host.
+     * </p>
+     *
+     * @param aUrl the URL of the document; must not be <code>null</code>
+     * @return the base URL, or <code>aUrl</code> itself when its external form
+     *         contains no <code>/</code> at all
+     */
+    private static URL baseURL( URL aUrl )
+    {
+        if ( aUrl.toExternalForm().indexOf( '/' ) < 0 )
+        {
+            // Nothing that looks like a path to strip: hand the URL back untouched.
+            return aUrl;
+        }
+
+        try
+        {
+            // Relative resolution of "." keeps scheme and authority and drops the last
+            // path segment together with any query string and fragment.
+            return new URL( aUrl, "." );
+        }
+        catch ( MalformedURLException e )
+        {
+            // The context URL was already parsed successfully, so this is not expected.
+            throw new AssertionError( e );
+        }
+    }
+
+    /**
+     * Rewrites the link targets of <code>&lt;a href="..."&gt;</code> and
+     * <code>&lt;img src="..."&gt;</code> tags in an HTML fragment by delegating to
+     * <code>replaceParts</code>, once per tag/attribute pair.
+     *
+     * @param html the HTML fragment to process
+     * @param baseURL the URL prefix handed to <code>replaceParts</code> for relative
+     *            targets; a trailing <code>/</code> is appended when missing
+     * @return the fragment after both rewriting passes
+     */
+    private static String replaceRelativeLinks( String html, String baseURL )
+    {
+        String normalizedBase = baseURL.endsWith( "/" ) ? baseURL : baseURL + "/";
+
+        // The server prefix is everything up to, but excluding, the first '/' found
+        // after the first "//" of the base URL.
+        int searchFrom = normalizedBase.indexOf( "//" ) + 2;
+        int serverEnd = normalizedBase.indexOf( '/', searchFrom );
+        String serverURL = normalizedBase.substring( 0, serverEnd );
+
+        String withAnchors = replaceParts( html, normalizedBase, serverURL, "[aA]", "[hH][rR][eE][fF]" );
+        return replaceParts( withAnchors, normalizedBase, serverURL, "[iI][mM][gG]", "[sS][rR][cC]" );
+    }
+
+    /**
+     * Rewrites the double-quoted value of one attribute on every occurrence of one tag
+     * so that the value no longer depends on the location of the original document.
+     * <ul>
+     * <li>values starting with <code>#</code> (in-page fragments) are left untouched;</li>
+     * <li>values starting with <code>/</code> get <code>serverURL</code> prepended;</li>
+     * <li>other values without a <code>:</code> get <code>baseURL</code> prepended;</li>
+     * <li>values containing a <code>:</code> are left untouched.</li>
+     * </ul>
+     *
+     * @param html the HTML fragment to process
+     * @param baseURL prefix for document-relative targets
+     * @param serverURL prefix for server-root-relative targets
+     * @param tagPattern regular expression matching the tag name, e.g. <code>[aA]</code>
+     * @param attributePattern regular expression matching the attribute name,
+     *            e.g. <code>[hH][rR][eE][fF]</code>
+     * @return the fragment with the matching attribute values rewritten
+     */
+    private static String replaceParts( String html, String baseURL, String serverURL, String tagPattern,
+                                        String attributePattern )
+    {
+        // group 1: tag up to and including the opening quote, group 2: attribute value,
+        // group 3: remainder of the tag after the closing quote
+        Pattern anchor = Pattern.compile( "(<\\s*" + tagPattern + "\\s+[^>]*" + attributePattern
+            + "\\s*=\\s*\")([^\"]*)\"([^>]*>)" );
+        Matcher mAnchor = anchor.matcher( html );
+        StringBuffer result = new StringBuffer( html.length() );
+
+        while ( mAnchor.find() )
+        {
+            String target = mAnchor.group( 2 );
+            String prefix;
+
+            if ( target.startsWith( "#" ) )
+            {
+                // fragment within the included text - must stay as it is
+                prefix = "";
+            }
+            else if ( target.startsWith( "/" ) )
+            {
+                // root link
+                prefix = serverURL;
+            }
+            else if ( target.indexOf( ':' ) < 0 )
+            {
+                // relative link
+                prefix = baseURL;
+            }
+            else
+            {
+                // target contains ':' - left as it is
+                prefix = "";
+            }
+
+            // quoteReplacement keeps '$' and '\' in the HTML from being read as group references
+            String rewritten = mAnchor.group( 1 ) + prefix + target + "\"" + mAnchor.group( 3 );
+            mAnchor.appendReplacement( result, Matcher.quoteReplacement( rewritten ) );
+        }
+        mAnchor.appendTail( result );
+
+        return result.toString();
+    }
}
\ No newline at end of file
--
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
http://jira.codehaus.org/secure/Administrators.jspa
-
For more information on JIRA, see:
http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]