Update of /var/cvs/speeltuin/andre/mmget/src/org/mmbase/mmget
In directory james.mmbase.org:/tmp/cvs-serv23174/src/org/mmbase/mmget
Modified Files:
MMGet.java UrlReader.java
Added Files:
CSSReader.java HTMLReader.java UrlReaders.java
Log Message:
getting there
See also:
http://cvs.mmbase.org/viewcvs/speeltuin/andre/mmget/src/org/mmbase/mmget
CSSReader.java is new
HTMLReader.java is new
UrlReaders.java is new
Index: MMGet.java
===================================================================
RCS file: /var/cvs/speeltuin/andre/mmget/src/org/mmbase/mmget/MMGet.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- MMGet.java 26 Feb 2009 19:33:13 -0000 1.1
+++ MMGet.java 27 Feb 2009 10:38:28 -0000 1.2
@@ -25,7 +25,7 @@
* TODO: init rootURL early on, and check all urls against it (so we don't
travel up the rootURL)
*
* @author André van Toly
- * @version $Id: MMGet.java,v 1.1 2009/02/26 19:33:13 andre Exp $
+ * @version $Id: MMGet.java,v 1.2 2009/02/27 10:38:28 andre Exp $
*/
public final class MMGet {
@@ -214,7 +214,7 @@
UrlReader reader = null;
try {
- reader = new UrlReader(url);
+ reader = UrlReaders.getUrlReader(url);
} catch (IOException e) {
log.error("Can't parse: " + e);
return;
Index: UrlReader.java
===================================================================
RCS file: /var/cvs/speeltuin/andre/mmget/src/org/mmbase/mmget/UrlReader.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- UrlReader.java 26 Feb 2009 20:02:03 -0000 1.2
+++ UrlReader.java 27 Feb 2009 10:38:28 -0000 1.3
@@ -1,6 +1,6 @@
package org.mmbase.mmget;
-import java.io.*;
+import java.io.IOException;
import java.net.*;
import java.util.*;
@@ -13,73 +13,27 @@
/**
* Reads a web resource and returns its tags that may contain links to other
resources.
- * Originally made for webpages, altered to be able to parse css-files.
*
- * @author <>
- * @version $Rev$
+ * @author André van Toly
+ * @version $Id: UrlReader.java,v 1.3 2009/02/27 10:38:28 andre Exp $
*/
-public class UrlReader {
+public abstract class UrlReader {
private static final Logger log =
Logging.getLoggerInstance(UrlReader.class);
- protected URL url = null;
- protected BufferedReader inrdr = null;
- protected static int contenttype = -1;
-
/**
- * Constructor
+ * Gets all links to resources
+ *
+ * @return list with tags that can contain links
*/
- public UrlReader(String str) throws IOException, MalformedURLException {
- this(new URL(str));
- }
-
- public UrlReader(URL url) throws IOException {
- this.url = url;
-
- // open the URL for reading
- URLConnection uc = url.openConnection();
- contenttype = MMGet.contentType(uc);
-
- if (contenttype == MMGet.CONTENTTYPE_HTML) {
- new HTMLReader(url);
- } else {
- new CSSReader(url);
- }
- }
-
- protected int getContentType() {
- return contenttype;
- }
+ protected abstract ArrayList<String> getLinks() throws IOException;
/**
- * Gets all links that look they can contain to resources
- * @return list contain links
+ * Content type of the URL connection
+ *
+ * @return contenttype constant
*/
- public ArrayList<String> getLinks() throws IOException {
- return new ArrayList<String>();
- }
-
- public void close() throws IOException {
- //inrdr.close();
- }
-
- /* return a String representation of this object */
- public String toString() {
- return "UrlReader[" + url.toString() + "]";
- }
+ protected abstract int getContentType();
- /**
- * Main method for command-line invocation.
- * @param argv the argument String array
- */
- public static void main (String[] args) throws MalformedURLException,
IOException {
- if (args.length == 0) {
- System.out.println("Usage: UrlReader [...]");
- return;
- }
- for (int i = 0; i < args.length; i++) {
- UrlReader ur = new UrlReader(args[0]);
- ur.close();
- }
- }
+ protected abstract void close() throws IOException;
}
_______________________________________________
Cvs mailing list
[email protected]
http://lists.mmbase.org/mailman/listinfo/cvs