Hello all,
First post here. I've got a little program which uses JTidy
<http://jtidy.sourceforge.net/> to tidy-ize a url. Recently I've
discovered neko,
<http://people.apache.org/~andyc/neko/doc/index.html>, and am thinking
about using neko instead. I haven't installed neko, nor xerces, yet,
but am soliciting input as to how to write a simple neko program in
Java. I'd like to convert HTML to XML (or perhaps extract would be a
better term).
I'm not terribly experienced, but can learn and read ;)
Here's the JTidy Java code I have; I'd like to do something very
similar with xerces and neko:
[EMAIL PROTECTED] tidyXhtml]$ date
Sun Aug 7 06:32:07 IST 2005
[EMAIL PROTECTED] tidyXhtml]$ pwd
/home/thufir/java/src/atreides/tidyXhtml
[EMAIL PROTECTED] tidyXhtml]$ ll
total 16
-rw-rw-r-- 1 thufir thufir 1491 Aug 5 10:57 BasicTidy.java
-rw-rw-r-- 1 thufir thufir 554 Aug 5 11:08 ControlTidy.java
[EMAIL PROTECTED] tidyXhtml]$ cat BasicTidy.java -n
1 package atreides.tidyXhtml;
2
3 import java.io.IOException;
4 import java.net.URL;
5 import java.io.BufferedInputStream;
6 import java.io.FileOutputStream;
7 import java.io.PrintWriter;
8 import java.io.FileWriter;
9 import org.w3c.tidy.Tidy;
10
11
12 public class BasicTidy{
13
14 private String url;
15 private String outFileName;
16 private String errOutFileName;
17 private boolean xmlOut;
18
19 public BasicTidy(){
20 this.url = "http://www.google.com/";
21 this.outFileName = "out.txt";
22 this.errOutFileName = "err.txt";
23 this.xmlOut = true;
24 }//BasicTidy
25
26 public void convert() {
27 URL u;
28 BufferedInputStream in;
29 FileOutputStream out;
30 Tidy tidy = new Tidy();
31
32 tidy.setXmlOut(xmlOut);
33 try {
34 tidy.setErrout(new PrintWriter(new
FileWriter(errOutFileName), true));
35 u = new URL(url);
36 in = new BufferedInputStream(u.openStream());
37 out = new FileOutputStream(outFileName);
38 tidy.parse(in, out);
39 }//try
40 catch ( IOException e ) {
41 System.out.println( this.toString() + e.toString() );
42 }//catch
43 }//convert
44
45 public void setUrl (String url){
46 this.url = url;
47 }//setUrl
48
49 public void setOutFileName (String outFileName){
50 this.outFileName = outFileName;
51 }//setOutFileName
52
53 public void setErrOutFileName (String errOutFileName){
54 this.errOutFileName = errOutFileName;
55 }//setErrOutFileName
56
57 public void setXmlOut (boolean xmlOut) {
58 this.xmlOut = xmlOut;
59 }//setXmlOut
60
61 public static void main( String[] args ) {
62 }//main
63 }//BasicTidy
[EMAIL PROTECTED] tidyXhtml]$ cat ControlTidy.java -n
1 package atreides.tidyXhtml;
2
3 /**
4  * Driver that configures a BasicTidy through its setters and runs a single
5  * conversion.
6  */
7 public class ControlTidy {
8
9     /**
10      * Points a BasicTidy at the local sample file, selects "out.html" and
11      * "err.txt" as its output and error files, turns XML output on and calls
12      * convert().
13      *
14      * @param args not read
15      */
16     public static void main(String[] args) {
17         // Candidate inputs; only the local file is handed to the converter below.
18         final String yahooPage = "http://www.yahoo.com/";
19         final String googlePage = "http://www.google.com/";
20         final String localSample = "file:///home/thufir/Desktop/tidyStuff/a";
21
22         final String resultFile = "out.html";
23         final String errorFile = "err.txt";
24
25         // Output-mode flags; only xmlMode is passed on below.
26         final boolean xmlMode = true;
27         final boolean xhtmlMode = false;
28
29         final BasicTidy converter = new BasicTidy();
30         converter.setUrl(localSample);
31         converter.setOutFileName(resultFile);
32         converter.setErrOutFileName(errorFile);
33         converter.setXmlOut(xmlMode);
34
35         converter.convert();
36     }//main
37 }//ControlTidy
38
[EMAIL PROTECTED] tidyXhtml]$
Thanks,
Thufir
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]