Nice, some comments:
1. Parsing character by character makes processing more
time-consuming.
2. I have fixed my current work to parse DTD elements too (something that
most small C++ XML parsers do not do; they can crash on them).
3. If the parser always expects to find < or >, it can crash or loop forever,
even on valid documents.
4. I do not like loading the entire file into memory either. In fact, I want a
SAX-like parser; loading the whole file for now is just for testing purposes.
5. Right now I am concentrating on minimizing loop demands.
6. There is the possibility that we end up having a validator part within the
parser too :)
7. Remember that the ONLY syntactically important characters within XML are
< and >; slashes come second, AFTER you have created a formatted XML string.
8. When handling memory in C++, since it offers some "garbage"
collection (of some sort), it is very useful to use constructors/destructors.
9. I very much like the simplicity of your code; fewer lines mean less
debugging, and mostly that things are done right.
You will have my version ready for prime time soon.
More to come soon...
Thank you all
George Makrydakis
gmak
Jean Charles Passard wrote:
Here is a try on the same idea (<> and ><)
But I have preferred not to load the complete file into memory.
It's really my first try in C++, so I have surely not used objects
very well.
Of course, I assume there are no syntax errors in the XML, so I do not
perform any checks.
JC Passard
---------------------------------------------------------------------------
#include <iostream>
#include <istream>
#include <fstream>
using namespace std;
// Reads characters from `is` up to the next markup boundary and appends the
// characters consumed on the way (the boundary character itself excluded)
// to `cdata`.
//
// The '<' ... '>' nesting depth lives in a function-local static, so
// successive calls on the same stream alternate between text outside tags
// and the content of a tag. A '<' met while a tag is already open (as in a
// DTD internal subset) is copied into `cdata` and deepens the nesting; only
// the '>' that brings the depth back to zero ends the tag.
//
// Parameters:
//   is     stream to read from
//   cdata  receives the consumed characters (appended, never cleared here)
// Returns:
//   1  an opening '<' was reached; `cdata` holds the text outside tags
//   2  the outermost closing '>' was reached; `cdata` holds the tag content
//   0  the stream ran out before a boundary was found
//
// Because the depth is static, the function is not reentrant and cannot be
// used on two streams at the same time.
int decode_stream (istream& is, string& cdata) {
    static int is_open = 0;   // current '<' nesting depth, kept across calls
    char c;
    while (is.get(c)) {
        if (c == '<') {
            if (is_open == 0) {
                // Start of a tag: hand the preceding outside text back.
                is_open = 1;
                return 1;
            }
            ++is_open;        // nested '<' inside an open tag
        } else if (c == '>' && is_open > 0) {
            // Only a '>' that matches an open '<' closes anything. A '>'
            // seen outside every tag is ordinary text; counting it would
            // push the depth negative and hide the next '<'.
            if (--is_open == 0) return 2;
        }
        cdata += c;
    }
    return 0;
}
// Classifies the content of a <...> pair by its first character only.
// Returns 1 when that character is neither a letter nor '/', otherwise 0.
// An empty string therefore yields 1, because its first character reads
// as '\0'.
int ismisc (string& cdata) {
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char (e.g. a UTF-8 byte) is undefined.
    const unsigned char first = static_cast<unsigned char>(cdata[0]);
    if (isalpha (first) || first == '/') return 0;
    return 1;
}
// Classifies the content of a <...> pair by its first character only.
// Returns 1 when that character is a letter or '/', otherwise 0.
// An empty string therefore yields 0, because its first character reads
// as '\0'.
int istag (string &cdata) {
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char (e.g. a UTF-8 byte) is undefined.
    const unsigned char first = static_cast<unsigned char>(cdata[0]);
    if (isalpha (first) || first == '/') return 1;
    return 0;
}
// Writes a run of text found outside any tag to standard output, prefixed
// with "Outside : " and followed by a newline and a flush. Always returns 0.
int analyze_outside (string& cdata) {
    std::ostream& out = std::cout;
    out << "Outside : ";
    out << cdata;
    out << '\n' << std::flush;
    return 0;
}
// Prints the content of one <...> pair to standard output under a heading:
// "Misc Data : " when ismisc() accepts it, "Tag data : " when istag() does.
// The content is followed by a blank line. Always returns 0.
int analyze_inside (string& cdata) {
    if (ismisc (cdata)) {
        cout << "Misc Data : " << endl;
    } else if (istag (cdata)) {
        cout << "Tag data : " << endl;
    } else {
        // Neither classifier matched: print nothing, but still return a
        // value instead of running off the end of a non-void function.
        return 0;
    }
    cout << cdata << endl << endl;
    return 0;
}
// Drives the tokenizer over `is`: repeatedly asks decode_stream() for the
// next chunk and forwards it to analyze_outside() (result 1, stopped at '<')
// or analyze_inside() (result 2, stopped at '>'). Stops as soon as
// decode_stream() returns 0. Always returns 0.
int analyze_stream (istream& is) {
    string chunk;
    for (;;) {
        const int boundary = decode_stream (is, chunk);
        if (boundary == 0) break;
        switch (boundary) {
            case 1:  analyze_outside (chunk); break;   // stopped at '<'
            case 2:  analyze_inside (chunk);  break;   // stopped at '>'
            default: break;
        }
        chunk.clear();
    }
    return 0;
}
// Entry point: runs the analyzer over "test.xml" in the current directory.
// Returns 0 on success and 1 when the file cannot be opened.
int main () {
    // The analyzer only reads. A plain fstream opens in read+write mode by
    // default, which fails on read-only files, so open for input only.
    ifstream file ("test.xml");
    if (!file.is_open ()) {
        cerr << "cannot open test.xml" << endl;
        return 1;
    }
    analyze_stream (file);
    return 0;   // the ifstream destructor closes the file
}
---------------------------------------------------------------------------
George Makrydakis a écrit :
No misunderstandings please... This is what I was working on:
The only bug to fix has to do with DTD (minor one but it crashes
it...)
Working together means that I must do marathon running?
Geez..., do not mix premature constructive criticism with the need
to not be
releasing buggy stuff..
The code below works if you take out DTD elements out of any xml
file that is VALID.
Handles the <,> and >,< pairs correctly no matter how weird the
syntax is...
IT IS BUGGY BUT IT IS UNINFORMED, and most of all *SMALL*
Thank you for making my trouble worth nothing, you could not wait
a couple of days more, could you...
----------------------------------------CUT---------------------------------------------
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <cstdio>
using namespace std;
// Splits "coreutils.xml" into text and <...> items and prints one item per
// line on standard output.
//
// The file is processed line by line. For each line the text before a '<'
// is printed, then the tag up to and including its closing '>', and so on
// until the line is used up. A tag that is still open at the end of a line
// is carried over and joined to the next non-empty line with a single space.
//
// Returns 0 on success and 1 when the file cannot be opened.
int main ()
{
    ifstream myfile("coreutils.xml");
    if ( !myfile.is_open() )
    {
        cerr << "file not found!" << endl;
        return 1;
    }

    string linebuffer;   // unterminated "<..." carried over from earlier lines
    string testing;      // part of the current line not yet printed

    while (getline(myfile, testing))
    {
        while ( !testing.empty() )
        {
            if ( !linebuffer.empty() )
            {
                testing = linebuffer + " " + testing;
                linebuffer.clear();
            }

            const string::size_type openTAG = testing.find('<');
            if ( openTAG == string::npos )
            {
                // No tag starts here. Any '>' left on the line has no
                // matching '<' and is printed as ordinary text instead of
                // being used as a substring bound.
                cout << testing << '\n';
                break;
            }

            // Text in front of the tag (may be empty).
            cout << testing.substr(0, openTAG) << '\n';

            // Look for the closing '>' only after the '<', so that an
            // earlier '>' in the text cannot yield a negative tag length.
            const string::size_type stopTAG = testing.find('>', openTAG);
            if ( stopTAG == string::npos )
            {
                linebuffer = testing.substr(openTAG);
                break;
            }

            cout << testing.substr(openTAG, stopTAG + 1 - openTAG) << '\n';
            testing.erase(0, stopTAG + 1);
        }
    }

    // A tag still open at end of file is printed rather than silently lost.
    if ( !linebuffer.empty() )
    {
        cout << linebuffer << '\n';
    }
    return 0;
}
------------------------------------- CUT
---------------------------------------------------
--
http://linuxfromscratch.org/mailman/listinfo/alfs-discuss
FAQ: http://www.linuxfromscratch.org/faq/
Unsubscribe: See the above information page