Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

QuickDirtyXMLParser.C

Go to the documentation of this file.
00001 #include "QuickDirtyXMLParser.h"
00002 #include <stdio.h>
00003 QuickDirtyXMLParser::QuickDirtyXMLParser() {
00004 
00005 }
// Returns a copy of `str` with leading and trailing whitespace removed.
//
// Only space, tab, newline and carriage return are treated as whitespace.
// An empty or all-whitespace input yields the empty string. Whitespace in
// the middle of the text is left untouched.
std::string trim(const std::string & str) {
        static const char * const kWhitespace = " \t\n\r";

        // No non-blank character at all: nothing survives the trim.
        const std::string::size_type first = str.find_first_not_of(kWhitespace);
        if (first == std::string::npos) {
                return "";
        }

        // A non-blank character exists, so this search cannot fail and
        // `last >= first` always holds.
        const std::string::size_type last = str.find_last_not_of(kWhitespace);
        return str.substr(first, last - first + 1);
}
// Reports whether the first `tagsize` characters of `tag` occur in `in`
// beginning at index `start`.
//
// `insize` is the length of `in` as supplied by the caller; when the
// candidate window [start, start + tagsize) would run past it, the answer
// is false without looking at any character.
bool matches(std::string & tag, std::string & in, int start, int tagsize, int insize) {
        const bool windowFits = (start + tagsize <= insize);
        if (!windowFits) {
                return false;
        }
        // Walk forward while the characters agree; stopping early means a
        // mismatch was found.
        int offset = 0;
        while (offset < tagsize && in[start + offset] == tag[offset]) {
                ++offset;
        }
        return offset >= tagsize;
}
00042 
00043 //Also have to deal with Upper Level Stuff
00044 //<COMPOUND><COMPOUND/><COMPOUND/><SCALAR></COMPOUND>
00045 //Alg:
00046 //      first name we find quickly find next occurance. Parse inbetween, set
00047 //      value or recurse. Then back to top level add to childlist carry on to
00048 //      next and do the same thing
00049 
00050 void parseXML(TreeNode * parentNode,string & xml, int current, int end) {
00051         bool hasXML = false;
00052         for (int i = current; i < end; i++) { //if current >  end then we will have a scalar
00053                 if (xml[i] == '<' || xml[i] == '>') {
00054                         hasXML = true;
00055                         break;
00056                 }
00057         }
00058         if (!hasXML) { // SCALAR
00059                 int size = end - current;
00060                 if (size < 0) { 
00061                         parentNode->value = "";
00062                 } else {
00063                         parentNode->value = trim(xml.substr(current,end-current)); //It's SCALAR we are done!
00064                 }
00065                 return;
00066         }
00067         vector<TreeNode *> * children = new vector<TreeNode*>();
00068         string ttag = ""; //We don't quite moving til we reach the other tag end
00069         bool readingTag = false; //Are we inside < >
00070         bool parent     = false;
00071         bool startTag   = false; //Are we reading a startTag?
00072         bool endTag     = false;
00073         int tagStart = 0;
00074         int tagEnd = 0;
00075         int contentStart = 0;
00076         int contentEnd = 0;
00077         for (int i = current ; i < end; i++) {
00078                 switch (xml[i]) {
00079                         case '<': {
00080                                 readingTag = true; //start reading tag
00081                                 contentEnd = i-1;
00082                                 break;
00083                         };
00084                         case '>': {
00085                                 readingTag = false; //we're done reading the tag
00086                                 if (startTag) {
00087                                         tagEnd = i - 1;
00088                                         contentStart = i + 1; //this spot is start of content
00089                                         ttag = xml.substr(tagStart,tagEnd-tagStart+1); //remember the tag;
00090                                         parent = true;  //we are now in parent mode
00091                                         startTag = false;
00092                                 } else if (endTag) {
00093                                         tagEnd = i - 1;
00094                                         if (ttag == xml.substr(tagStart,tagEnd-tagStart+1)) {
00095                                                 parent = false;
00096                                                 //Now everything between contentStart
00097                                                 //and contentEnd is stuff we want to
00098                                                 //parse
00099                                                 TreeNode * node = new TreeNode(ttag,"");
00100                                                 parseXML(node,xml,contentStart,contentEnd+1);
00101                                                 children->push_back(node);
00102                                                 endTag = false;
00103                                         } else {
00104                                                 endTag = false;
00105                                                 //it was inside
00106                                         }
00107                                 } else {
00108                                         //intermediate tag, just ignore
00109                                 }
00110                                 break;
00111                         };
00112                         case '/':  {
00113                                 if (readingTag) { //</
00114                                         if (startTag) {
00115                                                 tagEnd = i-1;
00116                                                 startTag = false;
00117                                                 parent = false;
00118                                                 children->push_back(new TreeNode(xml.substr(tagStart,tagEnd-tagStart+1),""));
00119                                                 //singleton! <stuff/>
00120                                         } else {
00121                                                 tagStart = i+1;
00122                                                 endTag  = true;
00123                                                 //we'ved started to read an end tag  "stuff</_"
00124                                         }
00125                                 }
00126                                 break;
00127                         }; 
00128                         default: {
00129                                 if (readingTag && !parent) {
00130                                         parent = true;
00131                                         startTag = true;
00132                                         tagStart = i;
00133                                 }
00134                         };
00135                 }
00136         }
00137         if (parentNode->children!=NULL) {
00138                 parentNode->clearChildren();
00139                 delete parentNode->children;
00140         }
00141         parentNode->children = children;
00142 }
00143 TreeNode * QuickDirtyXMLParser::parseString(string & xml) {
00144         TreeNode * root = new TreeNode("root","");
00145         parseXML(root,xml,0,xml.size());
00146         return root;
00147 }
00148 /*
00149  *      <top>
00150  *              <inner1><inner12>inner12</inner12></inner1>
00151  *              <inner2><inner22/></inner2>
00152  *              <inner3>
00153  *                      <inner32/>
00154  *                      <inner33>
00155  *                              <inner34>inner34</inner34>
00156  *                      </inner33>
00157  *              </inner3>
00158  *      </top>
00159  *      Should get
00160  *      (top,"")
00161  *              (inner1,"")
00162  *                      (inner12,"inner12")
00163  *              (inner2,"")
00164  *                      (inner22,"")
00165  *              (inner3,"")
00166  *                      (inner32,"") (inner33,"")
00167  *                                      (inner34,"inner34")
00168  * */
00169 
00170 string * traverseTree(TreeNode * node) {
00171         if (node == NULL) {
00172                 return new string("");
00173         }
00174         string * work = new string(node->getValue());
00175         vector<TreeNode*> * children = node->getChildren();
00176         if (children!=NULL) {
00177                 vector<TreeNode*>::iterator iter;
00178                 for (iter = children->begin(); iter < children->end(); iter++) {
00179                         string * result = traverseTree(*iter);
00180                         (*work) += (*result);
00181                         delete result; //is this the right place to do it?
00182                 }
00183         }
00184         return work;
00185 }
00186 int mainTest() {
00187         string xml []  = {string("<top> <inner1><inner12>inner12 </inner12></inner1> <inner2><inner22/></inner2> <inner3> <inner32/> <inner33> <inner34>inner34</inner34> </inner33> </inner3> </top>"),
00188         string("<top> <inner1>  <inner12>  inner12 </inner12></inner1> <inner2><inner22/></inner2> <inner3> <inner32/> <inner33> <inner34>   inner34   </inner34> </inner33> </inner3> </top>"),
00189         string("<top> <inner1>  <inner12>inner12</inner12></inner1> <inner2><inner22/></inner2> <inner3> <inner32/> <inner33> <inner34>inner34</inner34> </inner33> </inner3> <inner4></inner4><inner5/></top>")
00190         };
00191         string expect = "inner12inner34";
00192         string work = "";
00193         QuickDirtyXMLParser p = QuickDirtyXMLParser();
00194         for (int i = 0 ; i < 3; i++) {
00195                 TreeNode * node = p.parseString(xml[i]);
00196                 work = *traverseTree(node);
00197                 if (expect == work) {
00198                         cout << "Excellent Test "<< (1+i) <<" Worked Fine\n";   
00199                 } else {
00200                         cout << "Test Failed! expected[" << expect << "] received [" << work << "]\n";
00201                 }
00202                 try {
00203                         string value = node->getChild("top")->getChild("inner1")->getChild("inner12")->getValue();
00204                         if (value == "inner12") {
00205                                 cout << "Test Succeeded!\n";
00206                         } else {
00207                                 cout << "Test Failed inner12 != " << value << " !\n";
00208                         }
00209                 } catch (string * err) {
00210                         cout << "Test Failed! top inner1 inner12 not found!\n";
00211                 }
00212                 try {
00213                         string value = node->getChild("top")->getChild("inner1")->getChild("inner13")->getValue();
00214                         cout << "Test Failed inner13 found!\n";
00215                 } catch (string * err) {
00216                         cout << "Test Succeeded! inner13 not found [this is good]!\n";
00217                 }
00218         }
00219         return 0;
00220 }
00221 

Generated on Tue Dec 17 21:14:13 2002 for AUSS_Connector by doxygen1.2.18