00001 #include "QuickDirtyXMLParser.h"
00002 #include <stdio.h>
00003 QuickDirtyXMLParser::QuickDirtyXMLParser() {
00004
00005 }
/**
 * Returns a copy of @p str with leading and trailing whitespace removed.
 *
 * Only the four characters space, tab, newline and carriage return are
 * treated as whitespace; interior whitespace is left untouched.
 *
 * @param str text to trim
 * @return the trimmed text, or an empty string when @p str is empty or
 *         consists solely of whitespace
 */
std::string trim(const std::string& str) {
    static const char kWhitespace[] = " \t\n\r";

    // npos here means the string holds nothing but whitespace (or is empty).
    const std::string::size_type first = str.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        return std::string();
    }

    // A non-whitespace character exists, so this search cannot fail.
    const std::string::size_type last = str.find_last_not_of(kWhitespace);
    return str.substr(first, last - first + 1);
}
/**
 * Tests whether the first @p tagsize characters of @p tag occur in @p in
 * beginning at index @p start.
 *
 * @param tag     text to look for
 * @param in      text to look in
 * @param start   index in @p in at which the comparison begins
 * @param tagsize number of characters of @p tag to compare
 * @param insize  number of usable characters in @p in
 * @return true when every compared character is equal; false on a mismatch,
 *         when @p start is negative, or when the compared range would run
 *         past @p insize
 */
bool matches(std::string& tag, std::string& in, int start, int tagsize, int insize) {
    // A negative start would index before the beginning of `in`; reject it
    // together with ranges that extend beyond the stated input size.
    if (start < 0 || start + tagsize > insize) {
        return false;
    }
    for (int offset = 0; offset < tagsize; ++offset) {
        if (in[start + offset] != tag[offset]) {
            return false;
        }
    }
    return true;
}
00042
00043
00044
00045
00046
00047
00048
00049
00050 void parseXML(TreeNode * parentNode,string & xml, int current, int end) {
00051 bool hasXML = false;
00052 for (int i = current; i < end; i++) {
00053 if (xml[i] == '<' || xml[i] == '>') {
00054 hasXML = true;
00055 break;
00056 }
00057 }
00058 if (!hasXML) {
00059 int size = end - current;
00060 if (size < 0) {
00061 parentNode->value = "";
00062 } else {
00063 parentNode->value = trim(xml.substr(current,end-current));
00064 }
00065 return;
00066 }
00067 vector<TreeNode *> * children = new vector<TreeNode*>();
00068 string ttag = "";
00069 bool readingTag = false;
00070 bool parent = false;
00071 bool startTag = false;
00072 bool endTag = false;
00073 int tagStart = 0;
00074 int tagEnd = 0;
00075 int contentStart = 0;
00076 int contentEnd = 0;
00077 for (int i = current ; i < end; i++) {
00078 switch (xml[i]) {
00079 case '<': {
00080 readingTag = true;
00081 contentEnd = i-1;
00082 break;
00083 };
00084 case '>': {
00085 readingTag = false;
00086 if (startTag) {
00087 tagEnd = i - 1;
00088 contentStart = i + 1;
00089 ttag = xml.substr(tagStart,tagEnd-tagStart+1);
00090 parent = true;
00091 startTag = false;
00092 } else if (endTag) {
00093 tagEnd = i - 1;
00094 if (ttag == xml.substr(tagStart,tagEnd-tagStart+1)) {
00095 parent = false;
00096
00097
00098
00099 TreeNode * node = new TreeNode(ttag,"");
00100 parseXML(node,xml,contentStart,contentEnd+1);
00101 children->push_back(node);
00102 endTag = false;
00103 } else {
00104 endTag = false;
00105
00106 }
00107 } else {
00108
00109 }
00110 break;
00111 };
00112 case '/': {
00113 if (readingTag) {
00114 if (startTag) {
00115 tagEnd = i-1;
00116 startTag = false;
00117 parent = false;
00118 children->push_back(new TreeNode(xml.substr(tagStart,tagEnd-tagStart+1),""));
00119
00120 } else {
00121 tagStart = i+1;
00122 endTag = true;
00123
00124 }
00125 }
00126 break;
00127 };
00128 default: {
00129 if (readingTag && !parent) {
00130 parent = true;
00131 startTag = true;
00132 tagStart = i;
00133 }
00134 };
00135 }
00136 }
00137 if (parentNode->children!=NULL) {
00138 parentNode->clearChildren();
00139 delete parentNode->children;
00140 }
00141 parentNode->children = children;
00142 }
00143 TreeNode * QuickDirtyXMLParser::parseString(string & xml) {
00144 TreeNode * root = new TreeNode("root","");
00145 parseXML(root,xml,0,xml.size());
00146 return root;
00147 }
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170 string * traverseTree(TreeNode * node) {
00171 if (node == NULL) {
00172 return new string("");
00173 }
00174 string * work = new string(node->getValue());
00175 vector<TreeNode*> * children = node->getChildren();
00176 if (children!=NULL) {
00177 vector<TreeNode*>::iterator iter;
00178 for (iter = children->begin(); iter < children->end(); iter++) {
00179 string * result = traverseTree(*iter);
00180 (*work) += (*result);
00181 delete result;
00182 }
00183 }
00184 return work;
00185 }
00186 int mainTest() {
00187 string xml [] = {string("<top> <inner1><inner12>inner12 </inner12></inner1> <inner2><inner22/></inner2> <inner3> <inner32/> <inner33> <inner34>inner34</inner34> </inner33> </inner3> </top>"),
00188 string("<top> <inner1> <inner12> inner12 </inner12></inner1> <inner2><inner22/></inner2> <inner3> <inner32/> <inner33> <inner34> inner34 </inner34> </inner33> </inner3> </top>"),
00189 string("<top> <inner1> <inner12>inner12</inner12></inner1> <inner2><inner22/></inner2> <inner3> <inner32/> <inner33> <inner34>inner34</inner34> </inner33> </inner3> <inner4></inner4><inner5/></top>")
00190 };
00191 string expect = "inner12inner34";
00192 string work = "";
00193 QuickDirtyXMLParser p = QuickDirtyXMLParser();
00194 for (int i = 0 ; i < 3; i++) {
00195 TreeNode * node = p.parseString(xml[i]);
00196 work = *traverseTree(node);
00197 if (expect == work) {
00198 cout << "Excellent Test "<< (1+i) <<" Worked Fine\n";
00199 } else {
00200 cout << "Test Failed! expected[" << expect << "] received [" << work << "]\n";
00201 }
00202 try {
00203 string value = node->getChild("top")->getChild("inner1")->getChild("inner12")->getValue();
00204 if (value == "inner12") {
00205 cout << "Test Succeeded!\n";
00206 } else {
00207 cout << "Test Failed inner12 != " << value << " !\n";
00208 }
00209 } catch (string * err) {
00210 cout << "Test Failed! top inner1 inner12 not found!\n";
00211 }
00212 try {
00213 string value = node->getChild("top")->getChild("inner1")->getChild("inner13")->getValue();
00214 cout << "Test Failed inner13 found!\n";
00215 } catch (string * err) {
00216 cout << "Test Succeeded! inner13 not found [this is good]!\n";
00217 }
00218 }
00219 return 0;
00220 }
00221