Line data Source code
1 : /**
2 : * @file
3 : *
4 : * @brief deserialization implementation for xerces plugin
5 : *
6 : * @copyright BSD License (see LICENSE.md or https://www.libelektra.org)
7 : */
8 :
9 : #include "deserializer.hpp"
10 : #include "util.hpp"
11 :
12 : #include <xercesc/dom/DOM.hpp>
13 : #include <xercesc/dom/DOMAttr.hpp>
14 : #include <xercesc/dom/DOMDocument.hpp>
15 : #include <xercesc/dom/DOMImplementation.hpp>
16 : #include <xercesc/dom/DOMNode.hpp>
17 : #include <xercesc/parsers/XercesDOMParser.hpp>
18 :
19 : #include <algorithm>
20 : #include <iostream>
21 : #include <locale>
22 : #include <map>
23 :
24 : #include <kdbease.h>
25 : #include <kdblogger.h>
26 : #include <key.hpp>
27 :
28 : XERCES_CPP_NAMESPACE_USE
29 : using namespace std;
30 : using namespace kdb;
31 : using namespace xerces;
32 :
33 : namespace
34 : {
35 :
36 13 : XercesPtr<DOMDocument> doc2dom (std::string const & src)
37 : {
38 26 : XercesDOMParser parser;
39 13 : parser.setValidationScheme (XercesDOMParser::Val_Auto);
40 :
41 39 : parser.parse (asXMLCh (src));
42 39 : return XercesPtr<DOMDocument> (parser.adoptDocument ());
43 : }
44 :
/**
 * @brief Removes whitespace-only lines from a text and joins the remaining lines.
 *
 * The text is split at '\n'. Lines that are empty or consist solely of whitespace
 * (most likely caused by pretty printing) are dropped. All other lines are
 * concatenated unchanged, without re-inserting the line breaks.
 *
 * @param str the text to clean up
 *
 * @return the concatenation of all lines that contain at least one non-whitespace character
 */
std::string trim (std::string const & str)
{
	// Create the locale once per call instead of once per inspected character
	const std::locale loc;
	const auto isBlank = [&loc] (char c) { return std::isspace (c, loc); };

	std::istringstream lines (str);
	std::string line;
	std::string trimmed;

	while (std::getline (lines, line, '\n'))
	{
		// all_of is true for an empty range, so empty lines are skipped as well
		if (!std::all_of (line.begin (), line.end (), isBlank)) trimmed += line;
	}

	return trimmed;
}
59 :
60 388 : string getElementText (DOMNode const * parent)
61 : {
62 776 : string str;
63 :
64 1474 : for (auto child = parent->getFirstChild (); child != NULL; child = child->getNextSibling ())
65 : {
66 1086 : if (DOMNode::NodeType::TEXT_NODE == child->getNodeType () || DOMNode::NodeType::CDATA_SECTION_NODE == child->getNodeType ())
67 : {
68 708 : DOMText * data = dynamic_cast<DOMText *> (child);
69 2124 : if (!data->getIsElementContentWhitespace ()) str += toStr (data->getData ());
70 : }
71 : }
72 :
73 : // Trim whitespace that is most likely due to pretty printing
74 776 : return trim (str);
75 : }
76 :
77 1086 : Key newNodeKey (Key const & parent, DOMNode const * node)
78 : {
79 2172 : Key childKey (parent.getFullName (), KEY_END);
80 2172 : const string keyName = toStr (node->getNodeName ());
81 1086 : childKey.addBaseName (keyName);
82 1086 : return childKey;
83 : }
84 :
85 388 : void node2key (DOMNode const * n, Key const & parent, KeySet const & ks, Key & current)
86 : {
87 776 : const string keyName = toStr (n->getNodeName ());
88 : ELEKTRA_LOG_DEBUG ("Encountered Element: %s with parent %s", keyName.c_str (), current.getFullName ().c_str ());
89 :
90 388 : if (!ks.size ())
91 : { // we map the parent key to the xml root element
92 : // preserve the original name if it is different
93 13 : auto parentName = parent.rbegin ();
94 78 : if (parentName != parent.rend () && (*parentName) != keyName)
95 : {
96 : ELEKTRA_LOG_DEBUG ("parent name %s differs from root element name %s", (*parentName).c_str (), keyName.c_str ());
97 55 : current.setMeta (ELEKTRA_XERCES_ORIGINAL_ROOT_NAME, keyName);
98 : }
99 : }
100 : else
101 375 : current.addBaseName (keyName);
102 :
103 776 : const string text = getElementText (n);
104 776 : current.set<string> (text);
105 :
106 388 : if (!current.isValid ()) throw XercesPluginException ("Given keyset contains invalid keys to serialize");
107 :
108 : ELEKTRA_LOG_DEBUG ("new parent is %s with value %s", current.getFullName ().c_str (), current.get<string> ().c_str ());
109 :
110 388 : if (n->hasAttributes ())
111 : {
112 : // get all the attributes of the node
113 52 : DOMNamedNodeMap * pAttributes = n->getAttributes ();
114 52 : const XMLSize_t nSize = pAttributes->getLength ();
115 : ELEKTRA_LOG_DEBUG ("\tAttributes");
116 184 : for (XMLSize_t i = 0; i < nSize; ++i)
117 : {
118 66 : DOMAttr * pAttributeNode = dynamic_cast<DOMAttr *> (pAttributes->item (i));
119 : ELEKTRA_LOG_DEBUG ("\t%s=%s", asCStr (pAttributeNode->getName ()), asCStr (pAttributeNode->getValue ()));
120 198 : current.setMeta (toStr (pAttributeNode->getName ()), toStr (pAttributeNode->getValue ()));
121 : }
122 : }
123 388 : }
124 :
125 1099 : void analyzeMultipleElements (DOMNode const * n, Key const & current, map<Key, bool> & arrays)
126 : {
127 2185 : for (auto child = n->getFirstChild (); child != 0; child = child->getNextSibling ())
128 : {
129 2172 : Key childKey = newNodeKey (current, child);
130 :
131 1086 : auto it = arrays.find (childKey);
132 1086 : if (it != arrays.end ())
133 : {
134 422 : if (!it->second)
135 : {
136 : ELEKTRA_LOG_DEBUG ("There are multiple elements of %s, mapping this as an array",
137 : childKey.getFullName ().c_str ());
138 127 : arrays[childKey] = true;
139 : }
140 : }
141 : else
142 664 : arrays[childKey] = false;
143 : }
144 1099 : }
145 :
146 53 : Key newArrayKey (Key const & arrayKey, KeySet & ks)
147 : {
148 212 : KeySet result (elektraArrayGet (arrayKey.getKey (), ks.getKeySet ()));
149 53 : if (!result.size ())
150 : {
151 33 : Key arrayBaseKey = arrayKey.dup ();
152 44 : arrayBaseKey.addBaseName ("#");
153 11 : result.append (arrayBaseKey);
154 : }
155 159 : return elektraArrayGetNextKey (result.getKeySet ());
156 : }
157 :
158 1099 : void dom2keyset (DOMNode const * n, Key const & parent, KeySet & ks, map<Key, bool> & arrays)
159 : {
160 1099 : if (n)
161 : {
162 3297 : Key current (parent.getFullName (), KEY_END);
163 :
164 1099 : if (n->getNodeType () == DOMNode::ELEMENT_NODE)
165 : {
166 388 : node2key (n, parent, ks, current);
167 :
168 388 : auto it = arrays.find (current);
169 1151 : const bool array = it != arrays.end () && it->second;
170 : // Multiple elements with that name, map as an array
171 547 : if (array) current.addBaseName (newArrayKey (current, ks).getBaseName ());
172 :
173 : // Only add keys with a value, attributes or leafs or the root to preserve the original name or array keys
174 1152 : if (n->hasAttributes () || !current.getString ().empty () || !n->getFirstChild () || !ks.size () || array)
175 : {
176 : ELEKTRA_LOG_DEBUG ("adding %s", current.getFullName ().c_str ());
177 : ks.append (current);
178 : }
179 : else
180 : {
181 : ELEKTRA_LOG_DEBUG ("skipping %s", current.getFullName ().c_str ());
182 : }
183 : }
184 : // the first level cannot have more children so its enough to check that here
185 1099 : analyzeMultipleElements (n, current, arrays);
186 2185 : for (auto child = n->getFirstChild (); child != 0; child = child->getNextSibling ())
187 1086 : dom2keyset (child, current, ks, arrays);
188 : }
189 1099 : }
190 :
191 : } // namespace
192 :
193 15 : void xerces::deserialize (Key const & parentKey, KeySet & ks)
194 : {
195 21 : if (!parentKey.isValid ()) throw XercesPluginException ("Parent key is invalid");
196 39 : if (parentKey.get<string> ().empty ()) throw XercesPluginException ("No source file specified as key value");
197 :
198 : ELEKTRA_LOG_DEBUG ("deserializing relative to %s from file %s", parentKey.getFullName ().c_str (),
199 : parentKey.get<string> ().c_str ());
200 39 : auto document = doc2dom (parentKey.get<string> ());
201 26 : map<Key, bool> arrays;
202 26 : if (document) dom2keyset (document->getDocumentElement (), parentKey, ks, arrays);
203 69 : }
|