Line data Source code
1 : /**
2 : * @file
3 : *
4 : * @brief This file contains a lexer that scans YAML data.
5 : *
6 : * @copyright BSD License (see LICENSE.md or https://www.libelektra.org)
7 : */
8 :
9 : #ifndef ELEKTRA_PLUGIN_YAMBI_LEXER_HPP
10 : #define ELEKTRA_PLUGIN_YAMBI_LEXER_HPP
11 :
12 : // -- Imports ------------------------------------------------------------------
13 :
14 : #include <deque>
15 : #include <fstream>
16 : #include <memory>
17 : #include <stack>
18 :
19 : #include "input.hpp"
20 : #include "parser.hpp"
21 : #include "symbol.hpp"
22 :
23 : typedef yambi::Parser::symbol_type symbol_type; ///< Shorthand for the symbol (token) type declared by `yambi::Parser`
24 : typedef yambi::Parser::location_type location_type; ///< Shorthand for the location type declared by `yambi::Parser`
25 :
26 : // -- Class --------------------------------------------------------------------
27 :
28 252 : class Lexer // Tokenizes YAML input (block maps, block sequences, scalars, comments) into `yambi::Parser` symbols
29 : {
30 : /** This class stores information about indentation that starts a new block node. */
31 : class Level
32 : {
33 : public:
34 : /** This enumeration specifies the type of a block node. */
35 : enum class Type
36 : {
37 : MAP, ///< The current indentation starts a block map
38 : SEQUENCE, ///< The current indentation starts a block sequence
39 : OTHER ///< The current indentation starts a block scalar
40 : };
41 : size_t indent = 0; ///< The indentation that starts this level
42 : Type type = Level::Type::OTHER; ///< The type of block node this level starts
43 :
44 : /**
45 : * @brief This constructor creates a level object from the given arguments.
46 : *
47 : * @param indentation This number specifies the number of spaces used to start this level object.
48 : * @param levelType This argument specifies the type of node `indentation` created (default: `Level::Type::OTHER`).
49 : */
50 162 : Level (size_t indentation, Level::Type levelType = Level::Type::OTHER) : indent{ indentation }, type{ levelType }
51 : {
52 : }
53 : };
54 :
55 : /** This attribute represents the input the lexer tokenizes. */
56 : Input input;
57 :
58 : /** This variable stores the current line and column number in Bison’s
59 : location format. */
60 : location_type location;
61 :
62 : /** This queue stores the list of tokens produced by the lexer. */
63 : std::deque<Symbol> tokens;
64 :
65 : /**
66 : * This counter stores the number of tokens already emitted by the lexer.
67 : * The lexer needs this variable to keep track of the insertion point of
68 : * `KEY` tokens in the token queue.
69 : */
70 : size_t tokensEmitted = 0;
71 :
72 : /**
73 : * This stack stores the indentation (in number of characters) and block
74 : * type for each block node. It initially contains a single level with indentation `0`.
75 : */
76 : std::stack<Level> levels{ std::deque<Level>{ Level{ 0 } } };
77 :
78 : /**
79 : * This boolean specifies if the lexer has already scanned the whole input or
80 : * not.
81 : */
82 : bool done = false;
83 :
84 : /**
85 : * This pair stores a simple key candidate token (first part) and its
86 : * position in the token queue (second part).
87 : *
88 : * Since the lexer only supports block syntax for mappings and sequences, we
89 : * use a single token here. If we need support for flow collections we have
90 : * to store a candidate for each flow level (block context = flow level 0).
91 : */
92 : std::pair<std::unique_ptr<Symbol>, size_t> simpleKey;
93 :
94 : /**
95 : * @brief This method consumes characters from the input stream keeping
96 : * track of line and column numbers.
97 : *
98 : * @param characters This parameter specifies the number of characters the
99 : * function should consume.
100 : */
101 : void forward (size_t const characters);
102 :
103 : /**
104 : * @brief This function adds an indentation value if the given value is smaller
105 : * than the current indentation.
106 : * @note Review: whether `smaller` is the intended direction cannot be confirmed from this declaration; check it against the implementation.
107 : * @param column This parameter specifies the indentation value that this
108 : * function compares to the current indentation.
109 : *
110 : * @param type This value specifies the block collection type that
111 : * `column` might start.
112 : *
113 : * @retval true If the function added an indentation value
114 : * @retval false Otherwise
115 : */
116 : bool addIndentation (size_t const column, Level::Type type);
117 :
118 : /**
119 : * @brief This method removes uninteresting characters from the input.
120 : */
121 : void scanToNextToken ();
122 :
123 : /**
124 : * @brief This function checks if the lexer needs to scan additional tokens.
125 : *
126 : * @retval true If the lexer should fetch additional tokens
127 : * @retval false Otherwise
128 : */
129 : bool needMoreTokens () const;
130 :
131 : /**
132 : * @brief This method adds new tokens to the token queue.
133 : */
134 : void fetchTokens ();
135 :
136 : /**
137 : * @brief This method checks if the input at the specified offset starts a key
138 : * value token.
139 : *
140 : * @param offset This parameter specifies an offset to the current position,
141 : * where this function will look for a key value token (default: `1`).
142 : *
143 : * @retval true If the input matches a key value token
144 : * @retval false Otherwise
145 : */
146 : bool isValue (size_t const offset = 1) const;
147 :
148 : /**
149 : * @brief This method checks if the current input starts a list element.
150 : *
151 : * @retval true If the input matches a list element token
152 : * @retval false Otherwise
153 : */
154 : bool isElement () const;
155 :
156 : /**
157 : * @brief This method checks if the input at the specified offset starts a
158 : * line comment.
159 : *
160 : * @param offset This parameter specifies an offset to the current position,
161 : * where this function will look for a comment token.
162 : *
163 : * @retval true If the input matches a comment token
164 : * @retval false Otherwise
165 : */
166 : bool isComment (size_t const offset) const;
167 :
168 : /**
169 : * @brief This method saves a token for a simple key candidate located at the
170 : * current input position.
171 : */
172 : void addSimpleKeyCandidate ();
173 :
174 : /**
175 : * @brief This method adds block closing tokens to the token queue, if the
176 : * indentation decreased.
177 : *
178 : * @param lineIndex This parameter specifies the column (indentation in number
179 : * of spaces) for which this method should add block end
180 : * tokens.
181 : */
182 : void addBlockEnd (size_t const lineIndex);
183 :
184 : /**
185 : * @brief This method adds the token for the start of the YAML stream to
186 : * `tokens`.
187 : */
188 : void scanStart ();
189 :
190 : /**
191 : * @brief This method adds the token for the end of the YAML stream to
192 : * the token queue.
193 : */
194 : void scanEnd ();
195 :
196 : /**
197 : * @brief This method scans a single quoted scalar and adds it to the token
198 : * queue.
199 : */
200 : void scanSingleQuotedScalar ();
201 :
202 : /**
203 : * @brief This method scans a double quoted scalar and adds it to the token
204 : * queue.
205 : */
206 : void scanDoubleQuotedScalar ();
207 :
208 : /**
209 : * @brief This method scans a plain scalar and adds it to the token queue.
210 : */
211 : void scanPlainScalar ();
212 :
213 : /**
214 : * @brief This method counts the number of non space characters that can be
215 : * part of a plain scalar at position `offset`.
216 : *
217 : * @param offset This parameter specifies an offset to the current input
218 : * position, where this function searches for non space
219 : * characters.
220 : *
221 : * @return The number of non-space characters at the input position `offset`
222 : */
223 : size_t countPlainNonSpace (size_t const offset) const;
224 :
225 : /**
226 : * @brief This method counts the number of space characters that can be part
227 : * of a plain scalar at the current input position.
228 : *
229 : * @return The number of space characters at the current input position
230 : */
231 : size_t countPlainSpace () const;
232 :
233 : /**
234 : * @brief This method scans a comment and adds it to the token queue.
235 : */
236 : void scanComment ();
237 :
238 : /**
239 : * @brief This method scans a mapping value token and adds it to the token
240 : * queue.
241 : */
242 : void scanValue ();
243 :
244 : /**
245 : * @brief This method scans a list element token and adds it to the token
246 : * queue.
247 : */
248 : void scanElement ();
249 :
250 : public:
251 : /**
252 : * @brief This constructor initializes a lexer with the given input.
253 : *
254 : * @param stream This stream specifies the text which this lexer analyzes.
255 : */
256 : Lexer (std::ifstream & stream);
257 :
258 : /**
259 : * @brief This method returns the next token the lexer produced from `input`.
260 : *
261 : * @return The next token the lexer has not emitted yet
262 : */
263 : symbol_type nextToken ();
264 :
265 : /**
266 : * @brief This method returns the current input of the lexer.
267 : *
268 : * @return A UTF-8 encoded string version of the parser input
269 : */
270 : std::string getText ();
271 : };
272 :
273 : #endif // ELEKTRA_PLUGIN_YAMBI_LEXER_HPP
|