Line data Source code
1 : /******************************************************************************
2 : * Nickel - a library for hierarchical maps and .ini files
3 : * One of the Bohr Game Libraries (see chaoslizard.org/devel/bohr)
4 : * Copyright (C) 2008 Charles Lindsay. Some rights reserved; see COPYING.
5 : * $Id: io.c 345 2008-01-19 17:02:54Z chaz $
6 : ******************************************************************************/
7 :
8 :
9 : #include "internal.h"
10 : #include <bohr/ds_str.h>
11 : #include <bohr/ni.h>
12 :
13 : #include <stdio.h>
14 : #include <stdlib.h>
15 :
16 :
// Character classes. Some of these duplicate ctype.h classes on purpose:
// ctype.h is locale-dependent and .ini parsing must not be, so ctype.h is NOT
// used here.
// NOTE: every macro below may evaluate its argument(s) more than once, so
// never pass an expression with side effects (e.g. a function call or i++).

// Is space: ' ' or 9-13 (tab, line feed, vertical tab, form feed, carriage
// return).
#define isspace(c) ((c) == ' ' || ((c) >= 9 && (c) <= 13))

// Is octal digit: '0'-'7'.
#define isoctal(c) ((c) >= '0' && (c) <= '7')

// Is digit: '0'-'9' (used only in isxdigit and ascii2hex).
#define isdigit(c) ((c) >= '0' && (c) <= '9')

// Hex lower case: a-f only (not a ctype.h class; used only in isxdigit and
// ascii2hex).
#define isxlower(c) ((c) >= 'a' && (c) <= 'f')

// Hex upper case: A-F only.
#define isxupper(c) ((c) >= 'A' && (c) <= 'F')

// Is hex digit: digit or a-f or A-F.
#define isxdigit(c) (isdigit (c) || isxlower (c) || isxupper (c))


// Conversions between ascii values and integer values:

// Returns int value of octal ascii digit. The caller must have checked
// isoctal (c) first.
#define ascii2oct(c) ((c) - '0')

// Returns int value of hex ascii char. The caller must have checked
// isxdigit (c) first; anything that is neither a digit nor a-f is treated as
// A-F.
#define ascii2hex(c) (isdigit (c) ? ((c) - '0') : (isxlower (c) ? ((c) - 'a' + 10) : ((c) - 'A' + 10)))

// Sets a to the (lower-case) ascii hex digit of the high 4 bits of the low
// byte of c. a must be a modifiable lvalue.
#define hex2ascii1(c, a) ((a) = ((c) >> 4) & 0xf, (a) = ((a) < 10 ? (a) + '0' : (a) - 10 + 'a'))

// Sets a to the (lower-case) ascii hex digit of the low 4 bits of c. a must
// be a modifiable lvalue.
#define hex2ascii2(c, a) ((a) = (c) & 0xf, (a) = ((a) < 10 ? (a) + '0' : (a) - 10 + 'a'))
54 :
55 :
56 : // Tokens for parsing (defined only to make it easier to change them if
57 : // necessary).
58 : #define T_EOL '\n' // end of line
59 : #define T_OB '[' // open bracket, i.e. what introduces a section name
60 : #define T_CB ']' // close bracket, finishes section name
61 : #define T_EQ '=' // equal sign, switches between key and value
62 : #define T_OQ '"' // open quote sign, starts off a quoted value
63 : #define T_CQ '"' // close quote, ends a quoted value
64 : #define T_ESC '\\' // introduces escape sequence
65 : #define T_X 'x' // after \, introduces a hex sequence
66 : #define T_CMT ';' // introduces a comment
67 :
68 :
69 : // Converts the next char(s) into their escaped value.
70 : static int DoEscape (file_buf * restrict fb, int * restrict out, int eol_valid);
71 :
72 : // Writes a section/key name.
73 : static int PutString (FILE * restrict f, const char * restrict str, int str_len, int is_key, int is_section);
74 :
75 : // Puts a single UTF-8 character into the file.
76 : static int PutUtf8Char (FILE * restrict f, const unsigned char * restrict str, int str_len);
77 :
78 :
79 : /* Reads from fb until it finds the next identifier (either a section name or a
80 : * key of a key/value), and places the identifier name into idfr_out, and the
81 : * size of the buffer required to hold it into len_out. Returns 0 if it
82 : * reaches the eof before it finds a valid identifier, or 1 if it found a
83 : * section identifier, or 2 if it found a key of a key/value pair. May return
84 : * -1 on error. idfr_out must be at least elektraNi_KEY_SIZE chars in length--this
85 : * function stops after that minus one, placing a NULL as the last character.
86 : * If this function returns 0, the contents of idfr_out and len_out may have
87 : * changed, or they may not've. Note that to parse a .ini file correctly, if
88 : * this function returns 2, you must call GetValue() before another call to
89 : * GetNextIdentifier(). level_out will be filled with how many ['s were before
90 : * the section name, assuming the function returns 1.
91 : */
92 1795 : elektraNi_PRIVATE int GetNextIdentifier (file_buf * restrict fb, char * restrict idfr_out, int * restrict len_out, int * restrict level_out)
93 : {
94 : // State values for the FSM.
95 : #define ST_DONE 0 // stop parsing
96 : #define ST_START 1 // at start of line, skipping whitespace
97 : #define ST_COMMENT 2 // invalid character, ignore whole line
98 : #define ST_SKIP 3 // valid line found, skip rest of line
99 : #define ST_IN_BRACKET 4 // found [, look for section name identifier
100 : #define ST_IN_SEC_ID 5 // found identifier after [, put it into idfr_out
101 : #define ST_IN_Q_SEC_ID 6 // found quotes inside [
102 : #define ST_AFTER_Q_SEC 7 // after ["" before ]
103 : #define ST_IN_KEY_ID 8 // found key identifier as first non-space char, put it into idfr_out
104 : #define ST_IN_Q_KEY_ID 9 // found quotes on the beginning of the line
105 : #define ST_AFTER_Q_KEY 10 // after "" before =
106 :
107 1795 : int rc = 0; // return code, initially set to "we got nothing"
108 :
109 1795 : int len = 0; // length of output
110 1795 : int graph_len = 0; // length of the string up to last graphical character (so we can skip trailing spaces)
111 1795 : int level = 0; // how many ['s we catch at the beginning of this identifier
112 : int c; // current character
113 :
114 : // Macro to conserve space in code below--updates graph_len if the input
115 : // character isn't whitespace.
116 : #define chkgr(c) \
117 : if (!isspace (c)) graph_len = len + 1
118 :
119 : // Another space-saver--checks size of existing data and puts c into out,
120 : // incrementing len if it'll fit.
121 : #define put(c) \
122 : if (len < elektraNi_KEY_SIZE - 1) idfr_out[len++] = (c)
123 :
124 : // Another space-saver--resets len and graph_len to 0, i.e. erases what we
125 : // already had in the output.
126 : #define invalid() (len = 0, graph_len = 0)
127 :
128 1795 : int state = ST_START; // holds current state for FSM, duh
129 28828 : while (state != ST_DONE) // do this until we're done
130 : {
131 : // Get char into c; if it's eof, dip out.
132 25341 : if ((c = BufGetC (fb)) == EOF) break;
133 :
134 25238 : switch (state)
135 : {
136 : // What state are we in? See defines above for description of states.
137 :
138 : // Start state ignores whitespace, looking for [, an identifier, or an
139 : // invalid character.
140 : case ST_START:
141 3145 : if (c == T_OB)
142 : {
143 : state = ST_IN_BRACKET; // if [, go to "in bracket" state
144 : level = 1;
145 : }
146 2768 : else if (c == T_CMT)
147 : {
148 : state = ST_COMMENT;
149 : } // if ;, do comment then come back here
150 2592 : else if (c == T_OQ)
151 : {
152 : state = ST_IN_Q_KEY_ID;
153 : } // if ", go to quoted key id
154 2568 : else if (c == T_EQ)
155 : {
156 : state = ST_DONE; // if =, empty key, we'll allow it
157 : rc = 2;
158 : }
159 2519 : else if (c == T_ESC)
160 : {
161 14 : state = ST_IN_KEY_ID; // if \, let key id handle it
162 14 : BufSeekBack (fb, 1);
163 : }
164 2505 : else if (!isspace (c))
165 : {
166 1228 : state = ST_IN_KEY_ID; // otherwise, if not a space, assume it's an identifier
167 1228 : chkgr (c);
168 1228 : put (c);
169 : }
170 : break;
171 :
172 : // Comment ignores till eol, goes back to start.
173 : case ST_COMMENT:
174 5984 : if (c == T_EOL)
175 : {
176 176 : state = ST_START; // if we hit eol, go back to start
177 : }
178 : break;
179 :
180 : // Skip ignores till eol, then finishes.
181 : case ST_SKIP:
182 451 : if (c == T_EOL)
183 : {
184 377 : state = ST_DONE; // if we hit eol, we're done
185 : }
186 : break;
187 :
188 : // We found a [, look for an identifier.
189 : case ST_IN_BRACKET:
190 377 : if (c == T_EOL)
191 : {
192 : state = ST_START;
193 : } // if eol, false alarm, go back to start
194 377 : else if (c == T_CMT)
195 : {
196 : state = ST_COMMENT;
197 : } // if ;, do comment
198 377 : else if (c == T_OB)
199 : {
200 0 : ++level;
201 : } // if another [, just up the bracket level
202 377 : else if (c == T_CB)
203 : {
204 : state = ST_SKIP; // if ], it's an empty section name--we'll allow it
205 : rc = 1;
206 : }
207 318 : else if (c == T_OQ)
208 : {
209 : state = ST_IN_Q_SEC_ID;
210 : } // if ", do quoted section name
211 318 : else if (c == T_ESC)
212 : {
213 0 : state = ST_IN_SEC_ID; // if \, let section id handle it
214 0 : BufSeekBack (fb, 1);
215 : }
216 318 : else if (!isspace (c))
217 : {
218 318 : state = ST_IN_SEC_ID; // otherwise, if it's not space, assume it's an identifier
219 318 : chkgr (c);
220 318 : put (c);
221 : }
222 : break;
223 :
224 : // In an identifier after a [, that is, a section name.
225 : case ST_IN_SEC_ID:
226 2949 : if (c == T_EOL)
227 : {
228 : state = ST_START; // if eol, invalidate what we had saved and start over
229 : invalid ();
230 : }
231 2949 : else if (c == T_CMT)
232 : {
233 : state = ST_COMMENT; // if ;, it's invalid so start over
234 : invalid ();
235 : }
236 2949 : else if (c == T_CB)
237 : {
238 : state = ST_SKIP; // if ], it was valid, so set rc and ignore till eol
239 : rc = 1;
240 : }
241 : else
242 : {
243 2631 : chkgr (c); // otherwise, if it's an escape sequence
244 2631 : if (c == T_ESC)
245 : {
246 0 : DoEscape (fb, &c, 0); // translate it
247 : }
248 2631 : put (c);
249 : } // and either way save it
250 : break;
251 :
252 : // In an identifier in quotes in a [, a quoted section name.
253 : case ST_IN_Q_SEC_ID:
254 0 : if (c == T_CQ)
255 : {
256 : state = ST_AFTER_Q_SEC;
257 : } // if we found close quote, go to after quote logic
258 : else
259 : {
260 0 : if (c == T_ESC)
261 : { // otherwise, if it's an escape sequence
262 0 : DoEscape (fb, &c, 0); // translate it
263 : }
264 0 : put (c);
265 : } // and either way put it in output
266 : break;
267 :
268 : // After ["something", looking for ].
269 : case ST_AFTER_Q_SEC:
270 0 : if (c == T_EOL)
271 : {
272 : state = ST_START; // if eol, it was bullshit, start over
273 : invalid ();
274 : }
275 0 : else if (c == T_OQ)
276 : {
277 : state = ST_IN_Q_SEC_ID;
278 : } // if we found another open quote, keep going
279 0 : else if (c == T_CB)
280 : {
281 : state = ST_SKIP; // if ], skip remainder of line (no trim spaces) and return ok
282 : rc = 1;
283 : graph_len = elektraNi_KEY_SIZE - 1;
284 : }
285 0 : else if (!isspace (c))
286 : {
287 0 : state = ST_COMMENT; // if any other char, skip rest of line, start over
288 0 : invalid ();
289 : }
290 : break;
291 :
292 : // In an identifier as first thing on line, that is, a key name.
293 : case ST_IN_KEY_ID:
294 12116 : if (c == T_EOL)
295 : {
296 : state = ST_START; // if eol, invalidate and start over
297 : invalid ();
298 : }
299 12116 : else if (c == T_CMT)
300 : {
301 : state = ST_COMMENT; // if ;, invalidate and start over
302 : invalid ();
303 : }
304 12116 : else if (c == T_EQ)
305 : {
306 : state = ST_DONE; // if =, stop here and set rc to indicate value comes next
307 : rc = 2;
308 : }
309 : else
310 : {
311 10874 : chkgr (c); // otherwise, if it's an escape sequence
312 10874 : if (c == T_ESC)
313 : {
314 38 : DoEscape (fb, &c, 0); // translate that
315 : }
316 10874 : put (c);
317 : } // either way, save it
318 : break;
319 :
320 : // In quotes at the beginning of the line, potentially a quoted key name.
321 : case ST_IN_Q_KEY_ID:
322 168 : if (c == T_CQ)
323 : {
324 : state = ST_AFTER_Q_KEY;
325 : } // if close quote, go to after quote logic
326 : else
327 : {
328 144 : if (c == T_ESC)
329 : { // otherwise, if escape sequence
330 8 : DoEscape (fb, &c, 0); // translate it
331 : }
332 144 : put (c);
333 : } // either way, put it into output
334 : break;
335 :
336 : // After "something", looking for =.
337 : case ST_AFTER_Q_KEY:
338 48 : if (c == T_EOL)
339 : {
340 : state = ST_START; // if eol, invalidate and start over
341 : invalid ();
342 : }
343 48 : else if (c == T_OQ)
344 : {
345 : state = ST_IN_Q_KEY_ID;
346 : } // if another open quote, keep going
347 48 : else if (c == T_EQ)
348 : {
349 : state = ST_DONE; // if =, we're GOOD and done (and don't strip spaces)
350 : rc = 2;
351 : graph_len = elektraNi_KEY_SIZE - 1;
352 : }
353 24 : else if (!isspace (c))
354 : {
355 0 : state = ST_COMMENT; // if any other char, invalidate and start over
356 0 : invalid ();
357 : }
358 : break;
359 :
360 : // This should never happen.
361 : default:
362 : rc = -1; // so set rc to error
363 : state = ST_DONE; // and stop in our tracks
364 : break;
365 : }
366 : }
367 :
368 : // Trim the length down if it was longer than the last graphical character.
369 1795 : if (graph_len < len)
370 : {
371 1135 : len = graph_len;
372 : }
373 :
374 1795 : idfr_out[len] = '\0'; // null-terminate the output
375 :
376 1795 : if (level_out)
377 : {
378 1795 : *level_out = level; // set level_out if it wasn't NULL
379 : }
380 1795 : if (len_out)
381 : {
382 1795 : *len_out = len; // set len_out if it wasn't NULL
383 : }
384 :
385 : // Flush the buffer, since we'll never need anything in it again.
386 1795 : BufFlush (fb);
387 :
388 1795 : return rc;
389 :
390 : // We don't need these to be defined anymore.
391 : #undef ST_DONE
392 : #undef ST_START
393 : #undef ST_COMMENT
394 : #undef ST_SKIP
395 : #undef ST_IN_BRACKET
396 : #undef ST_IN_SEC_ID
397 : #undef ST_IN_Q_SEC_ID
398 : #undef ST_AFTER_Q_SEC
399 : #undef ST_IN_KEY_ID
400 : #undef ST_IN_Q_KEY_ID
401 : #undef ST_AFTER_Q_KEY
402 : #undef chkgr
403 : #undef put
404 : #undef invalid
405 : }
406 :
407 : /* Parses a value of a key/value pair in the .ini file. Must be called only
408 : * after GetNextIdentifier() returns 2, and it must be called then. Returns 0
409 : * on error, or 1 if ok. Puts the value into value_out. Erases anything that
410 : * was in value_out before.
411 : */
412 1315 : elektraNi_PRIVATE int GetValue (file_buf * restrict fb, Ds_str * restrict value_out)
413 : {
414 : // State values for the FSM.
415 : #define ST_DONE 0 // done parsing
416 : #define ST_START 1 // at the start of a value, or on a new line of a continued value
417 : #define ST_IGNORE 2 // ignoring till eol
418 : #define ST_IN_Q 3 // inside the quotes of a quoted value, saving to output
419 : #define ST_AFTER_Q 4 // after the end quote of quoted value, ignoring things (mostly)
420 : #define ST_IN_U 5 // inside unquoted value, saving to output
421 :
422 1315 : int rc = 1; // return code--default to ok
423 :
424 1315 : int graph_len = 0; // length of string up to last graphical char
425 : int c; // current character
426 :
427 1315 : int state = ST_START; // that state
428 :
429 :
430 : // Macro to conserve space in code below--updates graph_len if the input
431 : // character isn't whitespace.
432 : #define chkgr(c) \
433 : if (!isspace (c)) graph_len = value_out->len + 1
434 :
435 : // Macro to conserve space below--puts a char into value_out, dips out if
436 : // error.
437 : #define put(c) \
438 : do \
439 : { \
440 : if (value_out->len + 1 > value_out->size /* check for space */ \
441 : && !Ds_ResizeStr (value_out, value_out->size << 1)) /* grow if necessary */ \
442 : { \
443 : state = ST_DONE; \
444 : rc = 0; \
445 : break; \
446 : } /* quit everything if error */ \
447 : value_out->str[value_out->len++] = (c); /* else set next char */ \
448 : } while (0)
449 :
450 : // Space-conserving macro--sets the state to the start value and sets
451 : // graph_len to be the current length, so we don't go overboard getting rid
452 : // of spaces.
453 : #define cont() (state = ST_START, graph_len = value_out->len)
454 :
455 : // Yet another--moves strlen back to the size of up to the last non-space
456 : // character.
457 : #define strip() \
458 : if (graph_len < value_out->len) value_out->len = graph_len
459 :
460 :
461 1315 : value_out->len = 0; // set length to 0
462 :
463 11112 : while (state != ST_DONE) // until we decide to stop
464 : {
465 : // Get next char; dip out (successfully) if EOF.
466 8484 : if ((c = BufGetC (fb)) == EOF) break;
467 :
468 8482 : switch (state)
469 : {
470 : // What state are we in? See defines above for what these mean.
471 :
472 : // At the start of a value, or beginning of continued line.
473 : case ST_START:
474 2521 : if (c == T_EOL)
475 : {
476 : state = ST_DONE;
477 : } // if eol or eof, it's valid even if we have nothing
478 2132 : else if (c == T_CMT)
479 : {
480 : state = ST_IGNORE;
481 : } // if ;, ignore the whole thing
482 2132 : else if (c == T_OQ)
483 : {
484 : state = ST_IN_Q;
485 : } // if ", go to quoted value
486 2100 : else if (c == T_ESC)
487 : {
488 10 : state = ST_IN_U; // if \, do unquoted value, put \ back so no duplicated code
489 10 : BufSeekBack (fb, 1);
490 : }
491 2090 : else if (!isspace (c))
492 : {
493 884 : state = ST_IN_U; // other non-ws chars, save and go to unquoted value
494 884 : chkgr (c);
495 884 : put (c);
496 : }
497 : break;
498 :
499 : // Ignoring till end of line--rc should have been set to valid before
500 : // going to this state if it is indeed valid.
501 : case ST_IGNORE:
502 0 : if (c == T_EOL)
503 : {
504 0 : state = ST_DONE;
505 : } // if eol/eof, we done an' shit
506 : break;
507 :
508 : // In quoted value.
509 : case ST_IN_Q:
510 176 : if (c == T_CQ)
511 : {
512 : state = ST_AFTER_Q;
513 : } // if end ", do after quotes deals
514 : else
515 : {
516 144 : if (c == T_ESC) // otherwise, look for escape start
517 8 : DoEscape (fb, &c, 0); // if escape sequence, get the escaped value instead
518 144 : put (c);
519 : } // output the maybe-escaped char
520 : break;
521 :
522 : // After end quote, looking for \ or more ""s.
523 : case ST_AFTER_Q:
524 32 : if (c == T_EOL)
525 : {
526 : state = ST_DONE;
527 : } // if eof/eol, we're done
528 0 : else if (c == T_OQ)
529 : {
530 : state = ST_IN_Q;
531 : } // if another ", keep parsing
532 : else
533 : {
534 0 : if (c == T_ESC // if \, look for eol
535 0 : && DoEscape (fb, NULL, 1))
536 : {
537 0 : cont ();
538 : }
539 0 : else if (!isspace (c))
540 : {
541 0 : state = ST_IGNORE;
542 : }
543 : }
544 : break;
545 :
546 : // In unquoted value.
547 : case ST_IN_U:
548 5753 : if (c == T_EOL)
549 : {
550 892 : state = ST_DONE; // if eof or eol, strip trailing space, we done
551 892 : strip ();
552 : }
553 4861 : else if (c == T_CMT)
554 : {
555 0 : state = ST_IGNORE; // if ;, ignore till eol and we done
556 0 : strip ();
557 : }
558 : else
559 : {
560 4861 : if (c == T_ESC) // otherwise, if escaping
561 : {
562 31 : if (DoEscape (fb, &c, 1)) // if it's the line continue
563 : {
564 0 : strip ();
565 0 : cont ();
566 : } // strip and continue
567 : else
568 : {
569 31 : chkgr (T_ESC);
570 : }
571 : } // if not line continue, it was graphical
572 : else
573 : {
574 4830 : chkgr (c);
575 : } // if not escaping, check whether it was graphical
576 4861 : put (c);
577 : } // and regardless, put something in the output
578 : break;
579 :
580 : // This should never happen.
581 : default:
582 : rc = 0;
583 : state = ST_DONE;
584 : break;
585 : }
586 : }
587 :
588 1315 : if (rc)
589 : {
590 : // Null-terminate if no error.
591 :
592 1315 : put ('\0'); // this might set rc to 0
593 :
594 : // put always adds to strlen, but we don't want that NULL in there
595 1315 : if (rc) value_out->len--;
596 : }
597 :
598 : // Flush the buffer, since we'll never need anything in it again.
599 1315 : BufFlush (fb);
600 :
601 1315 : return rc;
602 :
603 : #undef ST_DONE
604 : #undef ST_START
605 : #undef ST_IGNORE
606 : #undef ST_IN_Q
607 : #undef ST_AFTER_Q
608 : #undef ST_IN_U
609 : #undef chkgr
610 : #undef put
611 : #undef cont
612 : #undef strip
613 : }
614 :
615 : /* Puts the section name into the file, surrounded by brackets. Returns
616 : * nonzero on success, 0 on failure. May have written only part of the string
617 : * to f if it fails.
618 : */
619 159 : elektraNi_PRIVATE int PutSection (FILE * restrict f, const char * restrict name, int name_len, int level)
620 : {
621 : int i;
622 159 : int success = 0;
623 :
624 : do
625 : {
626 159 : if (fputc (T_EOL, f) == EOF) // put an initial eol
627 : break;
628 :
629 2 : for (i = 0; i < level - 1; ++i) // put initial spaces
630 : {
631 2 : if (fputc (' ', f) == EOF) break;
632 : }
633 159 : if (i < level - 1) break;
634 :
635 161 : for (i = 0; i < level; ++i)
636 : {
637 161 : if (fputc (T_OB, f) == EOF) // put as many ['s as level indicates
638 : break;
639 : }
640 159 : if (i < level) break;
641 :
642 159 : if (!PutString (f, name, name_len, 0, 1)) // put section name
643 : break;
644 :
645 161 : for (i = 0; i < level; ++i)
646 : {
647 161 : if (fputc (T_CB, f) == EOF) // put as many ]'s as level indicates
648 : break;
649 : }
650 159 : if (i < level || fputc (T_EOL, f) == EOF) // put eol
651 : break;
652 :
653 159 : success = 1;
654 : } while (0);
655 :
656 159 : return success;
657 : }
658 :
659 : /* Puts the key/value pair into the file, separated by an =. Returns nonzero
660 : * on success, 0 on failure. May have written only part of the string to f if
661 : * it fails.
662 : */
663 521 : elektraNi_PRIVATE int PutEntry (FILE * restrict f, const char * restrict key, int key_len, const char * restrict value, int value_len,
664 : int level)
665 : {
666 : int i;
667 521 : int success = 0;
668 :
669 : do
670 : {
671 817 : for (i = 0; i < level - 1; ++i) // initial spaces
672 : {
673 296 : if (fputc (' ', f) == EOF) break;
674 : }
675 521 : if (i < level - 1) break;
676 :
677 521 : if (!PutString (f, key, key_len, 1, 0)) // key
678 : break;
679 :
680 521 : if (fputc (' ', f) == EOF // space
681 521 : || fputc (T_EQ, f) == EOF //=
682 521 : || fputc (' ', f) == EOF) // space
683 : break;
684 :
685 521 : if (!PutString (f, value, value_len, 0, 0)) // value
686 : break;
687 :
688 521 : if (fputc (T_EOL, f) == EOF) // eol
689 : break;
690 :
691 521 : success = 1;
692 : } while (0);
693 :
694 521 : return success;
695 : }
696 :
697 : /* Internal to GetNextIdentifier() and GetValue()--assumes fb is on the
698 : * character AFTER a \ in an identifier/value. Parses the next characters for
699 : * a valid escape sequence, returning the result in out, using a '\\' if it
700 : * wasn't valid. GetNextIdentifier() and GetValue() put this character into
701 : * the output. If eol_valid is nonzero, the function will accept \<ws>\n (the
702 : * line-continue escape) as a valid escape sequence, replacing it with a single
703 : * space. Returns 1/0 indicating whether the line-continue escape sequence is
704 : * what was just parsed (thus, can only return 1 if eol_valid is 1). Positions
705 : * fb so the next character will be the first character after the (maybe
706 : * invalid) escape sequence. Either way, putting *out then the next characters
707 : * in fb into the output will result in the correct sequence.
708 : */
709 85 : static int DoEscape (file_buf * restrict fb, int * restrict out, int eol_valid)
710 : {
711 : int c; // current character
712 85 : int esc = -1; // value of escape sequence
713 85 : int line_cont = 0; // whether the line-continue escape is what we just parsed
714 :
715 85 : switch (c = BufGetC (fb))
716 : {
717 :
718 : // Normal escapes--put them in esc.
719 : case 'a':
720 : esc = '\a';
721 : break;
722 : case 'b':
723 4 : esc = '\b';
724 4 : break;
725 : case 'f':
726 4 : esc = '\f';
727 4 : break;
728 : case 'n':
729 4 : esc = '\n';
730 4 : break;
731 : case 'r':
732 4 : esc = '\r';
733 4 : break;
734 : case 't':
735 4 : esc = '\t';
736 4 : break;
737 : case 'v':
738 4 : esc = '\v';
739 4 : break;
740 :
741 : // These are the same after translation.
742 : case '\'':
743 : case '?':
744 : case T_ESC:
745 : case T_OQ:
746 : #if (T_OQ != T_CQ)
747 : case T_CQ:
748 : #endif
749 : case T_CMT:
750 : case T_OB:
751 : case T_CB:
752 : case T_EQ:
753 42 : esc = c;
754 42 : break;
755 :
756 : // Hex escape. Look for hex chars.
757 : case T_X:
758 14 : c = BufGetC (fb); // get next char
759 14 : if (!isxdigit (c)) // if it's NOT hex
760 : {
761 0 : BufSeekBack (fb, 1); // put it back
762 0 : break;
763 : }
764 14 : esc = ascii2hex (c); // otherwise, save hex digit value
765 14 : c = BufGetC (fb); // and get next char
766 14 : if (!isxdigit (c)) // if it's not a hex char
767 : {
768 0 : BufSeekBack (fb, 1); // just go back one so it'll come out next
769 0 : break;
770 : }
771 14 : esc <<= 4; // otherwise, shift previous char over by 4
772 14 : esc += ascii2hex (c); // and add this char's value
773 14 : break;
774 :
775 : // Might be an octal escape or a line-continue escape.
776 : default:
777 1 : if (isoctal (c)) // if we've got an octal char
778 : {
779 0 : esc = ascii2oct (c); // get its int value
780 0 : c = BufGetC (fb); // look at next character
781 0 : if (!isoctal (c)) // if not octal
782 : {
783 0 : BufSeekBack (fb, 1); // put it back, dip out
784 0 : break;
785 : }
786 0 : esc <<= 3; // if it is octal, shift previous value over 3
787 0 : esc += ascii2oct (c); // and add it
788 0 : c = BufGetC (fb); // look at third character
789 0 : if (!isoctal (c)) // and do the exact same thing
790 : {
791 0 : BufSeekBack (fb, 1);
792 0 : break;
793 : }
794 0 : esc <<= 3;
795 0 : esc += ascii2oct (c);
796 0 : esc &= 0xff;
797 : } // or, if we should parse for line-contine escape
798 1 : else if (eol_valid && (c == EOF || isspace (c)))
799 : {
800 : size_t n = 0; // how many chars we've gone past initial space
801 : int comment = 0; // whether we found a comment
802 :
803 : while (1)
804 : {
805 0 : if (c == T_CMT) // if we found a comment
806 0 : comment = 1;
807 :
808 : // if we're done or char is invalid
809 0 : if (c == T_EOL || c == EOF || (!comment && !isspace (c))) break;
810 :
811 0 : c = BufGetC (fb); // get next char
812 0 : ++n; // we've gone one farther
813 : }
814 0 : if (c != T_EOL) // if we stopped because of a non-space character or eof
815 : {
816 0 : BufSeekBack (fb, n); // invalid, so go back however many chars we just went forward
817 0 : break; // dip out
818 : }
819 : esc = ' '; // otherwise, it's valid, so replace it with a single space
820 : line_cont = 1; // set our return value to true
821 : }
822 : break;
823 : }
824 :
825 : // If we didn't get a valid sequence, we gotta put back the backslash.
826 85 : if (esc < 0)
827 : {
828 1 : esc = T_ESC; // set it
829 1 : BufSeekBack (fb, 1); // and go back so we haven't gotten any other chars after backslash
830 : }
831 85 : if (out)
832 : { // and set *out if we can
833 85 : *out = esc;
834 : }
835 :
836 85 : return line_cont; // return whether it was a line continuation escape
837 : }
838 :
839 : /* Outputs a string, surrounding it in quotes if necessary, and escaping
840 : * everything that needs it as it goes.
841 : */
842 1201 : static int PutString (FILE * restrict f, const char * restrict str, int str_len, int is_key, int is_section)
843 : {
844 1201 : int quote = 0; // whether to quote the string
845 1201 : int success = 1; // return value
846 1201 : int first = 1; // whether we're processing the first character
847 : int advance; // how many bytes to advance
848 : int c;
849 :
850 1201 : if (str_len > 0)
851 : {
852 986 : c = *(str + str_len - 1); // set c to last character in string
853 986 : if (*str == ' ' || c == ' ')
854 : { // if initial or trailing spaces (\t etc. are
855 48 : quote = 1; // always escaped, so we just care about ' ')
856 : }
857 : }
858 :
859 1201 : if (quote && fputc (T_OQ, f) == EOF)
860 : {
861 0 : success = 0;
862 : }
863 :
864 8879 : while (success && str_len > 0)
865 : {
866 7678 : c = *str;
867 7678 : advance = 1;
868 :
869 7678 : if (quote)
870 : {
871 : // In quotes, we just need to escape \ and "
872 288 : if (c == T_ESC || c == T_CQ)
873 : {
874 16 : if (fputc (T_ESC, f) == EOF || fputc (c, f) == EOF)
875 : {
876 : success = 0;
877 : }
878 : }
879 : else
880 : {
881 272 : if (!(advance = PutUtf8Char (f, (const unsigned char *) str, str_len)))
882 : {
883 0 : success = 0;
884 : }
885 : }
886 : }
887 : else
888 : {
889 : // Outside of quotes, we need to escape a lot of things:
890 : // in keys: always: \ ; =
891 : // if first: " [
892 : // in section names: always: \ ; ]
893 : // if first: " [
894 : // in values: always: \ ;
895 : // if first: "
896 :
897 7390 : if (c == T_ESC || c == T_CMT || (first && c == T_OQ) || (is_key && (c == T_EQ || (first && c == T_OB))) ||
898 1372 : (is_section && (c == T_CB || (first && c == T_OB))))
899 : {
900 26 : if (fputc (T_ESC, f) == EOF || fputc (c, f) == EOF)
901 : {
902 : success = 0;
903 : }
904 : }
905 : else
906 : {
907 7364 : if (!(advance = PutUtf8Char (f, (const unsigned char *) str, str_len)))
908 : {
909 0 : success = 0;
910 : }
911 : }
912 : }
913 :
914 7678 : str += advance;
915 7678 : str_len -= advance;
916 7678 : first = 0;
917 : }
918 :
919 1201 : if (success && quote && fputc (T_CQ, f) == EOF)
920 : {
921 0 : success = 0;
922 : }
923 :
924 1201 : return success;
925 : }
926 :
927 : /* Outputs a single UTF-8 character from the string. Escapes anything that's
928 : * invalid UTF-8. Returns how many bytes made up the character.
929 : */
930 7636 : static int PutUtf8Char (FILE * restrict f, const unsigned char * restrict str, int str_len)
931 : {
932 : // check for ASCII range
933 7636 : if (str[0] < 0x80)
934 : {
935 : // escape what's polite
936 7610 : if (str[0] < 0x20 || str[0] == 0x7f)
937 : {
938 28 : if (fputc (T_ESC, f) == EOF) return 0;
939 :
940 : // see if we can make a pretty, non-hex escape
941 28 : int c = 0;
942 28 : switch (str[0])
943 : {
944 : case '\a':
945 : c = 'a';
946 : break;
947 : case '\b':
948 : c = 'b';
949 : break;
950 : case '\f':
951 : c = 'f';
952 : break;
953 : case '\n':
954 : c = 'n';
955 : break;
956 : case '\r':
957 : c = 'r';
958 : break;
959 : case '\t':
960 : c = 't';
961 : break;
962 : case '\v':
963 : c = 'v';
964 : break;
965 : }
966 :
967 28 : if (c)
968 : {
969 28 : if (fputc (c, f) == EOF) return 0;
970 : }
971 : else
972 : {
973 : // gotta do it the hard way
974 :
975 : int hd1, hd2;
976 0 : hex2ascii1 (str[0], hd1);
977 0 : hex2ascii2 (str[0], hd2);
978 :
979 0 : if (fputc (T_X, f) == EOF || fputc (hd1, f) == EOF || fputc (hd2, f) == EOF) return 0;
980 : }
981 : }
982 : else // doesn't warrant escaping
983 : {
984 7582 : if (fputc (str[0], f) == EOF) return 0;
985 : }
986 :
987 : return 1; // ASCII are one byte long
988 : }
989 :
990 : // This huge if statement for valid UTF-8 characters comes right out of The
991 : // Unicode Standard, Version 5.0 electronic edition, section 3.9, table 3-7,
992 : // page 104 <http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>. It's
993 : // also described by RFC 3629 <http://www.ietf.org/rfc/rfc3629.txt>,
994 : // in particular the ABNF grammar in section 4. This handles excluding
995 : // overlong sequences, the surrogates, and just plain bytes out of range.
996 26 : if ((str[0] >= 0xc2 && str[0] <= 0xdf && str_len >= 2 && str[1] >= 0x80 && str[1] <= 0xbf) ||
997 14 : (str[0] == 0xe0 && str_len >= 3 && str[1] >= 0xa0 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf) ||
998 14 : (str[0] >= 0xe1 && str[0] <= 0xec && str_len >= 3 && str[1] >= 0x80 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf) ||
999 14 : (str[0] == 0xed && str_len >= 3 && str[1] >= 0x80 && str[1] <= 0x9f && str[2] >= 0x80 && str[2] <= 0xbf) ||
1000 14 : (str[0] >= 0xee && str[0] <= 0xef && str_len >= 3 && str[1] >= 0x80 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf) ||
1001 0 : (str[0] == 0xf0 && str_len >= 4 && str[1] >= 0x90 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf && str[3] >= 0x80 &&
1002 14 : str[3] <= 0xbf) ||
1003 14 : (str[0] >= 0xf1 && str[0] <= 0xf3 && str_len >= 4 && str[1] >= 0x80 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf &&
1004 0 : str[3] >= 0x80 && str[3] <= 0xbf) ||
1005 0 : (str[0] == 0xf4 && str_len >= 4 && str[1] >= 0x80 && str[1] <= 0x8f && str[2] >= 0x80 && str[2] <= 0xbf && str[3] >= 0x80 &&
1006 : str[3] <= 0xbf))
1007 : {
1008 : // we've got a valid UTF-8 sequence
1009 :
1010 12 : int char_len = (str[0] < 0xe0 ? 2 : (str[0] < 0xf0 ? 3 : 4));
1011 :
1012 36 : for (int i = 0; i < char_len; ++i)
1013 : {
1014 24 : if (fputc (str[i], f) == EOF) return 0;
1015 : }
1016 :
1017 : return char_len; // let the caller know how many bytes we ate
1018 : }
1019 :
1020 : // if we got here, it's not ASCII and not valid UTF-8, so just output the
1021 : // byte escaped and call it a day
1022 :
1023 : int hd1, hd2;
1024 14 : hex2ascii1 (str[0], hd1);
1025 14 : hex2ascii2 (str[0], hd2);
1026 :
1027 14 : if (fputc (T_ESC, f) == EOF || fputc (T_X, f) == EOF || fputc (hd1, f) == EOF || fputc (hd2, f) == EOF) return 0;
1028 :
1029 : return 1; // we only processed one byte
1030 : }
|