LCOV - code coverage report
Current view: top level - src/plugins/ni/nickel-1.1.0/src - io.c (source / functions) Hit Total Coverage
Test: coverage-filtered.info Lines: 210 274 76.6 %
Date: 2019-09-12 12:28:41 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  * Nickel - a library for hierarchical maps and .ini files
       3             :  * One of the Bohr Game Libraries (see chaoslizard.org/devel/bohr)
       4             :  * Copyright (C) 2008 Charles Lindsay.  Some rights reserved; see COPYING.
       5             :  * $Id: io.c 345 2008-01-19 17:02:54Z chaz $
       6             :  ******************************************************************************/
       7             : 
       8             : 
       9             : #include "internal.h"
      10             : #include <bohr/ds_str.h>
      11             : #include <bohr/ni.h>
      12             : 
      13             : #include <stdio.h>
      14             : #include <stdlib.h>
      15             : 
      16             : 
      17             : // Locale-independent character classes.  Some duplicate ctype.h names, but
      18             : // ctype.h is deliberately NOT used because parsing must not depend on the
      19             : // locale.  All of these are macros that may evaluate c more than once:
      20             : 
      21             : // Is space: ' ' or ASCII 9-13 (tab, linefeed, vertical tab, formfeed, CR).
      22             : #define isspace(c) ((c) == ' ' || ((c) >= 9 && (c) <= 13))
      23             : 
      24             : // Is octal digit: '0'-'7'.
      25             : #define isoctal(c) ((c) >= '0' && (c) <= '7')
      26             : 
      27             : // Is digit: '0'-'9' (used only in isxdigit and ascii2hex).
      28             : #define isdigit(c) ((c) >= '0' && (c) <= '9')
      29             : 
      30             : // Hex lower case: a-f only (not a ctype.h class; used only in isxdigit and
      31             : // ascii2hex).
      32             : #define isxlower(c) ((c) >= 'a' && (c) <= 'f')
      33             : 
      34             : // Hex upper case: A-F only.
      35             : #define isxupper(c) ((c) >= 'A' && (c) <= 'F')
      36             : 
      37             : // Is hex digit: digit or a-f or A-F.
      38             : #define isxdigit(c) (isdigit (c) || isxlower (c) || isxupper (c))
      39             : 
      40             : 
      41             : // Conversions between ascii values and integer values:
      42             : 
      43             : // Returns int value of octal ascii digit (no range check; c should satisfy isoctal).
      44             : #define ascii2oct(c) ((c) - '0')
      45             : 
      46             : // Returns int value of hex ascii char; no validation: anything that is not 0-9 or a-f is treated as A-F.
      47             : #define ascii2hex(c) (isdigit (c) ? ((c) - '0') : (isxlower (c) ? ((c) - 'a' + 10) : ((c) - 'A' + 10)))
      48             : 
      49             : // Sets a to the lower-case ascii hex digit for the high 4 bits of c (a is assigned twice, so it must be a plain lvalue).
      50             : #define hex2ascii1(c, a) (a = ((c) >> 4) & 0xf, a = (a < 10 ? a + '0' : a - 10 + 'a'))
      51             : 
      52             : // Sets a to the lower-case ascii hex digit for the low 4 bits of c (a is assigned twice, so it must be a plain lvalue).
      53             : #define hex2ascii2(c, a) (a = (c) &0xf, a = (a < 10 ? a + '0' : a - 10 + 'a'))
      54             : 
      55             : 
      56             : // Tokens for parsing (defined only to make it easier to change them if
      57             : // necessary).
      58             : #define T_EOL '\n' // end of line
      59             : #define T_OB '['   // open bracket, i.e. what introduces a section name
      60             : #define T_CB ']'   // close bracket, finishes section name
      61             : #define T_EQ '='   // equal sign, switches between key and value
      62             : #define T_OQ '"'   // open quote sign, starts off a quoted value
      63             : #define T_CQ '"'   // close quote, ends a quoted value (currently the same character as T_OQ)
      64             : #define T_ESC '\\' // introduces escape sequence
      65             : #define T_X 'x'    // after T_ESC, introduces a hex sequence
      66             : #define T_CMT ';'  // introduces a comment
      67             : 
      68             : 
      69             : // Converts the next char(s) into their escaped value.
      70             : static int DoEscape (file_buf * restrict fb, int * restrict out, int eol_valid);
      71             : 
      72             : // Writes a section/key name.
      73             : static int PutString (FILE * restrict f, const char * restrict str, int str_len, int is_key, int is_section);
      74             : 
      75             : // Puts a single UTF-8 character into the file.
      76             : static int PutUtf8Char (FILE * restrict f, const unsigned char * restrict str, int str_len);
      77             : 
      78             : 
      79             : /* Reads from fb until it finds the next identifier (either a section name or
      80             :  * the key of a key/value pair), stores it NUL-terminated in idfr_out and its
      81             :  * length (excluding the NUL) in len_out.  Returns 0 if it reaches EOF before
      82             :  * finding a valid identifier, 1 for a section name, 2 for the key of a
      83             :  * key/value pair, or -1 if the parser reaches an unknown state.  idfr_out must
      84             :  * hold at least elektraNi_KEY_SIZE chars: at most elektraNi_KEY_SIZE - 1 are
      85             :  * stored and further identifier characters are silently dropped.  idfr_out,
      86             :  * len_out and level_out are written on every return, including 0.  To parse a
      87             :  * .ini file correctly, if this function returns 2 you must call GetValue()
      88             :  * before calling GetNextIdentifier() again.  level_out receives the number of
      89             :  * ['s before the section name (meaningful only when 1 is returned); len_out
      90             :  * and level_out may each be NULL.
      91             :  */
      92        1795 : elektraNi_PRIVATE int GetNextIdentifier (file_buf * restrict fb, char * restrict idfr_out, int * restrict len_out, int * restrict level_out)
      93             : {
      94             : // State values for the FSM.
      95             : #define ST_DONE 0        // stop parsing
      96             : #define ST_START 1      // at start of line, skipping whitespace
      97             : #define ST_COMMENT 2      // comment or invalid line: ignore up to eol, then go back to ST_START
      98             : #define ST_SKIP 3        // valid section line found: skip rest of line, then stop
      99             : #define ST_IN_BRACKET 4   // found [, look for section name identifier
     100             : #define ST_IN_SEC_ID 5    // found identifier after [, put it into idfr_out
     101             : #define ST_IN_Q_SEC_ID 6  // found quotes inside [
     102             : #define ST_AFTER_Q_SEC 7  // after ["" before ]
     103             : #define ST_IN_KEY_ID 8    // found key identifier as first non-space char, put it into idfr_out
     104             : #define ST_IN_Q_KEY_ID 9  // found quotes on the beginning of the line
     105             : #define ST_AFTER_Q_KEY 10 // after "" before =
     106             : 
     107        1795 :         int rc = 0; // return code, initially set to "we got nothing"
     108             : 
     109        1795 :         int len = 0;       // length of output
     110        1795 :         int graph_len = 0; // length of the string up to last graphical character (so we can skip trailing spaces)
     111        1795 :         int level = 0;     // how many ['s we catch at the beginning of this identifier
     112             :         int c;             // current character
     113             : 
     114             : // Macro to conserve space in code below--updates graph_len if the input
     115             : // character isn't whitespace.
     116             : #define chkgr(c)                                                                                                                           \
     117             :         if (!isspace (c)) graph_len = len + 1
     118             : 
     119             : // Another space-saver--checks size of existing data and puts c into out,
     120             : // incrementing len if it'll fit (characters that do not fit are dropped).
     121             : #define put(c)                                                                                                                             \
     122             :         if (len < elektraNi_KEY_SIZE - 1) idfr_out[len++] = (c)
     123             : 
     124             : // Another space-saver--resets len and graph_len to 0, i.e. erases what we
     125             : // already had in the output.
     126             : #define invalid() (len = 0, graph_len = 0)
     127             : 
     128        1795 :         int state = ST_START;    // current state of the FSM
     129       28828 :         while (state != ST_DONE) // do this until we're done
     130             :         {
     131             :                 // Get char into c; if it's eof, stop with whatever rc is set so far.
     132       25341 :                 if ((c = BufGetC (fb)) == EOF) break;
     133             : 
     134       25238 :                 switch (state)
     135             :                 {
     136             :                 // What state are we in?  See defines above for description of states.
     137             : 
     138             :                 // Start state ignores whitespace, looking for [, an identifier, or an
     139             :                 // invalid character.
     140             :                 case ST_START:
     141        3145 :                         if (c == T_OB)
     142             :                         {
     143             :                                 state = ST_IN_BRACKET; // if [, go to "in bracket" state
     144             :                                 level = 1;
     145             :                         }
     146        2768 :                         else if (c == T_CMT)
     147             :                         {
     148             :                                 state = ST_COMMENT;
     149             :                         } // if ;, do comment then come back here
     150        2592 :                         else if (c == T_OQ)
     151             :                         {
     152             :                                 state = ST_IN_Q_KEY_ID;
     153             :                         } // if ", go to quoted key id
     154        2568 :                         else if (c == T_EQ)
     155             :                         {
     156             :                                 state = ST_DONE; // if =, empty key, we'll allow it
     157             :                                 rc = 2;
     158             :                         }
     159        2519 :                         else if (c == T_ESC)
     160             :                         {
     161          14 :                                 state = ST_IN_KEY_ID; // if \, let key id handle it
     162          14 :                                 BufSeekBack (fb, 1); // un-read the \ so ST_IN_KEY_ID sees it again
     163             :                         }
     164        2505 :                         else if (!isspace (c))
     165             :                         {
     166        1228 :                                 state = ST_IN_KEY_ID; // otherwise, if not a space, assume it's an identifier
     167        1228 :                                 chkgr (c);
     168        1228 :                                 put (c);
     169             :                         }
     170             :                         break;
     171             : 
     172             :                 // Comment ignores till eol, goes back to start.
     173             :                 case ST_COMMENT:
     174        5984 :                         if (c == T_EOL)
     175             :                         {
     176         176 :                                 state = ST_START; // if we hit eol, go back to start
     177             :                         }
     178             :                         break;
     179             : 
     180             :                 // Skip ignores till eol, then finishes.
     181             :                 case ST_SKIP:
     182         451 :                         if (c == T_EOL)
     183             :                         {
     184         377 :                                 state = ST_DONE; // if we hit eol, we're done
     185             :                         }
     186             :                         break;
     187             : 
     188             :                 // We found a [, look for an identifier.
     189             :                 case ST_IN_BRACKET:
     190         377 :                         if (c == T_EOL)
     191             :                         {
     192             :                                 state = ST_START;
     193             :                         } // if eol, false alarm, go back to start
     194         377 :                         else if (c == T_CMT)
     195             :                         {
     196             :                                 state = ST_COMMENT;
     197             :                         } // if ;, do comment
     198         377 :                         else if (c == T_OB)
     199             :                         {
     200           0 :                                 ++level;
     201             :                         } // if another [, just up the bracket level
     202         377 :                         else if (c == T_CB)
     203             :                         {
     204             :                                 state = ST_SKIP; // if ], it's an empty section name--we'll allow it
     205             :                                 rc = 1;
     206             :                         }
     207         318 :                         else if (c == T_OQ)
     208             :                         {
     209             :                                 state = ST_IN_Q_SEC_ID;
     210             :                         } // if ", do quoted section name
     211         318 :                         else if (c == T_ESC)
     212             :                         {
     213           0 :                                 state = ST_IN_SEC_ID; // if \, let section id handle it
     214           0 :                                 BufSeekBack (fb, 1); // un-read the \ so ST_IN_SEC_ID sees it again
     215             :                         }
     216         318 :                         else if (!isspace (c))
     217             :                         {
     218         318 :                                 state = ST_IN_SEC_ID; // otherwise, if it's not space, assume it's an identifier
     219         318 :                                 chkgr (c);
     220         318 :                                 put (c);
     221             :                         }
     222             :                         break;
     223             : 
     224             :                 // In an identifier after a [, that is, a section name.
     225             :                 case ST_IN_SEC_ID:
     226        2949 :                         if (c == T_EOL)
     227             :                         {
     228             :                                 state = ST_START; // if eol, invalidate what we had saved and start over
     229             :                                 invalid ();
     230             :                         }
     231        2949 :                         else if (c == T_CMT)
     232             :                         {
     233             :                                 state = ST_COMMENT; // if ;, it's invalid so start over
     234             :                                 invalid ();
     235             :                         }
     236        2949 :                         else if (c == T_CB)
     237             :                         {
     238             :                                 state = ST_SKIP; // if ], it was valid, so set rc and ignore till eol
     239             :                                 rc = 1;
     240             :                         }
     241             :                         else
     242             :                         {
     243        2631 :                                 chkgr (c); // otherwise it belongs to the name; remember its position if it's not whitespace
     244        2631 :                                 if (c == T_ESC)
     245             :                                 {
     246           0 :                                         DoEscape (fb, &c, 0); // escape sequence: let DoEscape rewrite c (its return value is ignored)
     247             :                                 }
     248        2631 :                                 put (c);
     249             :                         } // and either way save it
     250             :                         break;
     251             : 
     252             :                 // In an identifier in quotes in a [, a quoted section name.
     253             :                 case ST_IN_Q_SEC_ID:
     254           0 :                         if (c == T_CQ)
     255             :                         {
     256             :                                 state = ST_AFTER_Q_SEC;
     257             :                         } // if we found close quote, go to after quote logic
     258             :                         else
     259             :                         {
     260           0 :                                 if (c == T_ESC)
     261             :                                 {                             // otherwise, if it's an escape sequence
     262           0 :                                         DoEscape (fb, &c, 0); // let DoEscape rewrite c (its return value is ignored)
     263             :                                 }
     264           0 :                                 put (c);
     265             :                         } // and either way put it in output
     266             :                         break;
     267             : 
     268             :                 // After ["something", looking for ].
     269             :                 case ST_AFTER_Q_SEC:
     270           0 :                         if (c == T_EOL)
     271             :                         {
     272             :                                 state = ST_START; // if eol before ], the section line was malformed, start over
     273             :                                 invalid ();
     274             :                         }
     275           0 :                         else if (c == T_OQ)
     276             :                         {
     277             :                                 state = ST_IN_Q_SEC_ID;
     278             :                         } // if we found another open quote, keep going
     279           0 :                         else if (c == T_CB)
     280             :                         {
     281             :                                 state = ST_SKIP; // if ], skip remainder of line (no trim spaces) and return ok
     282             :                                 rc = 1;
     283             :                                 graph_len = elektraNi_KEY_SIZE - 1; // never below len, so the trim after the loop does nothing
     284             :                         }
     285           0 :                         else if (!isspace (c))
     286             :                         {
     287           0 :                                 state = ST_COMMENT; // if any other char, skip rest of line, start over
     288           0 :                                 invalid ();
     289             :                         }
     290             :                         break;
     291             : 
     292             :                 // In an identifier as first thing on line, that is, a key name.
     293             :                 case ST_IN_KEY_ID:
     294       12116 :                         if (c == T_EOL)
     295             :                         {
     296             :                                 state = ST_START; // if eol, invalidate and start over
     297             :                                 invalid ();
     298             :                         }
     299       12116 :                         else if (c == T_CMT)
     300             :                         {
     301             :                                 state = ST_COMMENT; // if ;, invalidate and start over
     302             :                                 invalid ();
     303             :                         }
     304       12116 :                         else if (c == T_EQ)
     305             :                         {
     306             :                                 state = ST_DONE; // if =, stop here and set rc to indicate value comes next
     307             :                                 rc = 2;
     308             :                         }
     309             :                         else
     310             :                         {
     311       10874 :                                 chkgr (c); // otherwise it belongs to the key; remember its position if it's not whitespace
     312       10874 :                                 if (c == T_ESC)
     313             :                                 {
     314          38 :                                         DoEscape (fb, &c, 0); // escape sequence: let DoEscape rewrite c (its return value is ignored)
     315             :                                 }
     316       10874 :                                 put (c);
     317             :                         } // either way, save it
     318             :                         break;
     319             : 
     320             :                 // In quotes at the beginning of the line, potentially a quoted key name.
     321             :                 case ST_IN_Q_KEY_ID:
     322         168 :                         if (c == T_CQ)
     323             :                         {
     324             :                                 state = ST_AFTER_Q_KEY;
     325             :                         } // if close quote, go to after quote logic
     326             :                         else
     327             :                         {
     328         144 :                                 if (c == T_ESC)
     329             :                                 {                             // otherwise, if escape sequence
     330           8 :                                         DoEscape (fb, &c, 0); // let DoEscape rewrite c (its return value is ignored)
     331             :                                 }
     332         144 :                                 put (c);
     333             :                         } // either way, put it into output
     334             :                         break;
     335             : 
     336             :                 // After "something", looking for =.
     337             :                 case ST_AFTER_Q_KEY:
     338          48 :                         if (c == T_EOL)
     339             :                         {
     340             :                                 state = ST_START; // if eol, invalidate and start over
     341             :                                 invalid ();
     342             :                         }
     343          48 :                         else if (c == T_OQ)
     344             :                         {
     345             :                                 state = ST_IN_Q_KEY_ID;
     346             :                         } // if another open quote, keep going
     347          48 :                         else if (c == T_EQ)
     348             :                         {
     349             :                                 state = ST_DONE; // if =, the quoted key is complete (and don't strip spaces)
     350             :                                 rc = 2;
     351             :                                 graph_len = elektraNi_KEY_SIZE - 1; // never below len, so the trim after the loop does nothing
     352             :                         }
     353          24 :                         else if (!isspace (c))
     354             :                         {
     355           0 :                                 state = ST_COMMENT; // if any other char, invalidate and start over
     356           0 :                                 invalid ();
     357             :                         }
     358             :                         break;
     359             : 
     360             :                 // This should never happen.
     361             :                 default:
     362             :                         rc = -1;         // so set rc to error
     363             :                         state = ST_DONE; // and stop in our tracks
     364             :                         break;
     365             :                 }
     366             :         }
     367             : 
     368             :         // Trim the length down if it was longer than the last graphical character.
     369        1795 :         if (graph_len < len)
     370             :         {
     371        1135 :                 len = graph_len;
     372             :         }
     373             : 
     374        1795 :         idfr_out[len] = '\0'; // NUL-terminate the output (len never exceeds elektraNi_KEY_SIZE - 1)
     375             : 
     376        1795 :         if (level_out)
     377             :         {
     378        1795 :                 *level_out = level; // set level_out if it wasn't NULL
     379             :         }
     380        1795 :         if (len_out)
     381             :         {
     382        1795 :                 *len_out = len; // set len_out if it wasn't NULL
     383             :         }
     384             : 
     385             :         // Flush the buffer, since we'll never need anything in it again.
     386        1795 :         BufFlush (fb);
     387             : 
     388        1795 :         return rc;
     389             : 
     390             : // We don't need these to be defined anymore.
     391             : #undef ST_DONE
     392             : #undef ST_START
     393             : #undef ST_COMMENT
     394             : #undef ST_SKIP
     395             : #undef ST_IN_BRACKET
     396             : #undef ST_IN_SEC_ID
     397             : #undef ST_IN_Q_SEC_ID
     398             : #undef ST_AFTER_Q_SEC
     399             : #undef ST_IN_KEY_ID
     400             : #undef ST_IN_Q_KEY_ID
     401             : #undef ST_AFTER_Q_KEY
     402             : #undef chkgr
     403             : #undef put
     404             : #undef invalid
     405             : }
     406             : 
     407             : /* Parses a value of a key/value pair in the .ini file.  Must be called only
     408             :  * after GetNextIdentifier() returns 2, and it must be called then.  Returns 0
     409             :  * on error, or 1 if ok.  Puts the value into value_out.  Erases anything that
     410             :  * was in value_out before.
     411             :  */
     412        1315 : elektraNi_PRIVATE int GetValue (file_buf * restrict fb, Ds_str * restrict value_out)
     413             : {
     414             : // State values for the FSM.
     415             : #define ST_DONE 0    // done parsing
     416             : #define ST_START 1   // at the start of a value, or on a new line of a continued value
     417             : #define ST_IGNORE 2  // ignoring till eol
     418             : #define ST_IN_Q 3    // inside the quotes of a quoted value, saving to output
     419             : #define ST_AFTER_Q 4 // after the end quote of quoted value, ignoring things (mostly)
     420             : #define ST_IN_U 5    // inside unquoted value, saving to output
     421             : 
     422        1315 :         int rc = 1; // return code--default to ok
     423             : 
     424        1315 :         int graph_len = 0; // length of string up to last graphical char
     425             :         int c;             // current character
     426             : 
     427        1315 :         int state = ST_START; // that state
     428             : 
     429             : 
     430             : // Macro to conserve space in code below--updates graph_len if the input
     431             : // character isn't whitespace.
     432             : #define chkgr(c)                                                                                                                           \
     433             :         if (!isspace (c)) graph_len = value_out->len + 1
     434             : 
     435             : // Macro to conserve space below--puts a char into value_out, dips out if
     436             : // error.
     437             : #define put(c)                                                                                                                             \
     438             :         do                                                                                                                                 \
     439             :         {                                                                                                                                  \
     440             :                 if (value_out->len + 1 > value_out->size               /* check for space */                                              \
     441             :                     && !Ds_ResizeStr (value_out, value_out->size << 1)) /* grow if necessary */                                            \
     442             :                 {                                                                                                                          \
     443             :                         state = ST_DONE;                                                                                                   \
     444             :                         rc = 0;                                                                                                            \
     445             :                         break;                                                                                                             \
     446             :                 }                                       /* quit everything if error */                                                     \
     447             :                 value_out->str[value_out->len++] = (c); /* else set next char */                                                           \
     448             :         } while (0)
     449             : 
     450             : // Space-conserving macro--sets the state to the start value and sets
     451             : // graph_len to be the current length, so we don't go overboard getting rid
     452             : // of spaces.
     453             : #define cont() (state = ST_START, graph_len = value_out->len)
     454             : 
     455             : // Yet another--moves strlen back to the size of up to the last non-space
     456             : // character.
     457             : #define strip()                                                                                                                            \
     458             :         if (graph_len < value_out->len) value_out->len = graph_len
     459             : 
     460             : 
     461        1315 :         value_out->len = 0; // set length to 0
     462             : 
     463       11112 :         while (state != ST_DONE) // until we decide to stop
     464             :         {
     465             :                 // Get next char; dip out (successfully) if EOF.
     466        8484 :                 if ((c = BufGetC (fb)) == EOF) break;
     467             : 
     468        8482 :                 switch (state)
     469             :                 {
     470             :                 // What state are we in?  See defines above for what these mean.
     471             : 
     472             :                 // At the start of a value, or beginning of continued line.
     473             :                 case ST_START:
     474        2521 :                         if (c == T_EOL)
     475             :                         {
     476             :                                 state = ST_DONE;
     477             :                         } // if eol or eof, it's valid even if we have nothing
     478        2132 :                         else if (c == T_CMT)
     479             :                         {
     480             :                                 state = ST_IGNORE;
     481             :                         } // if ;, ignore the whole thing
     482        2132 :                         else if (c == T_OQ)
     483             :                         {
     484             :                                 state = ST_IN_Q;
     485             :                         } // if ", go to quoted value
     486        2100 :                         else if (c == T_ESC)
     487             :                         {
     488          10 :                                 state = ST_IN_U; // if \, do unquoted value, put \ back so no duplicated code
     489          10 :                                 BufSeekBack (fb, 1);
     490             :                         }
     491        2090 :                         else if (!isspace (c))
     492             :                         {
     493         884 :                                 state = ST_IN_U; // other non-ws chars, save and go to unquoted value
     494         884 :                                 chkgr (c);
     495         884 :                                 put (c);
     496             :                         }
     497             :                         break;
     498             : 
     499             :                 // Ignoring till end of line--rc should have been set to valid before
     500             :                 // going to this state if it is indeed valid.
     501             :                 case ST_IGNORE:
     502           0 :                         if (c == T_EOL)
     503             :                         {
     504           0 :                                 state = ST_DONE;
     505             :                         } // if eol/eof, we done an' shit
     506             :                         break;
     507             : 
     508             :                 // In quoted value.
     509             :                 case ST_IN_Q:
     510         176 :                         if (c == T_CQ)
     511             :                         {
     512             :                                 state = ST_AFTER_Q;
     513             :                         } // if end ", do after quotes deals
     514             :                         else
     515             :                         {
     516         144 :                                 if (c == T_ESC)               // otherwise, look for escape start
     517           8 :                                         DoEscape (fb, &c, 0); // if escape sequence, get the escaped value instead
     518         144 :                                 put (c);
     519             :                         } // output the maybe-escaped char
     520             :                         break;
     521             : 
     522             :                 // After end quote, looking for \ or more ""s.
     523             :                 case ST_AFTER_Q:
     524          32 :                         if (c == T_EOL)
     525             :                         {
     526             :                                 state = ST_DONE;
     527             :                         } // if eof/eol, we're done
     528           0 :                         else if (c == T_OQ)
     529             :                         {
     530             :                                 state = ST_IN_Q;
     531             :                         } // if another ", keep parsing
     532             :                         else
     533             :                         {
     534           0 :                                 if (c == T_ESC // if \, look for eol
     535           0 :                                     && DoEscape (fb, NULL, 1))
     536             :                                 {
     537           0 :                                         cont ();
     538             :                                 }
     539           0 :                                 else if (!isspace (c))
     540             :                                 {
     541           0 :                                         state = ST_IGNORE;
     542             :                                 }
     543             :                         }
     544             :                         break;
     545             : 
     546             :                 // In unquoted value.
     547             :                 case ST_IN_U:
     548        5753 :                         if (c == T_EOL)
     549             :                         {
     550         892 :                                 state = ST_DONE; // if eof or eol, strip trailing space, we done
     551         892 :                                 strip ();
     552             :                         }
     553        4861 :                         else if (c == T_CMT)
     554             :                         {
     555           0 :                                 state = ST_IGNORE; // if ;, ignore till eol and we done
     556           0 :                                 strip ();
     557             :                         }
     558             :                         else
     559             :                         {
     560        4861 :                                 if (c == T_ESC) // otherwise, if escaping
     561             :                                 {
     562          31 :                                         if (DoEscape (fb, &c, 1)) // if it's the line continue
     563             :                                         {
     564           0 :                                                 strip ();
     565           0 :                                                 cont ();
     566             :                                         } // strip and continue
     567             :                                         else
     568             :                                         {
     569          31 :                                                 chkgr (T_ESC);
     570             :                                         }
     571             :                                 } // if not line continue, it was graphical
     572             :                                 else
     573             :                                 {
     574        4830 :                                         chkgr (c);
     575             :                                 } // if not escaping, check whether it was graphical
     576        4861 :                                 put (c);
     577             :                         } // and regardless, put something in the output
     578             :                         break;
     579             : 
     580             :                 // This should never happen.
     581             :                 default:
     582             :                         rc = 0;
     583             :                         state = ST_DONE;
     584             :                         break;
     585             :                 }
     586             :         }
     587             : 
     588        1315 :         if (rc)
     589             :         {
     590             :                 // Null-terminate if no error.
     591             : 
     592        1315 :                 put ('\0'); // this might set rc to 0
     593             : 
     594             :                 // put always adds to strlen, but we don't want that NULL in there
     595        1315 :                 if (rc) value_out->len--;
     596             :         }
     597             : 
     598             :         // Flush the buffer, since we'll never need anything in it again.
     599        1315 :         BufFlush (fb);
     600             : 
     601        1315 :         return rc;
     602             : 
     603             : #undef ST_DONE
     604             : #undef ST_START
     605             : #undef ST_IGNORE
     606             : #undef ST_IN_Q
     607             : #undef ST_AFTER_Q
     608             : #undef ST_IN_U
     609             : #undef chkgr
     610             : #undef put
     611             : #undef cont
     612             : #undef strip
     613             : }
     614             : 
     615             : /* Writes a section header to f: an EOL, level-1 indent spaces, `level` T_OB characters, the section name (written by
     616             :  * PutString with is_section set, so it is escaped as a section name), `level` T_CB characters, and a closing EOL.
     617             :  * Returns nonzero (1) on success, 0 as soon as any write fails; part of the header may already be in f if it fails.
     618             :  */
     619         159 : elektraNi_PRIVATE int PutSection (FILE * restrict f, const char * restrict name, int name_len, int level)
     620             : {
     621             :         int i;
     622         159 :         int success = 0;
     623             : 
     624             :         do // single-pass block: every `break` below leaves success at 0
     625             :         {
     626         159 :                 if (fputc (T_EOL, f) == EOF) // put an initial eol
     627             :                         break;
     628             : 
     629           2 :                 for (i = 0; i < level - 1; ++i) // put initial spaces (one fewer than level)
     630             :                 {
     631           2 :                         if (fputc (' ', f) == EOF) break;
     632             :                 }
     633         159 :                 if (i < level - 1) break; // loop stopped early, so a write failed
     634             : 
     635         161 :                 for (i = 0; i < level; ++i)
     636             :                 {
     637         161 :                         if (fputc (T_OB, f) == EOF) // put as many ['s as level indicates
     638             :                                 break;
     639             :                 }
     640         159 :                 if (i < level) break; // loop stopped early, so a write failed
     641             : 
     642         159 :                 if (!PutString (f, name, name_len, 0, 1)) // put section name (is_key = 0, is_section = 1)
     643             :                         break;
     644             : 
     645         161 :                 for (i = 0; i < level; ++i)
     646             :                 {
     647         161 :                         if (fputc (T_CB, f) == EOF) // put as many ]'s as level indicates
     648             :                                 break;
     649             :                 }
     650         159 :                 if (i < level || fputc (T_EOL, f) == EOF) // bail if a ] failed, otherwise put the final eol
     651             :                         break;
     652             : 
     653         159 :                 success = 1; // everything was written
     654             :         } while (0);
     655             : 
     656         159 :         return success;
     657             : }
     658             : 
     659             : /* Writes one key/value line to f: level-1 indent spaces, the key (PutString with is_key set), a space, T_EQ, a space,
     660             :  * the value (PutString with neither is_key nor is_section set), and an EOL.  Returns nonzero (1) on success, 0 as soon
     661             :  * as any write fails; part of the line may already have been written to f if it fails.
     662             :  */
     663         521 : elektraNi_PRIVATE int PutEntry (FILE * restrict f, const char * restrict key, int key_len, const char * restrict value, int value_len,
     664             :                                 int level)
     665             : {
     666             :         int i;
     667         521 :         int success = 0;
     668             : 
     669             :         do // single-pass block: every `break` below leaves success at 0
     670             :         {
     671         817 :                 for (i = 0; i < level - 1; ++i) // initial spaces (one fewer than level)
     672             :                 {
     673         296 :                         if (fputc (' ', f) == EOF) break;
     674             :                 }
     675         521 :                 if (i < level - 1) break; // loop stopped early, so a write failed
     676             : 
     677         521 :                 if (!PutString (f, key, key_len, 1, 0)) // key (is_key = 1, is_section = 0)
     678             :                         break;
     679             : 
     680         521 :                 if (fputc (' ', f) == EOF     // space
     681         521 :                     || fputc (T_EQ, f) == EOF // the key/value separator
     682         521 :                     || fputc (' ', f) == EOF) // space
     683             :                         break;
     684             : 
     685         521 :                 if (!PutString (f, value, value_len, 0, 0)) // value (is_key = 0, is_section = 0)
     686             :                         break;
     687             : 
     688         521 :                 if (fputc (T_EOL, f) == EOF) // eol
     689             :                         break;
     690             : 
     691         521 :                 success = 1; // everything was written
     692             :         } while (0);
     693             : 
     694         521 :         return success;
     695             : }
     696             : 
     697             : /* Internal to GetNextIdentifier() and GetValue()--assumes fb is on the
     698             :  * character AFTER a \ in an identifier/value.  Parses the next characters for
     699             :  * a valid escape sequence, returning the result in *out (skipped when out is NULL), using a '\\' if it
     700             :  * wasn't valid.  GetNextIdentifier() and GetValue() put this character into
     701             :  * the output.  Accepted: a b f n r t v, the self-escaping characters in the switch below, T_X plus one or two hex
     702             :  * digits, and one to three octal digits (masked to 8 bits).  If eol_valid is nonzero, the function will also accept
     703             :  * \<ws>\n (the line-continue escape, optionally with a T_CMT comment before the eol) as valid, replacing it with a single
     704             :  * space.  Returns 1/0 indicating whether the line-continue escape sequence is
     705             :  * what was just parsed (thus, can only return 1 if eol_valid is 1).  Positions
     706             :  * fb so the next character will be the first character after the (maybe
     707             :  * invalid) escape sequence.  Either way, putting *out then the next characters
     708             :  * in fb into the output will result in the correct sequence.
     709          85 : static int DoEscape (file_buf * restrict fb, int * restrict out, int eol_valid)
     710             : {
     711             :         int c;             // current character
     712          85 :         int esc = -1;      // value of escape sequence; stays negative until a valid sequence is recognized
     713          85 :         int line_cont = 0; // whether the line-continue escape is what we just parsed
     714             : 
     715          85 :         switch (c = BufGetC (fb))
     716             :         {
     717             : 
     718             :         // Normal escapes--put them in esc.
     719             :         case 'a':
     720             :                 esc = '\a';
     721             :                 break;
     722             :         case 'b':
     723           4 :                 esc = '\b';
     724           4 :                 break;
     725             :         case 'f':
     726           4 :                 esc = '\f';
     727           4 :                 break;
     728             :         case 'n':
     729           4 :                 esc = '\n';
     730           4 :                 break;
     731             :         case 'r':
     732           4 :                 esc = '\r';
     733           4 :                 break;
     734             :         case 't':
     735           4 :                 esc = '\t';
     736           4 :                 break;
     737             :         case 'v':
     738           4 :                 esc = '\v';
     739           4 :                 break;
     740             : 
     741             :         // These are the same after translation (the escaped character stands for itself).
     742             :         case '\'':
     743             :         case '?':
     744             :         case T_ESC:
     745             :         case T_OQ:
     746             : #if (T_OQ != T_CQ)
     747             :         case T_CQ:
     748             : #endif
     749             :         case T_CMT:
     750             :         case T_OB:
     751             :         case T_CB:
     752             :         case T_EQ:
     753          42 :                 esc = c;
     754          42 :                 break;
     755             : 
     756             :         // Hex escape.  Look for one or two hex chars.
     757             :         case T_X:
     758          14 :                 c = BufGetC (fb);  // get next char
     759          14 :                 if (!isxdigit (c)) // if it's NOT hex
     760             :                 {
     761           0 :                         BufSeekBack (fb, 1); // put it back; esc is still -1, so the sequence is treated as invalid below
     762           0 :                         break;
     763             :                 }
     764          14 :                 esc = ascii2hex (c); // otherwise, save hex digit value
     765          14 :                 c = BufGetC (fb);    // and get next char
     766          14 :                 if (!isxdigit (c))   // if it's not a hex char
     767             :                 {
     768           0 :                         BufSeekBack (fb, 1); // just go back one so it'll come out next; the single digit is the value
     769           0 :                         break;
     770             :                 }
     771          14 :                 esc <<= 4;            // otherwise, shift previous char over by 4
     772          14 :                 esc += ascii2hex (c); // and add this char's value
     773          14 :                 break;
     774             : 
     775             :         // Might be an octal escape or a line-continue escape.
     776             :         default:
     777           1 :                 if (isoctal (c)) // if we've got an octal char
     778             :                 {
     779           0 :                         esc = ascii2oct (c); // get its int value
     780           0 :                         c = BufGetC (fb);    // look at next character
     781           0 :                         if (!isoctal (c))    // if not octal
     782             :                         {
     783           0 :                                 BufSeekBack (fb, 1); // put it back, dip out with the one-digit value
     784           0 :                                 break;
     785             :                         }
     786           0 :                         esc <<= 3;            // if it is octal, shift previous value over 3
     787           0 :                         esc += ascii2oct (c); // and add it
     788           0 :                         c = BufGetC (fb);     // look at third character
     789           0 :                         if (!isoctal (c))     // and do the exact same thing
     790             :                         {
     791           0 :                                 BufSeekBack (fb, 1);
     792           0 :                                 break;
     793             :                         }
     794           0 :                         esc <<= 3;
     795           0 :                         esc += ascii2oct (c);
     796           0 :                         esc &= 0xff; // three octal digits can exceed 0xff, so keep only the low 8 bits
     797             :                 } // or, if we should parse for line-continue escape
     798           1 :                 else if (eol_valid && (c == EOF || isspace (c)))
     799             :                 {
     800             :                         size_t n = 0;    // how many chars we've gone past initial space
     801             :                         int comment = 0; // whether we found a comment
     802             : 
     803             :                         while (1)
     804             :                         {
     805           0 :                                 if (c == T_CMT) // if we found a comment
     806           0 :                                         comment = 1; // from here on anything up to the eol is skipped
     807             : 
     808             :                                 // if we're done or char is invalid
     809           0 :                                 if (c == T_EOL || c == EOF || (!comment && !isspace (c))) break;
     810             : 
     811           0 :                                 c = BufGetC (fb); // get next char
     812           0 :                                 ++n;              // we've gone one farther
     813             :                         }
     814           0 :                         if (c != T_EOL) // if we stopped because of a non-space character or eof
     815             :                         {
     816           0 :                                 BufSeekBack (fb, n); // invalid, so go back however many chars we just went forward
     817           0 :                                 break;               // dip out
     818             :                         }
     819             :                         esc = ' ';     // otherwise, it's valid, so replace it with a single space
     820             :                         line_cont = 1; // set our return value to true
     821             :                 }
     822             :                 break;
     823             :         }
     824             : 
     825             :         // If we didn't get a valid sequence, we gotta put back the backslash.
     826          85 :         if (esc < 0)
     827             :         { // NOTE(review): this also runs when the first char read was EOF -- confirm BufSeekBack is safe after an EOF read
     828           1 :                 esc = T_ESC;     // set it
     829           1 :                 BufSeekBack (fb, 1); // and go back so we haven't gotten any other chars after backslash
     830             :         }
     831          85 :         if (out)
     832             :         { // and set *out if we can (callers may pass NULL when they only need the return value)
     833          85 :                 *out = esc;
     834             :         }
     835             : 
     836          85 :         return line_cont; // return whether it was a line continuation escape
     837             : }
     838             : 
     839             : /* Outputs the str_len bytes at str to f, surrounding them in quotes if the first or last byte is a space, and escaping
     840             :  * everything that needs it as it goes (is_key / is_section select the extra characters escaped outside quotes).  Returns 1 on success, 0 once a write fails.
     841             :  */
     842        1201 : static int PutString (FILE * restrict f, const char * restrict str, int str_len, int is_key, int is_section)
     843             : {
     844        1201 :         int quote = 0;   // whether to quote the string
     845        1201 :         int success = 1; // return value
     846        1201 :         int first = 1;   // whether we're processing the first character
     847             :         int advance;     // how many bytes to advance
     848             :         int c;
     849             : 
     850        1201 :         if (str_len > 0)
     851             :         {
     852         986 :                 c = *(str + str_len - 1); // set c to last character in string
     853         986 :                 if (*str == ' ' || c == ' ')
     854             :                 {                  // if initial or trailing spaces (\t etc. are
     855          48 :                         quote = 1; // always escaped, so we just care about ' ')
     856             :                 }
     857             :         }
     858             : 
     859        1201 :         if (quote && fputc (T_OQ, f) == EOF) // opening quote, only when quoting
     860             :         {
     861           0 :                 success = 0;
     862             :         }
     863             : 
     864        8879 :         while (success && str_len > 0) // stop at the end of the string or on the first failed write
     865             :         {
     866        7678 :                 c = *str;
     867        7678 :                 advance = 1; // escaped specials are one byte; PutUtf8Char may report more
     868             : 
     869        7678 :                 if (quote)
     870             :                 {
     871             :                         // In quotes, we just need to escape \ and "
     872         288 :                         if (c == T_ESC || c == T_CQ)
     873             :                         {
     874          16 :                                 if (fputc (T_ESC, f) == EOF || fputc (c, f) == EOF)
     875             :                                 {
     876             :                                         success = 0;
     877             :                                 }
     878             :                         }
     879             :                         else
     880             :                         {
     881         272 :                                 if (!(advance = PutUtf8Char (f, (const unsigned char *) str, str_len))) // 0 is treated as failure
     882             :                                 {
     883           0 :                                         success = 0;
     884             :                                 }
     885             :                         }
     886             :                 }
     887             :                 else
     888             :                 {
     889             :                         // Outside of quotes, we need to escape a lot of things:
     890             :                         // in keys: always:   \ ; =
     891             :                         //         if first: " [
     892             :                         // in section names: always:   \ ; ]
     893             :                         //                  if first: " [
     894             :                         // in values: always:   \ ;
     895             :                         //           if first: "
     896             : 
     897        7390 :                         if (c == T_ESC || c == T_CMT || (first && c == T_OQ) || (is_key && (c == T_EQ || (first && c == T_OB))) ||
     898        1372 :                             (is_section && (c == T_CB || (first && c == T_OB))))
     899             :                         {
     900          26 :                                 if (fputc (T_ESC, f) == EOF || fputc (c, f) == EOF)
     901             :                                 {
     902             :                                         success = 0;
     903             :                                 }
     904             :                         }
     905             :                         else
     906             :                         {
     907        7364 :                                 if (!(advance = PutUtf8Char (f, (const unsigned char *) str, str_len))) // 0 is treated as failure
     908             :                                 {
     909           0 :                                         success = 0;
     910             :                                 }
     911             :                         }
     912             :                 }
     913             : 
     914        7678 :                 str += advance; // on failure advance may be 0, but success == 0 ends the loop anyway
     915        7678 :                 str_len -= advance;
     916        7678 :                 first = 0;
     917             :         }
     918             : 
     919        1201 :         if (success && quote && fputc (T_CQ, f) == EOF) // closing quote, only when quoting and nothing failed
     920             :         {
     921           0 :                 success = 0;
     922             :         }
     923             : 
     924        1201 :         return success;
     925             : }
     926             : 
     927             : /* Outputs a single UTF-8 character from the string.  Escapes anything that's
     928             :  * invalid UTF-8.  Returns how many bytes made up the character.
     929             :  */
     930        7636 : static int PutUtf8Char (FILE * restrict f, const unsigned char * restrict str, int str_len)
     931             : {
     932             :         // check for ASCII range
     933        7636 :         if (str[0] < 0x80)
     934             :         {
     935             :                 // escape what's polite
     936        7610 :                 if (str[0] < 0x20 || str[0] == 0x7f)
     937             :                 {
     938          28 :                         if (fputc (T_ESC, f) == EOF) return 0;
     939             : 
     940             :                         // see if we can make a pretty, non-hex escape
     941          28 :                         int c = 0;
     942          28 :                         switch (str[0])
     943             :                         {
     944             :                         case '\a':
     945             :                                 c = 'a';
     946             :                                 break;
     947             :                         case '\b':
     948             :                                 c = 'b';
     949             :                                 break;
     950             :                         case '\f':
     951             :                                 c = 'f';
     952             :                                 break;
     953             :                         case '\n':
     954             :                                 c = 'n';
     955             :                                 break;
     956             :                         case '\r':
     957             :                                 c = 'r';
     958             :                                 break;
     959             :                         case '\t':
     960             :                                 c = 't';
     961             :                                 break;
     962             :                         case '\v':
     963             :                                 c = 'v';
     964             :                                 break;
     965             :                         }
     966             : 
     967          28 :                         if (c)
     968             :                         {
     969          28 :                                 if (fputc (c, f) == EOF) return 0;
     970             :                         }
     971             :                         else
     972             :                         {
     973             :                                 // gotta do it the hard way
     974             : 
     975             :                                 int hd1, hd2;
     976           0 :                                 hex2ascii1 (str[0], hd1);
     977           0 :                                 hex2ascii2 (str[0], hd2);
     978             : 
     979           0 :                                 if (fputc (T_X, f) == EOF || fputc (hd1, f) == EOF || fputc (hd2, f) == EOF) return 0;
     980             :                         }
     981             :                 }
     982             :                 else // doesn't warrant escaping
     983             :                 {
     984        7582 :                         if (fputc (str[0], f) == EOF) return 0;
     985             :                 }
     986             : 
     987             :                 return 1; // ASCII are one byte long
     988             :         }
     989             : 
     990             :         // This huge if statement for valid UTF-8 characters comes right out of The
     991             :         // Unicode Standard, Version 5.0 electronic edition, section 3.9, table 3-7,
     992             :         // page 104 <http://www.unicode.org/versions/Unicode5.0.0/ch03.pdf>.  It's
     993             :         // also described by RFC 3629 <http://www.ietf.org/rfc/rfc3629.txt>,
     994             :         // in particular the ABNF grammar in section 4.  This handles excluding
     995             :         // overlong sequences, the surrogates, and just plain bytes out of range.
     996          26 :         if ((str[0] >= 0xc2 && str[0] <= 0xdf && str_len >= 2 && str[1] >= 0x80 && str[1] <= 0xbf) ||
     997          14 :             (str[0] == 0xe0 && str_len >= 3 && str[1] >= 0xa0 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf) ||
     998          14 :             (str[0] >= 0xe1 && str[0] <= 0xec && str_len >= 3 && str[1] >= 0x80 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf) ||
     999          14 :             (str[0] == 0xed && str_len >= 3 && str[1] >= 0x80 && str[1] <= 0x9f && str[2] >= 0x80 && str[2] <= 0xbf) ||
    1000          14 :             (str[0] >= 0xee && str[0] <= 0xef && str_len >= 3 && str[1] >= 0x80 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf) ||
    1001           0 :             (str[0] == 0xf0 && str_len >= 4 && str[1] >= 0x90 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf && str[3] >= 0x80 &&
    1002          14 :              str[3] <= 0xbf) ||
    1003          14 :             (str[0] >= 0xf1 && str[0] <= 0xf3 && str_len >= 4 && str[1] >= 0x80 && str[1] <= 0xbf && str[2] >= 0x80 && str[2] <= 0xbf &&
    1004           0 :              str[3] >= 0x80 && str[3] <= 0xbf) ||
    1005           0 :             (str[0] == 0xf4 && str_len >= 4 && str[1] >= 0x80 && str[1] <= 0x8f && str[2] >= 0x80 && str[2] <= 0xbf && str[3] >= 0x80 &&
    1006             :              str[3] <= 0xbf))
    1007             :         {
    1008             :                 // we've got a valid UTF-8 sequence
    1009             : 
    1010          12 :                 int char_len = (str[0] < 0xe0 ? 2 : (str[0] < 0xf0 ? 3 : 4));
    1011             : 
    1012          36 :                 for (int i = 0; i < char_len; ++i)
    1013             :                 {
    1014          24 :                         if (fputc (str[i], f) == EOF) return 0;
    1015             :                 }
    1016             : 
    1017             :                 return char_len; // let the caller know how many bytes we ate
    1018             :         }
    1019             : 
    1020             :         // if we got here, it's not ASCII and not valid UTF-8, so just output the
    1021             :         // byte escaped and call it a day
    1022             : 
    1023             :         int hd1, hd2;
    1024          14 :         hex2ascii1 (str[0], hd1);
    1025          14 :         hex2ascii2 (str[0], hd2);
    1026             : 
    1027          14 :         if (fputc (T_ESC, f) == EOF || fputc (T_X, f) == EOF || fputc (hd1, f) == EOF || fputc (hd2, f) == EOF) return 0;
    1028             : 
    1029             :         return 1; // we only processed one byte
    1030             : }

Generated by: LCOV version 1.13