This source file includes the following definitions.
- str_tokenize_word
- str_tokenize
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 #include <config.h>
34
35 #include <stdlib.h>
36 #include <string.h>
37
38 #include "lib/global.h"
39 #include "lib/util.h"
40
41 #include "lib/strutil.h"
42
43
44
45
46
/* Characters that end an unquoted word when met outside quotes and parentheses. */
#define WORD_DELIMITERS " \t\n;&()|<>"
/* Characters that open a quoted region; the same character closes it. */
#define QUOTE_CHARACTERS "\"'`"

/* Set consulted when a backslash is met inside a double-quoted region. */
#define slashify_in_quotes "\\`\"$"

/*
 * TRUE if character c occurs in the NUL-terminated set s.
 * The NUL character is never a member, although strchr() would find the terminator.
 * Both arguments are parenthesized so compound expressions expand safely;
 * note that c is evaluated twice, so it must not have side effects.
 */
#define member(c, s) (((c) != '\0') ? (strchr ((s), (c)) != NULL) : FALSE)
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 static int
68 str_tokenize_word (const char *string, int start)
69 {
70 int i = start;
71 char delimiter = '\0';
72 char delimopen = '\0';
73 int nestdelim = 0;
74
75 if (member (string[i], "()\n"))
76 return (i + 1);
77
78 if (g_ascii_isdigit (string[i]))
79 {
80 int j;
81
82 for (j = i; string[j] != '\0' && g_ascii_isdigit (string[j]); j++)
83 ;
84
85 if (string[j] == '\0')
86 return j;
87
88 if (string[j] == '<' || string[j] == '>')
89 i = j;
90 else
91 {
92 i = j;
93 goto get_word;
94 }
95 }
96
97 if (member (string[i], "<>;&|"))
98 {
99 char peek = string[i + 1];
100
101 if (peek == string[i])
102 {
103 if (peek == '<' && (string[i + 2] == '-' || string[i + 2] == '<'))
104 i++;
105 return (i + 2);
106 }
107
108 if (peek == '&' && (string[i] == '>' || string[i] == '<'))
109 {
110 int j;
111
112
113 for (j = i + 2; string[j] != '\0' && g_ascii_isdigit (string[j]); j++)
114 ;
115 if (string[j] == '-')
116 j++;
117 return j;
118 }
119
120 if ((peek == '>' && string[i] == '&') || (peek == '|' && string[i] == '>'))
121 return (i + 2);
122
123
124 if (peek == '(' && (string[i] == '>' || string[i] == '<'))
125 {
126
127 i += 2;
128 delimopen = '(';
129 delimiter = ')';
130 nestdelim = 1;
131 goto get_word;
132 }
133
134 return (i + 1);
135 }
136
137 get_word:
138
139
140 if (delimiter == '\0' && member (string[i], QUOTE_CHARACTERS))
141 {
142 delimiter = string[i];
143 i++;
144 }
145
146 for (; string[i] != '\0'; i++)
147 {
148 if (string[i] == '\\' && string[i + 1] == '\n')
149 {
150 i++;
151 continue;
152 }
153
154 if (string[i] == '\\' && delimiter != '\'' &&
155 (delimiter != '"' || member (string[i], slashify_in_quotes)))
156 {
157 i++;
158 continue;
159 }
160
161
162
163 if (nestdelim != 0 && string[i] == delimopen)
164 {
165 nestdelim++;
166 continue;
167 }
168 if (nestdelim != 0 && string[i] == delimiter)
169 {
170 nestdelim--;
171 if (nestdelim == 0)
172 delimiter = '\0';
173 continue;
174 }
175
176 if (delimiter != '\0' && string[i] == delimiter)
177 {
178 delimiter = '\0';
179 continue;
180 }
181
182
183 if (nestdelim == 0 && delimiter == '\0' && member (string[i], "<>$!@?+*")
184 && string[i + 1] == '(')
185 {
186
187 i += 2;
188 delimopen = '(';
189 delimiter = ')';
190 nestdelim = 1;
191 continue;
192 }
193
194 if (delimiter == '\0' && member (string[i], WORD_DELIMITERS))
195 break;
196
197 if (delimiter == '\0' && member (string[i], QUOTE_CHARACTERS))
198 delimiter = string[i];
199 }
200
201 return i;
202 }
203
204
205
206
207
208
209
210
211
212 GPtrArray *
213 str_tokenize (const char *string)
214 {
215 GPtrArray *result = NULL;
216 int i = 0;
217
218
219
220 while (string[i] != '\0')
221 {
222 int start;
223
224
225 for (; string[i] != '\0' && whiteness (string[i]); i++)
226 ;
227
228 if (string[i] == '\0')
229 return result;
230
231 start = i;
232 i = str_tokenize_word (string, start);
233
234
235
236
237
238 if (i == start)
239 for (i++; string[i] != '\0' && member (string[i], WORD_DELIMITERS); i++)
240 ;
241
242 if (result == NULL)
243 result = g_ptr_array_new ();
244
245 g_ptr_array_add (result, g_strndup (string + start, i - start));
246 }
247
248 return result;
249 }
250
251