blob: ad0bdc042ed146441ab3139e93bf3e036c006feb [file] [log] [blame]
anthony1cdc5b72012-03-03 02:31:18 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% SSS CCC RRRR III PPPP TTTTT TTTTT OOO K K EEEE N N %
6% S C R R I P P T T O O K K E NN N %
7% SSS C RRRR I PPPP T T O O KK EEE N N N %
8% S C R R I P T T O O K K E N NN %
9% SSSS CCC R RR III P T T OOO K K EEEE N N %
10% %
11% Perform "Magick" on Images via the Command Line Interface %
12% %
13% Dragon Computing %
14% Anthony Thyssen %
15% January 2012 %
16% %
17% %
18% Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization %
19% dedicated to making software imaging solutions freely available. %
20% %
21% You may not use this file except in compliance with the License. You may %
22% obtain a copy of the License at %
23% %
24% http://www.imagemagick.org/script/license.php %
25% %
26% Unless required by applicable law or agreed to in writing, software %
27% distributed under the License is distributed on an "AS IS" BASIS, %
28% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
29% See the License for the specific language governing permissions and %
30% limitations under the License. %
31% %
32%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
33%
34% Read a stream of characters and return tokens one at a time
35%
36*/
37
38/*
39 Include declarations.
40*/
41#ifndef SCRIPT_TOKEN_TESTING
42#include "MagickWand/studio.h"
43#include "MagickCore/memory_.h"
44#include "MagickCore/string-private.h"
45#include "MagickWand/script-token.h"
46#endif
47
48/*
49%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
50% %
51% %
52% %
53% A c q u i r e S c r i p t T o k e n I n f o %
54% %
55% %
56% %
57%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
58%
59% AcquireScriptTokenInfo() allocated, initializes and opens the given
60% file stream from which tokens are to be extracted.
61%
62% The format of the AcquireScriptTokenInfo method is:
63%
64% ScriptTokenInfo *AcquireScriptTokenInfo(char *filename)
65%
66% A description of each parameter follows:
67%
68% o filename the filename to open ("-" means stdin)
69%
70*/
71WandExport ScriptTokenInfo *AcquireScriptTokenInfo(char *filename)
72{
73 ScriptTokenInfo
74 *token_info;
75
76 token_info=(ScriptTokenInfo *) AcquireMagickMemory(sizeof(*token_info));
77 if (token_info == (ScriptTokenInfo *) NULL)
78 return token_info;
79 (void) ResetMagickMemory(token_info,0,sizeof(*token_info));
80
81 token_info->opened=MagickFalse;
82 if ( LocaleCompare(filename,"-") == 0 ) {
83 token_info->stream=stdin;
84 token_info->opened=MagickFalse;
85 }
86#if 0 /* FUTURE POSIBILITIES */
87 else if ( LocaleNCompare(filename,"fd:",3) == 0 ) {
88 token_info->stream=fdopen(StringToLong(filename+3),"r");
89 token_info->opened=MagickFalse;
90 }
91#endif
92 else {
93 token_info->stream=fopen(filename, "r");
94 token_info->opened=MagickTrue;
95 }
96 if ( token_info->stream == (FILE *)NULL ) {
97 token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
98 return(token_info);
99 }
100
101 token_info->curr_line=1;
102 token_info->length=INITAL_TOKEN_LENGTH;
103 token_info->token=(char *) AcquireMagickMemory(token_info->length);
104
105 token_info->status=(token_info->token != (char *)NULL)
106 ? TokenStatusOK : TokenStatusMemoryFailed;
107 token_info->signature=WandSignature;
108
109 return token_info;
110}
111
112/*
113%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
114% %
115% %
116% %
117% D e s t r o y S c r i p t T o k e n I n f o %
118% %
119% %
120% %
121%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
122%
123% DestroyScriptTokenInfo() allocated, initializes and opens the given
124% file stream from which tokens are to be extracted.
125%
126% The format of the DestroyScriptTokenInfo method is:
127%
128% ScriptTokenInfo *DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
129%
130% A description of each parameter follows:
131%
132% o token_info The ScriptTokenInfo structure to be destroyed
133%
134*/
135WandExport ScriptTokenInfo * DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
136{
137 assert(token_info != (ScriptTokenInfo *) NULL);
138 assert(token_info->signature == WandSignature);
139
140 if ( token_info->opened != MagickFalse )
141 fclose(token_info->stream);
142
143 if (token_info->token != (char *) NULL )
144 token_info->token=RelinquishMagickMemory(token_info->token);
145 token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
146 return(token_info);
147}
148
149/*
150%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
151% %
152% %
153% %
154% G e t S c r i p t T o k e n %
155% %
156% %
157% %
158%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
159%
%  GetScriptToken() is a fairly general, finite state token parser that
%  divides an input file stream into tokens, in a way that is as close to a
%  UNIX shell as is feasible.  Only shell variable and command substitutions
%  are not performed.  Tokens can be any length.
164%
165% Tokens are white space separated, and may be quoted, or even partially
166% quoted by either single or double quotes, or the use of backslashes,
167% or any mix of the three.
168%
169% For example: This\ is' a 'single" token"
170%
%  A token is returned immediately when the end of the token is found, that is
%  as soon as an unquoted white-space or EOF condition has been found.  That
%  is to say the file stream is parsed purely character-by-character,
%  regardless of any buffering constraints set by the system.  It is not
%  parsed line-by-line.
175%
%  The function will return 'MagickTrue' if a valid token was found, while
%  the token status will be set accordingly to 'OK' or 'EOF', according to
%  the cause of the end of token.  The token may be an empty string if the
%  input was a quoted empty string.  Other error conditions return a value of
%  MagickFalse, indicating that any token found was incomplete due to some
%  error condition.
182%
%  Single quotes will preserve all characters including backslashes.  Double
%  quotes will also preserve backslashes unless escaping a double quote,
%  or another backslash.  Other shell meta-characters are not treated as
%  special by this tokenizer.
187%
188% For example Quoting the quote chars:
189% \' "'" \" '"' "\"" \\ '\' "\\"
190%
191% Outside quotes, backslash characters will make spaces, tabs and quotes part
192% of a token returned. However a backslash at the end of a line (and outside
193% quotes) will cause the newline to be completely ignored (as per the shell
194% line continuation).
195%
%  Comments start with a '#' character at the start of a new token, and will
%  be completely ignored up to the end of line, regardless of any backslash at
%  the end of the line.  You can escape a comment '#', using quotes or
%  backslashes just as you can in a shell.
200%
201% The format of the GetScriptToken method is:
202%
203% MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
204%
205% A description of each parameter follows:
206%
207% o token_info pointer to a structure holding token details
208%
209*/
/* States of the parser */
enum
{
  IN_WHITE   = 0,  /* between tokens, skipping white space */
  IN_TOKEN   = 1,  /* collecting an unquoted part of a token */
  IN_QUOTE   = 2,  /* collecting a quoted part of a token */
  IN_COMMENT = 3   /* skipping a '#' comment up to the end of line */
};
215
/* Macro to read the next character from the stream into 'c'.

   The macro uses the variable 'token_info' of the enclosing scope, and keeps
   its line and column counts up to date.

   The EOL is defined as either '\r\n', or '\r', or '\n'.  A '\r' that is
   followed by '\n' is passed on unchanged (the line is then counted when
   the '\n' is read), while a '\r' on its own is converted into a '\n' to
   correctly handle raw input, typically due to 'copy-n-paste' of text files.

   NOTE: the expansion executes a bare 'break' on EOF, and also (after
   setting the status to TokenStatusBinary) when a control character other
   than '\a' to '\r' or ESC is read.  That 'break' leaves the innermost
   loop or switch enclosing the point of use.
*/
#define GetChar(c) \
{ \
  (c)=fgetc(token_info->stream); \
  token_info->curr_column++; \
  if ( (c) == '\r' ) \
    { \
      int getchar_peek_=fgetc(token_info->stream); \
      ungetc(getchar_peek_,token_info->stream); \
      (c)=( getchar_peek_ == '\n' ) ? '\r' : '\n'; \
    } \
  if ( (c) == '\n' ) \
    { \
      token_info->curr_line++; \
      token_info->curr_column=0; \
    } \
  if ( (c) == EOF ) \
    break; \
  if ( ((c) >= '\0' && (c) < '\a') || \
       ((c) > '\r' && (c) < ' ' && (c) != '\033') ) \
    { \
      token_info->status=TokenStatusBinary; \
      break; \
    } \
}
/* Macro to collect the token characters.

   Appends 'c' to token_info->token at index 'offset' (both variables are
   taken from the enclosing scope) and advances 'offset'.  One byte is always
   kept free for the terminating NUL.  When the buffer is full it is first
   enlarged: four times the size while it is below MaxTextExtent, and by
   MaxTextExtent bytes at a time after that.

   NOTE: if the buffer cannot be enlarged the status is set to
   TokenStatusMemoryFailed and the expansion executes a bare 'break', which
   leaves the innermost loop or switch enclosing the point of use.
*/
#define SaveChar(c) \
{ \
  if ((size_t) offset >= (token_info->length-1)) \
    { \
      token_info->length=( token_info->length < MaxTextExtent ) ? \
        token_info->length*4 : token_info->length+MaxTextExtent; \
      token_info->token=(char *) ResizeMagickMemory(token_info->token, \
        token_info->length); \
      if ( token_info->token == (char *) NULL ) \
        { \
          token_info->status=TokenStatusMemoryFailed; \
          break; \
        } \
    } \
  token_info->token[offset++]=(char) (c); \
}
258
259WandExport MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
260{
261 int
262 quote,
263 c;
264
265 int
266 state;
267
268 ssize_t
269 offset;
270
271 /* EOF - no more tokens! */
272 if (token_info->status != TokenStatusOK)
273 {
274 token_info->token[0]='\0';
275 return(MagickFalse);
276 }
277
278 state=IN_WHITE;
279 quote='\0';
280 offset=0;
281 while(1)
282 {
283 /* get character */
284 GetChar(c);
285
286 /* hash comment handling */
287 if ( state == IN_COMMENT ) {
288 if ( c == '\n' )
289 state=IN_WHITE;
290 continue;
291 }
292 if (c == '#' && state == IN_WHITE)
293 state=IN_COMMENT;
294 /* whitespace break character */
295 if (strchr(" \n\r\t",c) != (char *)NULL) {
296 switch (state) {
297 case IN_TOKEN:
298 token_info->token[offset]='\0';
299 return(MagickTrue);
300 case IN_QUOTE:
301 SaveChar(c);
302 break;
303 }
304 continue;
305 }
306 /* quote character */
307 if (strchr("'\"",c) != (char *)NULL) {
308 switch (state) {
309 case IN_WHITE:
310 token_info->token_line=token_info->curr_line;
311 token_info->token_column=token_info->curr_column;
312 case IN_TOKEN:
313 state=IN_QUOTE;
314 quote=c;
315 break;
316 case IN_QUOTE:
317 if (c == quote)
318 {
319 state=IN_TOKEN;
320 quote='\0';
321 }
322 else
323 SaveChar(c);
324 break;
325 }
326 continue;
327 }
328 /* escape char (preserve in quotes - unless escaping the same quote) */
329 if (c == '\\')
330 {
331 if ( state==IN_QUOTE && quote == '\'' ) {
332 SaveChar('\\');
333 continue;
334 }
335 GetChar(c);
336 if (c == '\n' || c == '\r' )
337 switch (state) {
338 case IN_COMMENT:
339 state=IN_WHITE; /* end comment */
340 case IN_WHITE:
341 case IN_TOKEN:
342 continue; /* line continuation (outside quotes and comment) */
343 }
344 switch (state) {
345 case IN_WHITE:
346 token_info->token_line=token_info->curr_line;
347 token_info->token_column=token_info->curr_column;
348 state=IN_TOKEN;
349 break;
350 case IN_QUOTE:
351 if (c != quote && c != '\\')
352 SaveChar('\\');
353 break;
354 }
355 SaveChar(c);
356 continue;
357 }
358 /* ordinary character */
359 switch (state) {
360 case IN_WHITE:
361 token_info->token_line=token_info->curr_line;
362 token_info->token_column=token_info->curr_column;
363 state=IN_TOKEN;
364 case IN_TOKEN:
365 case IN_QUOTE:
366 SaveChar(c);
367 break;
368 case IN_COMMENT:
369 break;
370 }
371 }
372 /* input stream has EOF or produced a fatal error */
373 token_info->token[offset]='\0';
374 if ( token_info->status != TokenStatusOK )
375 return(MagickFalse); /* fatal condition - no valid token */
376 token_info->status = TokenStatusEOF;
377 if ( state == IN_QUOTE)
378 token_info->status = TokenStatusBadQuotes;
379 if ( state == IN_TOKEN)
380 return(MagickTrue); /* token with EOF at end - no problem */
381 return(MagickFalse); /* in white space or in quotes - invalid token */
382}