blob: 036a2c30fd857313fe260292c08c5910626e073a [file] [log] [blame]
anthony1cdc5b72012-03-03 02:31:18 +00001/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3% %
4% %
5% SSS CCC RRRR III PPPP TTTTT TTTTT OOO K K EEEE N N %
6% S C R R I P P T T O O K K E NN N %
7% SSS C RRRR I PPPP T T O O KK EEE N N N %
8% S C R R I P T T O O K K E N NN %
9% SSSS CCC R RR III P T T OOO K K EEEE N N %
10% %
anthony756cd0d2012-04-08 12:41:44 +000011% Tokenize Magick Script into Options %
anthony1cdc5b72012-03-03 02:31:18 +000012% %
13% Dragon Computing %
14% Anthony Thyssen %
15% January 2012 %
16% %
17% %
18% Copyright 1999-2012 ImageMagick Studio LLC, a non-profit organization %
19% dedicated to making software imaging solutions freely available. %
20% %
21% You may not use this file except in compliance with the License. You may %
22% obtain a copy of the License at %
23% %
24% http://www.imagemagick.org/script/license.php %
25% %
26% Unless required by applicable law or agreed to in writing, software %
27% distributed under the License is distributed on an "AS IS" BASIS, %
28% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %
29% See the License for the specific language governing permissions and %
30% limitations under the License. %
31% %
32%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
33%
anthony756cd0d2012-04-08 12:41:44 +000034% Read a stream of characters and return tokens one at a time.
35%
%  The input stream is divided into individual 'tokens' (representing 'words'
%  or 'options'), in a way that is as close to a UNIX shell, as is feasible.
38% Only shell variable, and command substitutions will not be performed.
39% Tokens can be any length.
40%
%  The main function call is GetScriptToken() (see below) which returns one
%  and only one token at a time.  The other functions provide support to this
%  function, opening scripts, and setting up the required structures.
44%
45% More specifically...
46%
47% Tokens are white space separated, and may be quoted, or even partially
48% quoted by either single or double quotes, or the use of backslashes,
49% or any mix of the three.
50%
51% For example: This\ is' a 'single" token"
52%
%  A token is returned immediately the end of token is found. That is, as
%  soon as an unquoted white-space or EOF condition has been found.  That is
%  to say the file stream is parsed purely character-by-character, regardless
%  of any buffering constraints set by the system.  It is not parsed
%  line-by-line.
57%
%  The function will return 'MagickTrue' if a valid token was found, while
%  the token status will be set accordingly to 'OK' or 'EOF', according to
%  the cause of the end of token.  The token may be an empty string if the
%  input was a quoted empty string.  Other error conditions return a value of
%  MagickFalse, indicating that any token found was incomplete due to some
%  error condition.
64%
%  Single quotes will preserve all characters including backslashes. Double
%  quotes will also preserve backslashes unless escaping a double quote,
%  or another backslash.  Other shell meta-characters are not treated as
%  special by this tokenizer.
69%
70% For example Quoting the quote chars:
71% \' "'" \" '"' "\"" \\ '\' "\\"
72%
73% Outside quotes, backslash characters will make spaces, tabs and quotes part
74% of a token returned. However a backslash at the end of a line (and outside
75% quotes) will cause the newline to be completely ignored (as per the shell
76% line continuation).
77%
%  Comments start with a '#' character at the start of a new token, and will
%  be completely ignored up to the end of line, regardless of any backslash at
%  the end of the line.  You can escape a comment '#', using quotes or
%  backslashes just as you can in a shell.
82%
anthonyd28c6a62012-05-08 00:12:56 +000083%
84% UNIX script Launcher...
85%
%  The use of '#' comments allows normal UNIX 'scripting' to be used to call
%  on the "magick" command to parse the tokens from a file
anthony756cd0d2012-04-08 12:41:44 +000088%
89% #!/path/to/command/magick -script
90%
anthonyd28c6a62012-05-08 00:12:56 +000091%
92% UNIX 'env' command launcher...
93%
94% If "magick" is renamed "magick-script" you can use a 'env' UNIX launcher
anthony756cd0d2012-04-08 12:41:44 +000095%
96% #!/usr/bin/env magick-script
97%
anthonyd28c6a62012-05-08 00:12:56 +000098%
%  Shell script launcher...
100%
anthony756cd0d2012-04-08 12:41:44 +0000101% As a special case a ':' at the start of a line is also treated as a comment
102% This allows a magick script to ignore a line that can be parsed by the shell
103% and not by the magick script (tokenizer). This allows for an alternative
anthonyd28c6a62012-05-08 00:12:56 +0000104% script 'launcher' to be used for magick scripts.
anthony756cd0d2012-04-08 12:41:44 +0000105%
106% #!/bin/sh
107% #
anthonyd28c6a62012-05-08 00:12:56 +0000108% : echo "This part is run in the shell, but ignored by Magick"
anthony756cd0d2012-04-08 12:41:44 +0000109% : exec magick -script "$0" "$@"; exit 10
110% #
anthonyd28c6a62012-05-08 00:12:56 +0000111% # The rest of the file is magick script
112% -read label:"This is a Magick Script!"
113% -write show: -exit
114%
115%
116% DOS script launcher...
117%
%  Similarly for DOS, any '@' at the start of the line (outside of quotes)
%  will also be treated as a comment.  This allows the same kind of script
%  launcher code to be used under DOS
121%
122% @echo This line is DOS executed but ignored by Magick
123% @magick -script %~dpnx0 %*
124% @GOTO :EOF
125% #
126% # The rest of the file is magick script
anthony756cd0d2012-04-08 12:41:44 +0000127% -read label:"This is a Magick Script!"
128% -write show: -exit
anthony1cdc5b72012-03-03 02:31:18 +0000129%
130*/
131
132/*
133 Include declarations.
anthony76369f62012-03-29 05:04:08 +0000134
135 NOTE: Do not include if being compiled into the "test/script-token-test.c"
136 module, for low level token testing.
anthony1cdc5b72012-03-03 02:31:18 +0000137*/
anthonya5c23a82012-03-29 05:01:52 +0000138#ifndef SCRIPT_TOKEN_TESTING
139# include "MagickWand/studio.h"
140# include "MagickWand/MagickWand.h"
141# include "MagickWand/script-token.h"
142# include "MagickCore/string-private.h"
143# include "MagickCore/utility-private.h"
144#endif
anthony1cdc5b72012-03-03 02:31:18 +0000145
146/*
147%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148% %
149% %
150% %
151% A c q u i r e S c r i p t T o k e n I n f o %
152% %
153% %
154% %
155%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
156%
%  AcquireScriptTokenInfo() allocates, initializes and opens the given
%  file stream from which tokens are to be extracted.
159%
160% The format of the AcquireScriptTokenInfo method is:
161%
162% ScriptTokenInfo *AcquireScriptTokenInfo(char *filename)
163%
164% A description of each parameter follows:
165%
166% o filename the filename to open ("-" means stdin)
167%
168*/
169WandExport ScriptTokenInfo *AcquireScriptTokenInfo(char *filename)
170{
171 ScriptTokenInfo
172 *token_info;
173
174 token_info=(ScriptTokenInfo *) AcquireMagickMemory(sizeof(*token_info));
175 if (token_info == (ScriptTokenInfo *) NULL)
176 return token_info;
177 (void) ResetMagickMemory(token_info,0,sizeof(*token_info));
178
179 token_info->opened=MagickFalse;
180 if ( LocaleCompare(filename,"-") == 0 ) {
181 token_info->stream=stdin;
182 token_info->opened=MagickFalse;
183 }
anthony1cdc5b72012-03-03 02:31:18 +0000184 else if ( LocaleNCompare(filename,"fd:",3) == 0 ) {
185 token_info->stream=fdopen(StringToLong(filename+3),"r");
186 token_info->opened=MagickFalse;
187 }
anthony1cdc5b72012-03-03 02:31:18 +0000188 else {
anthony7bcfe7f2012-03-30 14:01:22 +0000189 token_info->stream=fopen_utf8(filename, "r");
anthony1cdc5b72012-03-03 02:31:18 +0000190 }
191 if ( token_info->stream == (FILE *)NULL ) {
192 token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
193 return(token_info);
194 }
195
196 token_info->curr_line=1;
197 token_info->length=INITAL_TOKEN_LENGTH;
198 token_info->token=(char *) AcquireMagickMemory(token_info->length);
199
200 token_info->status=(token_info->token != (char *)NULL)
201 ? TokenStatusOK : TokenStatusMemoryFailed;
202 token_info->signature=WandSignature;
203
204 return token_info;
205}
206
207/*
208%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
209% %
210% %
211% %
212% D e s t r o y S c r i p t T o k e n I n f o %
213% %
214% %
215% %
216%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
217%
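%  DestroyScriptTokenInfo() closes the input stream if the structure is
%  flagged as owning it ('opened'), then releases the token buffer and the
%  ScriptTokenInfo structure itself.  The returned pointer is the result of
%  relinquishing the structure.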
220%
221% The format of the DestroyScriptTokenInfo method is:
222%
223% ScriptTokenInfo *DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
224%
225% A description of each parameter follows:
226%
227% o token_info The ScriptTokenInfo structure to be destroyed
228%
229*/
230WandExport ScriptTokenInfo * DestroyScriptTokenInfo(ScriptTokenInfo *token_info)
231{
232 assert(token_info != (ScriptTokenInfo *) NULL);
233 assert(token_info->signature == WandSignature);
234
235 if ( token_info->opened != MagickFalse )
236 fclose(token_info->stream);
237
238 if (token_info->token != (char *) NULL )
cristyaa2c16c2012-03-25 22:21:35 +0000239 token_info->token=(char *) RelinquishMagickMemory(token_info->token);
anthony1cdc5b72012-03-03 02:31:18 +0000240 token_info=(ScriptTokenInfo *) RelinquishMagickMemory(token_info);
241 return(token_info);
242}
243
244/*
245%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
246% %
247% %
248% %
249% G e t S c r i p t T o k e n %
250% %
251% %
252% %
253%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
254%
%  GetScriptToken() is a fairly general, finite state token parser that
%  returns tokens one at a time, as soon as possible.
anthony52bef752012-03-27 13:54:47 +0000257%
258%
anthony1cdc5b72012-03-03 02:31:18 +0000259% The format of the GetScriptToken method is:
260%
261% MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
262%
263% A description of each parameter follows:
264%
265% o token_info pointer to a structure holding token details
266%
267*/
/* Parser states used by GetScriptToken() */
#define IN_WHITE    0
#define IN_TOKEN    1
#define IN_QUOTE    2
#define IN_COMMENT  3

/* GetChar(c): fetch the next character from token_info->stream into 'c'.

   Line and column counters are maintained as a side effect.  A lone '\r'
   is reported as '\n' (handles raw 'copy-n-paste' input), while the '\r'
   of a '\r\n' pair is passed through unchanged and the '\n' that follows
   does the line counting.

   The macro leaves via 'break' on EOF, and also on a character rejected as
   binary data (any code below '\a', or between '\r' and ' ' other than
   ESC), in which case the status is set to TokenStatusBinary first.  That
   'break' exits whatever loop or switch most closely encloses the
   expansion, and the macro relies on a 'token_info' variable being in scope.
*/
#define GetChar(c) \
{ \
  (c)=fgetc(token_info->stream); \
  token_info->curr_column++; \
  if ( (c) == '\r' ) { \
    (c)=fgetc(token_info->stream); \
    ungetc((c),token_info->stream); \
    (c)=( (c) == '\n' ) ? '\r' : '\n'; \
  } \
  if ( (c) == '\n' ) { \
    token_info->curr_line++; \
    token_info->curr_column=0; \
  } \
  if ( (c) == EOF ) \
    break; \
  if ( ((c) >= '\0' && (c) < '\a') || \
       ((c) > '\r' && (c) < ' ' && (c) != '\033') ) { \
    token_info->status=TokenStatusBinary; \
    break; \
  } \
}

/* SaveChar(c): append 'c' to the token buffer at index 'offset'.

   When the buffer is full it is enlarged first: by a factor of four while
   it is smaller than MaxTextExtent, and by MaxTextExtent thereafter.  If
   the resize fails the status becomes TokenStatusMemoryFailed and the macro
   leaves via 'break', which again only exits the nearest enclosing loop or
   switch.  Relies on 'token_info' and 'offset' being in scope.
*/
#define SaveChar(c) \
{ \
  if ( (size_t) offset >= (token_info->length-1) ) { \
    token_info->length=( token_info->length >= MaxTextExtent ) \
      ? token_info->length+MaxTextExtent \
      : token_info->length*4; \
    token_info->token=(char *) \
      ResizeMagickMemory(token_info->token,token_info->length); \
    if ( token_info->token == (char *) NULL ) { \
      token_info->status=TokenStatusMemoryFailed; \
      break; \
    } \
  } \
  token_info->token[offset++]=(char) (c); \
}
317
318WandExport MagickBooleanType GetScriptToken(ScriptTokenInfo *token_info)
319{
320 int
321 quote,
322 c;
323
324 int
325 state;
326
327 ssize_t
328 offset;
329
330 /* EOF - no more tokens! */
331 if (token_info->status != TokenStatusOK)
332 {
333 token_info->token[0]='\0';
334 return(MagickFalse);
335 }
336
337 state=IN_WHITE;
338 quote='\0';
339 offset=0;
340 while(1)
341 {
342 /* get character */
343 GetChar(c);
344
345 /* hash comment handling */
346 if ( state == IN_COMMENT ) {
347 if ( c == '\n' )
348 state=IN_WHITE;
349 continue;
350 }
anthonyd28c6a62012-05-08 00:12:56 +0000351 /* comment lines start with '#' anywhere, or ':' or '@' at start of line */
anthony52bef752012-03-27 13:54:47 +0000352 if ( state == IN_WHITE )
anthonyd28c6a62012-05-08 00:12:56 +0000353 if ( ( c == '#' ) ||
354 ( token_info->curr_column==1 && (c == ':' || c == '@' ) ) )
anthony52bef752012-03-27 13:54:47 +0000355 state=IN_COMMENT;
anthonyd28c6a62012-05-08 00:12:56 +0000356 /* whitespace token seperator character */
anthony1cdc5b72012-03-03 02:31:18 +0000357 if (strchr(" \n\r\t",c) != (char *)NULL) {
358 switch (state) {
359 case IN_TOKEN:
360 token_info->token[offset]='\0';
361 return(MagickTrue);
362 case IN_QUOTE:
363 SaveChar(c);
364 break;
365 }
366 continue;
367 }
368 /* quote character */
anthonyd28c6a62012-05-08 00:12:56 +0000369 if ( c=='\'' || c =='"' ) {
anthony1cdc5b72012-03-03 02:31:18 +0000370 switch (state) {
371 case IN_WHITE:
372 token_info->token_line=token_info->curr_line;
373 token_info->token_column=token_info->curr_column;
374 case IN_TOKEN:
375 state=IN_QUOTE;
376 quote=c;
377 break;
378 case IN_QUOTE:
379 if (c == quote)
380 {
381 state=IN_TOKEN;
382 quote='\0';
383 }
384 else
385 SaveChar(c);
386 break;
387 }
388 continue;
389 }
390 /* escape char (preserve in quotes - unless escaping the same quote) */
391 if (c == '\\')
392 {
393 if ( state==IN_QUOTE && quote == '\'' ) {
394 SaveChar('\\');
395 continue;
396 }
397 GetChar(c);
anthony3731b342012-04-19 05:12:09 +0000398 if (c == '\n')
anthony1cdc5b72012-03-03 02:31:18 +0000399 switch (state) {
400 case IN_COMMENT:
401 state=IN_WHITE; /* end comment */
anthony3731b342012-04-19 05:12:09 +0000402 case IN_QUOTE:
403 if (quote != '"')
404 break; /* in double quotes only */
anthony1cdc5b72012-03-03 02:31:18 +0000405 case IN_WHITE:
406 case IN_TOKEN:
anthony3731b342012-04-19 05:12:09 +0000407 continue; /* line continuation - remove line feed */
anthony1cdc5b72012-03-03 02:31:18 +0000408 }
409 switch (state) {
410 case IN_WHITE:
411 token_info->token_line=token_info->curr_line;
412 token_info->token_column=token_info->curr_column;
413 state=IN_TOKEN;
414 break;
415 case IN_QUOTE:
416 if (c != quote && c != '\\')
417 SaveChar('\\');
418 break;
419 }
420 SaveChar(c);
421 continue;
422 }
423 /* ordinary character */
424 switch (state) {
425 case IN_WHITE:
426 token_info->token_line=token_info->curr_line;
427 token_info->token_column=token_info->curr_column;
428 state=IN_TOKEN;
429 case IN_TOKEN:
430 case IN_QUOTE:
431 SaveChar(c);
432 break;
433 case IN_COMMENT:
434 break;
435 }
436 }
437 /* input stream has EOF or produced a fatal error */
438 token_info->token[offset]='\0';
439 if ( token_info->status != TokenStatusOK )
440 return(MagickFalse); /* fatal condition - no valid token */
441 token_info->status = TokenStatusEOF;
442 if ( state == IN_QUOTE)
443 token_info->status = TokenStatusBadQuotes;
444 if ( state == IN_TOKEN)
445 return(MagickTrue); /* token with EOF at end - no problem */
446 return(MagickFalse); /* in white space or in quotes - invalid token */
447}