<?php

/*
 * This file is part of Twig.
 *
 * (c) 2009 Fabien Potencier
 * (c) 2009 Armin Ronacher
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Lexes a template string.
 *
 * The lexer walks the template source with a byte cursor and switches between
 * three states: plain data, inside a block tag, and inside a variable tag.
 * Each state handler consumes some input and pushes zero or more tokens.
 *
 * @package twig
 * @author  Fabien Potencier <fabien@symfony.com>
 */
class Twig_Lexer implements Twig_LexerInterface
{
    /** Tokens collected so far by the current tokenize() call. */
    protected $tokens;

    /** Template source with line endings normalized to "\n". */
    protected $code;

    /** Byte offset of the next character to lex in $code. */
    protected $cursor;

    /** Line number attached to newly pushed tokens and to syntax errors. */
    protected $lineno;

    /** Byte length of $code. */
    protected $end;

    /** Current lexing state: one of the STATE_* constants. */
    protected $state;

    /** Stack of open brackets, each entry being array(opening character, line number). */
    protected $brackets;

    /** Environment queried for the unary and binary operator names. */
    protected $env;

    /** Identifier of the source being lexed; passed to errors and to the token stream. */
    protected $filename;

    /** Tag delimiters and whitespace-trim marker (defaults merged with constructor options). */
    protected $options;

    /** Lazily built operator regex, see getOperatorRegex(). */
    protected $operatorRegex;

    const STATE_DATA  = 0;
    const STATE_BLOCK = 1;
    const STATE_VAR   = 2;

    const REGEX_NAME   = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
    const REGEX_STRING = '/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    const PUNCTUATION  = '()[]{}?:.,|';

    /**
     * Constructor.
     *
     * @param Twig_Environment $env     The environment providing the operator tables
     * @param array            $options Overrides for 'tag_comment', 'tag_block',
     *                                  'tag_variable' (each array(open, close)) and
     *                                  'whitespace_trim'
     */
    public function __construct(Twig_Environment $env, array $options = array())
    {
        $this->env = $env;

        $this->options = array_merge(array(
            'tag_comment'     => array('{#', '#}'),
            'tag_block'       => array('{%', '%}'),
            'tag_variable'    => array('{{', '}}'),
            'whitespace_trim' => '-',
        ), $options);
    }

    /**
     * Tokenizes a source code.
     *
     * @param string $code     The source code
     * @param string $filename A unique identifier for the source code
     *
     * @return Twig_TokenStream A token stream instance
     *
     * @throws Twig_Error_Syntax When a bracket is left unclosed or the source cannot be lexed
     */
    public function tokenize($code, $filename = null)
    {
        // When mbstring overloads the str* functions (bit 2 of func_overload),
        // force a single-byte encoding so strlen()/substr()/strpos() count bytes.
        if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }

        $this->code     = str_replace(array("\r\n", "\r"), "\n", $code);
        $this->filename = $filename;
        $this->cursor   = 0;
        $this->lineno   = 1;
        $this->end      = strlen($this->code);
        $this->tokens   = array();
        $this->state    = self::STATE_DATA;
        $this->brackets = array();

        try {
            while ($this->cursor < $this->end) {
                // dispatch to the lexing functions depending
                // on the current state
                switch ($this->state) {
                    case self::STATE_DATA:
                        $this->lexData();
                        break;

                    case self::STATE_BLOCK:
                        $this->lexBlock();
                        break;

                    case self::STATE_VAR:
                        $this->lexVar();
                        break;
                }
            }

            $this->pushToken(Twig_Token::EOF_TYPE);

            if (!empty($this->brackets)) {
                list($expect, $lineno) = array_pop($this->brackets);
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
            }
        } catch (Exception $e) {
            // Do not leak the temporary ASCII encoding to the caller when
            // lexing fails; restore it and let the error propagate unchanged.
            if (isset($mbEncoding)) {
                mb_internal_encoding($mbEncoding);
            }

            throw $e;
        }

        if (isset($mbEncoding)) {
            mb_internal_encoding($mbEncoding);
        }

        return new Twig_TokenStream($this->tokens, $this->filename);
    }

    /**
     * Lexes plain template data up to the next opening tag.
     *
     * Pushes the text found before the tag (right-trimmed when the tag is
     * immediately followed by the whitespace-trim marker), moves the cursor
     * past the opening tag, then either consumes a comment, a raw section or
     * a line directive, or switches to the block/variable state.
     */
    protected function lexData()
    {
        $pos       = $this->end;
        $append    = '';
        $trimBlock = false;
        $token     = null;

        // Find the first token after the cursor
        foreach (array('tag_comment', 'tag_variable', 'tag_block') as $type) {
            $tmpPos = strpos($this->code, $this->options[$type][0], $this->cursor);
            if (false !== $tmpPos && $tmpPos < $pos) {
                $trimBlock = false;
                $append    = '';
                $pos       = $tmpPos;
                $token     = $this->options[$type][0];
                // the trim marker only counts when it directly follows the opening tag
                if (strpos($this->code, $this->options['whitespace_trim'], $pos) === ($pos + strlen($token))) {
                    $trimBlock = true;
                    $append    = $this->options['whitespace_trim'];
                }
            }
        }

        // if no matches are left we return the rest of the template as simple text token
        if ($pos === $this->end) {
            $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $pos - $this->cursor);
        if (true === $trimBlock) {
            $text = rtrim($text);
        }
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        // advance over the untrimmed text so the line counter stays accurate
        $this->moveCursor($textContent.$token.$append);

        switch ($token) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                $blockEnd = preg_quote($this->options['tag_block'][1], '/');

                // raw data?
                if (preg_match('/\s*raw\s*'.$blockEnd.'/As', $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData();
                    $this->state = self::STATE_DATA;
                // {% line \d+ %}
                } elseif (preg_match('/\s*line\s+(\d+)\s*'.$blockEnd.'/As', $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                    $this->state = self::STATE_DATA;
                } else {
                    $this->pushToken(Twig_Token::BLOCK_START_TYPE);
                    $this->state = self::STATE_BLOCK;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(Twig_Token::VAR_START_TYPE);
                $this->state = self::STATE_VAR;
                break;
        }
    }

    /**
     * Lexes the inside of a block tag.
     *
     * When no bracket is open and the cursor sits on the block end tag
     * (optionally prefixed by the whitespace-trim marker), pushes a block-end
     * token and returns to the data state; otherwise lexes one expression token.
     */
    protected function lexBlock()
    {
        $trimTag = preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/');
        $endTag  = preg_quote($this->options['tag_block'][1], '/');

        if (empty($this->brackets) && preg_match('/\s*(?:'.$trimTag.'\s*|\s*'.$endTag.')\n?/A', $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::BLOCK_END_TYPE);
            $this->moveCursor($match[0]);
            $this->state = self::STATE_DATA;
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes the inside of a variable tag.
     *
     * When no bracket is open and the cursor sits on the variable end tag
     * (optionally prefixed by the whitespace-trim marker), pushes a var-end
     * token and returns to the data state; otherwise lexes one expression token.
     */
    protected function lexVar()
    {
        $trimTag = preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/');
        $endTag  = preg_quote($this->options['tag_variable'][1], '/');

        if (empty($this->brackets) && preg_match('/\s*'.$trimTag.'\s*|\s*'.$endTag.'/A', $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::VAR_END_TYPE);
            $this->moveCursor($match[0]);
            $this->state = self::STATE_DATA;
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes a single expression token at the cursor.
     *
     * Leading whitespace is skipped first. Candidates are then tried in this
     * order: operator, name, number, punctuation, string.
     *
     * @throws Twig_Error_Syntax On end of input after whitespace, on an unbalanced
     *                           bracket, or on a character that starts no token
     */
    protected function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new Twig_Error_Syntax(
                    sprintf('Unexpected end of file: Unclosed "%s"', $this->state === self::STATE_BLOCK ? 'block' : 'variable'),
                    $this->lineno,
                    $this->filename
                );
            }
        }

        // operators
        if (preg_match($this->getOperatorRegex(), $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::OPERATOR_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // names
        elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // numbers
        elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            // digits only => integer, otherwise (contains a decimal part) => float
            $this->pushToken(Twig_Token::NUMBER_TYPE, ctype_digit($match[0]) ? (int) $match[0] : (float) $match[0]);
            $this->moveCursor($match[0]);
        }
        // punctuation
        elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            $char = $this->code[$this->cursor];

            // opening bracket
            if (false !== strpos('([{', $char)) {
                $this->brackets[] = array($char, $this->lineno);
            }
            // closing bracket
            elseif (false !== strpos(')]}', $char)) {
                if (empty($this->brackets)) {
                    throw new Twig_Error_Syntax(sprintf('Unexpected "%s"', $char), $this->lineno, $this->filename);
                }

                // the closing bracket must pair with the most recently opened one
                list($expect, $lineno) = array_pop($this->brackets);
                if ($char != strtr($expect, '([{', ')]}')) {
                    throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
                }
            }

            $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $char);
            ++$this->cursor;
        }
        // strings
        elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            // drop the surrounding quotes, then resolve backslash escapes
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        }
        // unlexable
        else {
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
    }

    /**
     * Consumes a raw section: everything up to the matching endraw block tag
     * is pushed as a single text token and the cursor moves past that tag.
     *
     * @throws Twig_Error_Syntax When no endraw tag follows the cursor
     */
    protected function lexRawData()
    {
        $endRawRegex = '/'.preg_quote($this->options['tag_block'][0], '/').'\s*endraw\s*'.preg_quote($this->options['tag_block'][1], '/').'/s';

        if (!preg_match($endRawRegex, $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unexpected end of file: Unclosed "block"', $this->lineno, $this->filename);
        }

        // $match[0][1] is the byte offset at which the endraw tag starts
        $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        $this->moveCursor($text.$match[0][0]);
    }

    /**
     * Skips a comment: moves the cursor past the next comment end tag
     * (including one trailing newline) without pushing any token.
     *
     * @throws Twig_Error_Syntax When the comment is never closed
     */
    protected function lexComment()
    {
        $commentEndRegex = '/(?:'.preg_quote($this->options['whitespace_trim'], '/')
                           .preg_quote($this->options['tag_comment'][1], '/').'\s*|'
                           .preg_quote($this->options['tag_comment'][1], '/').')\n?/s';

        if (!preg_match($commentEndRegex, $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unclosed comment', $this->lineno, $this->filename);
        }

        // skip the comment body together with the matched end tag
        $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    }

    /**
     * Appends a token carrying the current line number.
     *
     * @param integer $type  One of the Twig_Token::*_TYPE constants
     * @param mixed   $value The token value
     */
    protected function pushToken($type, $value = '')
    {
        // do not push empty text tokens
        if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
            return;
        }

        $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
    }

    /**
     * Advances the cursor by the byte length of $text and the line counter
     * by the number of newlines it contains.
     *
     * @param string $text The text that has just been consumed
     */
    protected function moveCursor($text)
    {
        $this->cursor += strlen($text);
        $this->lineno += substr_count($text, "\n");
    }

    /**
     * Builds (once) and returns the anchored regex matching any operator.
     *
     * The alternatives are '=' plus the environment's unary and binary
     * operator names, ordered longest first so that a longer operator wins
     * over one of its prefixes.
     *
     * @return string The operator regex
     */
    protected function getOperatorRegex()
    {
        if (null !== $this->operatorRegex) {
            return $this->operatorRegex;
        }

        $operators = array_merge(
            array('='),
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // map operator => length and sort by length, longest first
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $regex = array();
        foreach ($operators as $operator => $length) {
            // an operator that ends with a character must be followed by
            // a whitespace or a parenthesis
            if (ctype_alpha($operator[$length - 1])) {
                // \s (not just a space) so a tab or newline after the operator also counts
                $regex[] = preg_quote($operator, '/').'(?=[\s()])';
            } else {
                $regex[] = preg_quote($operator, '/');
            }
        }

        return $this->operatorRegex = '/'.implode('|', $regex).'/A';
    }
}