Twig-1.3.0/lib/Twig/Lexer.php
changeset 4 9a001a04b634
equal deleted inserted replaced
3:6d109e3804ac 4:9a001a04b634
       
     1 <?php
       
     2 
       
     3 /*
       
     4  * This file is part of Twig.
       
     5  *
       
     6  * (c) 2009 Fabien Potencier
       
     7  * (c) 2009 Armin Ronacher
       
     8  *
       
     9  * For the full copyright and license information, please view the LICENSE
       
    10  * file that was distributed with this source code.
       
    11  */
       
    12 
       
    13 /**
       
    14  * Lexes a template string.
       
    15  *
       
    16  * @package    twig
       
    17  * @author     Fabien Potencier <fabien@symfony.com>
       
    18  */
       
    19 class Twig_Lexer implements Twig_LexerInterface
       
    20 {
       
    21     protected $tokens;
       
    22     protected $code;
       
    23     protected $cursor;
       
    24     protected $lineno;
       
    25     protected $end;
       
    26     protected $state;
       
    27     protected $brackets;
       
    28 
       
    29     protected $env;
       
    30     protected $filename;
       
    31     protected $options;
       
    32     protected $operatorRegex;
       
    33 
       
    34     const STATE_DATA  = 0;
       
    35     const STATE_BLOCK = 1;
       
    36     const STATE_VAR   = 2;
       
    37 
       
    38     const REGEX_NAME   = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
       
    39     const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
       
    40     const REGEX_STRING = '/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
       
    41     const PUNCTUATION  = '()[]{}?:.,|';
       
    42 
       
    43     public function __construct(Twig_Environment $env, array $options = array())
       
    44     {
       
    45         $this->env = $env;
       
    46 
       
    47         $this->options = array_merge(array(
       
    48             'tag_comment'     => array('{#', '#}'),
       
    49             'tag_block'       => array('{%', '%}'),
       
    50             'tag_variable'    => array('{{', '}}'),
       
    51             'whitespace_trim' => '-',
       
    52         ), $options);
       
    53     }
       
    54 
       
    55     /**
       
    56      * Tokenizes a source code.
       
    57      *
       
    58      * @param  string $code     The source code
       
    59      * @param  string $filename A unique identifier for the source code
       
    60      *
       
    61      * @return Twig_TokenStream A token stream instance
       
    62      */
       
    63     public function tokenize($code, $filename = null)
       
    64     {
       
    65         if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
       
    66             $mbEncoding = mb_internal_encoding();
       
    67             mb_internal_encoding('ASCII');
       
    68         }
       
    69 
       
    70         $this->code = str_replace(array("\r\n", "\r"), "\n", $code);
       
    71         $this->filename = $filename;
       
    72         $this->cursor = 0;
       
    73         $this->lineno = 1;
       
    74         $this->end = strlen($this->code);
       
    75         $this->tokens = array();
       
    76         $this->state = self::STATE_DATA;
       
    77         $this->brackets = array();
       
    78 
       
    79         while ($this->cursor < $this->end) {
       
    80             // dispatch to the lexing functions depending
       
    81             // on the current state
       
    82             switch ($this->state) {
       
    83                 case self::STATE_DATA:
       
    84                     $this->lexData();
       
    85                     break;
       
    86 
       
    87                 case self::STATE_BLOCK:
       
    88                     $this->lexBlock();
       
    89                     break;
       
    90 
       
    91                 case self::STATE_VAR:
       
    92                     $this->lexVar();
       
    93                     break;
       
    94             }
       
    95         }
       
    96 
       
    97         $this->pushToken(Twig_Token::EOF_TYPE);
       
    98 
       
    99         if (!empty($this->brackets)) {
       
   100             list($expect, $lineno) = array_pop($this->brackets);
       
   101             throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
       
   102         }
       
   103 
       
   104         if (isset($mbEncoding)) {
       
   105             mb_internal_encoding($mbEncoding);
       
   106         }
       
   107 
       
   108         return new Twig_TokenStream($this->tokens, $this->filename);
       
   109     }
       
   110 
       
   111     protected function lexData()
       
   112     {
       
   113         $pos = $this->end;
       
   114         $append = '';
       
   115 
       
   116         // Find the first token after the cursor
       
   117         foreach (array('tag_comment', 'tag_variable', 'tag_block') as $type) {
       
   118             $tmpPos = strpos($this->code, $this->options[$type][0], $this->cursor);
       
   119             if (false !== $tmpPos && $tmpPos < $pos) {
       
   120                 $trimBlock = false;
       
   121                 $append = '';
       
   122                 $pos = $tmpPos;
       
   123                 $token = $this->options[$type][0];
       
   124                 if (strpos($this->code, $this->options['whitespace_trim'], $pos) === ($pos + strlen($token))) {
       
   125                     $trimBlock = true;
       
   126                     $append = $this->options['whitespace_trim'];
       
   127                 }
       
   128             }
       
   129         }
       
   130 
       
   131         // if no matches are left we return the rest of the template as simple text token
       
   132         if ($pos === $this->end) {
       
   133             $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
       
   134             $this->cursor = $this->end;
       
   135             return;
       
   136         }
       
   137 
       
   138         // push the template text first
       
   139         $text = $textContent = substr($this->code, $this->cursor, $pos - $this->cursor);
       
   140         if (true === $trimBlock) {
       
   141             $text = rtrim($text);
       
   142         }
       
   143         $this->pushToken(Twig_Token::TEXT_TYPE, $text);
       
   144         $this->moveCursor($textContent.$token.$append);
       
   145 
       
   146         switch ($token) {
       
   147             case $this->options['tag_comment'][0]:
       
   148                 $this->lexComment();
       
   149                 break;
       
   150 
       
   151             case $this->options['tag_block'][0]:
       
   152                 // raw data?
       
   153                 if (preg_match('/\s*raw\s*'.preg_quote($this->options['tag_block'][1], '/').'/As', $this->code, $match, null, $this->cursor)) {
       
   154                     $this->moveCursor($match[0]);
       
   155                     $this->lexRawData();
       
   156                     $this->state = self::STATE_DATA;
       
   157                 // {% line \d+ %}
       
   158                 } else if (preg_match('/\s*line\s+(\d+)\s*'.preg_quote($this->options['tag_block'][1], '/').'/As', $this->code, $match, null, $this->cursor)) {
       
   159                     $this->moveCursor($match[0]);
       
   160                     $this->lineno = (int) $match[1];
       
   161                     $this->state = self::STATE_DATA;
       
   162                 } else {
       
   163                     $this->pushToken(Twig_Token::BLOCK_START_TYPE);
       
   164                     $this->state = self::STATE_BLOCK;
       
   165                 }
       
   166                 break;
       
   167 
       
   168             case $this->options['tag_variable'][0]:
       
   169                 $this->pushToken(Twig_Token::VAR_START_TYPE);
       
   170                 $this->state = self::STATE_VAR;
       
   171                 break;
       
   172         }
       
   173     }
       
   174 
       
   175     protected function lexBlock()
       
   176     {
       
   177         $trimTag = preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/');
       
   178         $endTag = preg_quote($this->options['tag_block'][1], '/');
       
   179 
       
   180         if (empty($this->brackets) && preg_match('/\s*(?:'.$trimTag.'\s*|\s*'.$endTag.')\n?/A', $this->code, $match, null, $this->cursor)) {
       
   181             $this->pushToken(Twig_Token::BLOCK_END_TYPE);
       
   182             $this->moveCursor($match[0]);
       
   183             $this->state = self::STATE_DATA;
       
   184         } else {
       
   185             $this->lexExpression();
       
   186         }
       
   187     }
       
   188 
       
   189     protected function lexVar()
       
   190     {
       
   191         $trimTag = preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/');
       
   192         $endTag = preg_quote($this->options['tag_variable'][1], '/');
       
   193 
       
   194         if (empty($this->brackets) && preg_match('/\s*'.$trimTag.'\s*|\s*'.$endTag.'/A', $this->code, $match, null, $this->cursor)) {
       
   195             $this->pushToken(Twig_Token::VAR_END_TYPE);
       
   196             $this->moveCursor($match[0]);
       
   197             $this->state = self::STATE_DATA;
       
   198         } else {
       
   199             $this->lexExpression();
       
   200         }
       
   201     }
       
   202 
       
   203     protected function lexExpression()
       
   204     {
       
   205         // whitespace
       
   206         if (preg_match('/\s+/A', $this->code, $match, null, $this->cursor)) {
       
   207             $this->moveCursor($match[0]);
       
   208 
       
   209             if ($this->cursor >= $this->end) {
       
   210                 throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s"', $this->state === self::STATE_BLOCK ? 'block' : 'variable'));
       
   211             }
       
   212         }
       
   213 
       
   214         // operators
       
   215         if (preg_match($this->getOperatorRegex(), $this->code, $match, null, $this->cursor)) {
       
   216             $this->pushToken(Twig_Token::OPERATOR_TYPE, $match[0]);
       
   217             $this->moveCursor($match[0]);
       
   218         }
       
   219         // names
       
   220         elseif (preg_match(self::REGEX_NAME, $this->code, $match, null, $this->cursor)) {
       
   221             $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
       
   222             $this->moveCursor($match[0]);
       
   223         }
       
   224         // numbers
       
   225         elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, null, $this->cursor)) {
       
   226             $this->pushToken(Twig_Token::NUMBER_TYPE, ctype_digit($match[0]) ? (int) $match[0] : (float) $match[0]);
       
   227             $this->moveCursor($match[0]);
       
   228         }
       
   229         // punctuation
       
   230         elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
       
   231             // opening bracket
       
   232             if (false !== strpos('([{', $this->code[$this->cursor])) {
       
   233                 $this->brackets[] = array($this->code[$this->cursor], $this->lineno);
       
   234             }
       
   235             // closing bracket
       
   236             elseif (false !== strpos(')]}', $this->code[$this->cursor])) {
       
   237                 if (empty($this->brackets)) {
       
   238                     throw new Twig_Error_Syntax(sprintf('Unexpected "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
       
   239                 }
       
   240 
       
   241                 list($expect, $lineno) = array_pop($this->brackets);
       
   242                 if ($this->code[$this->cursor] != strtr($expect, '([{', ')]}')) {
       
   243                     throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
       
   244                 }
       
   245             }
       
   246 
       
   247             $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $this->code[$this->cursor]);
       
   248             ++$this->cursor;
       
   249         }
       
   250         // strings
       
   251         elseif (preg_match(self::REGEX_STRING, $this->code, $match, null, $this->cursor)) {
       
   252             $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
       
   253             $this->moveCursor($match[0]);
       
   254         }
       
   255         // unlexable
       
   256         else {
       
   257             throw new Twig_Error_Syntax(sprintf('Unexpected character "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
       
   258         }
       
   259     }
       
   260 
       
   261     protected function lexRawData()
       
   262     {
       
   263         if (!preg_match('/'.preg_quote($this->options['tag_block'][0], '/').'\s*endraw\s*'.preg_quote($this->options['tag_block'][1], '/').'/s', $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
       
   264             throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "block"'));
       
   265         }
       
   266         $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
       
   267         $this->pushToken(Twig_Token::TEXT_TYPE, $text);
       
   268         $this->moveCursor($text.$match[0][0]);
       
   269     }
       
   270 
       
   271     protected function lexComment()
       
   272     {
       
   273         $commentEndRegex = '/(?:'.preg_quote($this->options['whitespace_trim'], '/')
       
   274                            .preg_quote($this->options['tag_comment'][1], '/').'\s*|'
       
   275                            .preg_quote($this->options['tag_comment'][1], '/').')\n?/s';
       
   276 
       
   277         if (!preg_match($commentEndRegex, $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
       
   278             throw new Twig_Error_Syntax('Unclosed comment', $this->lineno, $this->filename);
       
   279         }
       
   280 
       
   281         $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
       
   282     }
       
   283 
       
   284     protected function pushToken($type, $value = '')
       
   285     {
       
   286         // do not push empty text tokens
       
   287         if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
       
   288             return;
       
   289         }
       
   290 
       
   291         $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
       
   292     }
       
   293 
       
   294     protected function moveCursor($text)
       
   295     {
       
   296         $this->cursor += strlen($text);
       
   297         $this->lineno += substr_count($text, "\n");
       
   298     }
       
   299 
       
   300     protected function getOperatorRegex()
       
   301     {
       
   302         if (null !== $this->operatorRegex) {
       
   303             return $this->operatorRegex;
       
   304         }
       
   305 
       
   306         $operators = array_merge(
       
   307             array('='),
       
   308             array_keys($this->env->getUnaryOperators()),
       
   309             array_keys($this->env->getBinaryOperators())
       
   310         );
       
   311 
       
   312         $operators = array_combine($operators, array_map('strlen', $operators));
       
   313         arsort($operators);
       
   314 
       
   315         $regex = array();
       
   316         foreach ($operators as $operator => $length) {
       
   317             // an operator that ends with a character must be followed by
       
   318             // a whitespace or a parenthesis
       
   319             if (ctype_alpha($operator[$length - 1])) {
       
   320                 $regex[] = preg_quote($operator, '/').'(?=[ ()])';
       
   321             } else {
       
   322                 $regex[] = preg_quote($operator, '/');
       
   323             }
       
   324         }
       
   325 
       
   326         return $this->operatorRegex = '/'.implode('|', $regex).'/A';
       
   327     }
       
   328 }