Skip to content
  • P
    Projects
  • G
    Groups
  • S
    Snippets
  • Help

semour / semour_admin

  • This project
    • Loading...
  • Sign in
Go to a project
  • Project
  • Repository
  • Issues 0
  • Merge Requests 0
  • Pipelines
  • Wiki
  • Snippets
  • Settings
  • Activity
  • Graph
  • Charts
  • Create a new issue
  • Jobs
  • Commits
  • Issue Boards
  • Files
  • Commits
  • Branches
  • Tags
  • Contributors
  • Graph
  • Compare
  • Charts
Find file
BlameHistoryPermalink
Switch branch/tag
  • semour_admin
  • vendor
  • theseer
  • tokenizer
  • src
  • Tokenizer.php
  • mushishixian's avatar
    Initial commit · 585b9d09
    mushishixian committed 2 years ago
    585b9d09
Tokenizer.php 3.36 KB
Edit
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
<?php declare(strict_types = 1);
namespace TheSeer\Tokenizer;

/**
 * Turns PHP source code into a TokenCollection in which every source line
 * is represented by at least one token.
 *
 * The raw token stream comes from token_get_all(); tokens spanning several
 * lines are split into one token per line, and lines that end up without a
 * token are filled with an empty T_WHITESPACE token.
 */
class Tokenizer {

    /**
     * Token Map for "non-tokens"
     *
     * token_get_all() reports these literals as plain strings rather than
     * as [id, text, line] arrays, so they have no token_name() of their own.
     *
     * @var array
     */
    private $map = [
        '(' => 'T_OPEN_BRACKET',
        ')' => 'T_CLOSE_BRACKET',
        '[' => 'T_OPEN_SQUARE',
        ']' => 'T_CLOSE_SQUARE',
        '{' => 'T_OPEN_CURLY',
        '}' => 'T_CLOSE_CURLY',
        ';' => 'T_SEMICOLON',
        '.' => 'T_DOT',
        ',' => 'T_COMMA',
        '=' => 'T_EQUAL',
        '<' => 'T_LT',
        '>' => 'T_GT',
        '+' => 'T_PLUS',
        '-' => 'T_MINUS',
        '*' => 'T_MULT',
        '/' => 'T_DIV',
        '?' => 'T_QUESTION_MARK',
        '!' => 'T_EXCLAMATION_MARK',
        ':' => 'T_COLON',
        '"' => 'T_DOUBLE_QUOTES',
        '@' => 'T_AT',
        '&' => 'T_AMPERSAND',
        '%' => 'T_PERCENT',
        '|' => 'T_PIPE',
        '$' => 'T_DOLLAR',
        '^' => 'T_CARET',
        '~' => 'T_TILDE',
        '`' => 'T_BACKTICK'
    ];

    /**
     * Tokenizes the given PHP source.
     *
     * @param string $source PHP source code; an empty string yields an empty collection
     *
     * @return TokenCollection tokens in source order, with blank lines filled in
     */
    public function parse(string $source): TokenCollection {
        $result = new TokenCollection();

        if ($source === '') {
            return $result;
        }

        $tokens = \token_get_all($source);

        // Seed the line tracker with the line of the very first token.
        $lastToken = new Token(
            $tokens[0][2],
            'Placeholder',
            ''
        );

        foreach ($tokens as $tok) {
            if (\is_string($tok)) {
                // Literal tokens carry no line information of their own, so
                // they inherit the line of the token seen just before them.
                $token = new Token(
                    $lastToken->getLine(),
                    $this->literalName($tok),
                    $tok
                );
                $result->addToken($token);
                $lastToken = $token;

                continue;
            }

            $line   = $tok[2];
            $name   = \token_name($tok[0]);
            $values = \preg_split('/\R+/Uu', $tok[1]);

            if ($values === false) {
                // preg_split() fails when the /u pattern cannot be applied,
                // e.g. because the token text is not valid UTF-8. Keep the
                // token in the stream as a placeholder instead of feeding
                // `false` to foreach and silently losing it.
                $token = new Token(
                    $line,
                    $name,
                    '{binary data}'
                );
                $result->addToken($token);
                $lastToken = $token;

                continue;
            }

            // One token per line of a multi-line token.
            foreach ($values as $v) {
                $token = new Token(
                    $line,
                    $name,
                    $v
                );
                // Empty segments are not emitted, but they still advance
                // the line tracker so following tokens get the right line.
                $lastToken = $token;
                $line++;

                if ($v === '') {
                    continue;
                }

                $result->addToken($token);
            }
        }

        return $this->fillBlanks($result, $lastToken->getLine());
    }

    /**
     * Resolves the name for a literal (string) token.
     *
     * token_get_all() can report literals that are not a key of the map,
     * for example the `b"` opener of a binary-prefixed interpolated string.
     * Those are resolved by their final character; anything still unknown
     * gets a generic name instead of triggering an undefined-index error.
     */
    private function literalName(string $literal): string {
        return $this->map[$literal]
            ?? $this->map[\substr($literal, -1)]
            ?? 'T_UNKNOWN';
    }

    /**
     * Copies the tokens into a new collection, inserting an empty
     * T_WHITESPACE token for every line number that has no token of its
     * own, both between consecutive tokens and after the last token up to
     * $maxLine.
     */
    private function fillBlanks(TokenCollection $tokens, int $maxLine): TokenCollection {
        $final    = new TokenCollection();
        $prevLine = 0;

        foreach ($tokens as $token) {
            $this->appendBlankLines($final, $prevLine + 1, $token->getLine() - 1);

            $final->addToken($token);
            $prevLine = $token->getLine();
        }

        $this->appendBlankLines($final, $prevLine + 1, $maxLine);

        return $final;
    }

    /**
     * Appends one empty T_WHITESPACE token for each line in the inclusive
     * range $fromLine..$toLine; does nothing when the range is empty.
     */
    private function appendBlankLines(TokenCollection $target, int $fromLine, int $toLine) {
        for ($line = $fromLine; $line <= $toLine; $line++) {
            $target->addToken(
                new Token(
                    $line,
                    'T_WHITESPACE',
                    ''
                )
            );
        }
    }
}