* @license http://opensource.org/licenses/GPL-2.0 GNU Public License */
class Formatter
{
    /**
     * The formatting options.
     *
     * Populated by the constructor: the defaults merged with the options
     * supplied by the caller.
     *
     * @var array
     */
    public $options;

    /**
     * Clauses that must be inlined.
     *
     * These clauses usually are short and it's nicer to have them inline.
     * The keys are clause names; the values are always `true` so membership
     * can be tested with `empty()`.
     *
     * @var array
     */
    public static $INLINE_CLAUSES = array(
        'CREATE' => true,
        'LIMIT' => true,
        'PARTITION BY' => true,
        'PARTITION' => true,
        'PROCEDURE' => true,
        'SUBPARTITION BY' => true,
        'VALUES' => true,
    );

    /**
     * Constructor.
     *
     * Recognised options:
     *  - `type` (string): format of the result, one of 'text', 'cli' or
     *    'html'. Defaults to 'cli' under the CLI SAPI and 'text' otherwise.
     *  - `line_ending` (string): the line ending used. When not supplied it
     *    is "<br/>" for HTML output and "\n" for everything else.
     *  - `indentation` (string): the string used for one indentation level.
     *  - `remove_comments` (bool): whether comments should be removed.
     *  - `clause_newline` (bool): whether each clause starts on a new line.
     *  - `parts_newline` (bool): whether each part (delimited by brackets
     *    and commas) starts on a new line. Requires `clause_newline`.
     *  - `indent_parts` (bool): whether each part of each clause should be
     *    indented.
     *  - `formats` (array[]): the styles applied to tokens; each entry has
     *    the keys `type`, `flags`, `html`, `cli` and `function`.
     *
     * @param array $options The formatting options.
     */
    public function __construct(array $options = array())
    {
        $defaults = array(
            'type' => php_sapi_name() === 'cli' ? 'cli' : 'text',
            'indentation' => ' ',
            'remove_comments' => false,
            'clause_newline' => true,
            'parts_newline' => true,
            'indent_parts' => true,
            'formats' => array(
                array(
                    'type' => Token::TYPE_KEYWORD,
                    'flags' => Token::FLAG_KEYWORD_RESERVED,
                    'html' => 'class="sql-reserved"',
                    'cli' => "\e[35m",
                    'function' => 'strtoupper',
                ),
                array(
                    'type' => Token::TYPE_KEYWORD,
                    'flags' => 0,
                    'html' => 'class="sql-keyword"',
                    'cli' => "\e[95m",
                    'function' => 'strtoupper',
                ),
                array(
                    'type' => Token::TYPE_COMMENT,
                    'flags' => 0,
                    'html' => 'class="sql-comment"',
                    'cli' => "\e[37m",
                    'function' => '',
                ),
                array(
                    'type' => Token::TYPE_BOOL,
                    'flags' => 0,
                    'html' => 'class="sql-atom"',
                    'cli' => "\e[36m",
                    'function' => 'strtoupper',
                ),
                array(
                    'type' => Token::TYPE_NUMBER,
                    'flags' => 0,
                    'html' => 'class="sql-number"',
                    'cli' => "\e[92m",
                    'function' => 'strtolower',
                ),
                array(
                    'type' => Token::TYPE_STRING,
                    'flags' => 0,
                    'html' => 'class="sql-string"',
                    'cli' => "\e[91m",
                    'function' => '',
                ),
                array(
                    'type' => Token::TYPE_SYMBOL,
                    'flags' => 0,
                    'html' => 'class="sql-variable"',
                    'cli' => "\e[36m",
                    'function' => '',
                ),
            ),
        );

        // The specified formatting options are merged with the default values.
        $this->options = array_merge($defaults, $options);

        // The default line ending depends on the *effective* output type, so
        // it can only be decided once the caller's options have been merged.
        if (!isset($this->options['line_ending'])) {
            $this->options['line_ending'] =
                $this->options['type'] === 'html' ? '<br/>' : "\n";
        }

        // `parts_newline` requires `clause_newline`
        $this->options['parts_newline'] &= $this->options['clause_newline'];
    }

    /**
     * Formats the given list of tokens.
     *
     * The list's `idx` pointer is used as the loop cursor and is therefore
     * modified by this method.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return string
     */
    public function formatList($list)
    {
        /**
         * The query to be returned.
         *
         * @var string $ret
         */
        $ret = '';

        /**
         * The indentation level.
         *
         * @var int $indent
         */
        $indent = 0;

        /**
         * Whether the line ended.
         *
         * @var bool $lineEnded
         */
        $lineEnded = false;

        /**
         * Whether current group is short (no linebreaks).
         *
         * @var bool $shortGroup
         */
        $shortGroup = false;

        /**
         * The name of the last clause.
         *
         * @var string $lastClause
         */
        $lastClause = '';

        /**
         * A stack that keeps track of the indentation level every time a new
         * block is found. Shared by BEGIN ... END blocks and brackets.
         *
         * @var array $blocksIndentation
         */
        $blocksIndentation = array();

        /**
         * A stack that keeps track of the line endings every time a new block
         * is found.
         *
         * @var array $blocksLineEndings
         */
        $blocksLineEndings = array();

        /**
         * Whether clause's options were formatted.
         *
         * @var bool $formattedOptions
         */
        $formattedOptions = false;

        /**
         * Previously parsed token.
         *
         * @var Token $prev
         */
        $prev = null;

        /**
         * Comments are being formatted separately to maintain the whitespaces
         * before and after them.
         *
         * @var string $comment
         */
        $comment = '';

        // In order to be able to format the queries correctly, the next token
        // must be taken into consideration. The loop below uses two pointers,
        // `$prev` and `$curr` which store two consecutive tokens.
        // Actually, at every iteration the previous token is being used, so
        // the very last non-whitespace token of the list is never printed.
        for ($list->idx = 0; $list->idx < $list->count; ++$list->idx) {
            /**
             * Token parsed at this moment.
             *
             * @var Token $curr
             */
            $curr = $list->tokens[$list->idx];

            if ($curr->type === Token::TYPE_WHITESPACE) {
                // Whitespaces are skipped because the formatter adds its own.
                continue;
            } elseif ($curr->type === Token::TYPE_COMMENT) {
                // Whether the comments should be parsed.
                if (!empty($this->options['remove_comments'])) {
                    continue;
                }

                // Neighbours may not exist when the comment is the first or
                // one of the last tokens, hence the `isset` guards.
                $before = $list->idx - 1;
                $after = $list->idx + 1;

                if (isset($list->tokens[$before])
                    && ($list->tokens[$before]->type === Token::TYPE_WHITESPACE)
                ) {
                    // The whitespaces before and after are preserved for
                    // formatting reasons.
                    $comment .= $list->tokens[$before]->token;
                }

                $comment .= $this->toString($curr);

                if (isset($list->tokens[$after])
                    && ($list->tokens[$after]->type === Token::TYPE_WHITESPACE)
                    && !(isset($list->tokens[$after + 1])
                        && ($list->tokens[$after + 1]->type === Token::TYPE_COMMENT))
                ) {
                    // Adding the next whitespace only if there is no comment
                    // that follows it immediately, which would otherwise add
                    // the same whitespace twice.
                    $comment .= $list->tokens[$after]->token;
                }

                // Everything was handled here, no need to continue.
                continue;
            }

            // Checking if pointers were initialized.
            if ($prev !== null) {
                // Checking if a new clause started.
                if (static::isClause($prev) !== false) {
                    $lastClause = $prev->value;
                    $formattedOptions = false;
                }

                // The options of a clause should stay on the same line and
                // everything that follows goes on a new, indented line. The
                // options end at the first token that is not a keyword or
                // that is a function keyword.
                if (($this->options['parts_newline'])
                    && (!$formattedOptions)
                    && (empty(self::$INLINE_CLAUSES[$lastClause]))
                    && (($curr->type !== Token::TYPE_KEYWORD)
                        || ($curr->flags & Token::FLAG_KEYWORD_FUNCTION))
                ) {
                    $formattedOptions = true;
                    $lineEnded = true;
                    ++$indent;
                }

                // Checking if this clause ended.
                if ($tmp = static::isClause($curr)) {
                    // Statements (2) always start on a new line; clauses (1)
                    // only when `clause_newline` is enabled.
                    if (($tmp === 2) || ($this->options['clause_newline'])) {
                        $lineEnded = true;
                        if ($this->options['parts_newline']) {
                            --$indent;
                        }
                    }
                }

                // Indenting BEGIN ... END blocks.
                if (($prev->type === Token::TYPE_KEYWORD) && ($prev->value === 'BEGIN')) {
                    $lineEnded = true;
                    array_push($blocksIndentation, $indent);
                    ++$indent;
                } elseif (($curr->type === Token::TYPE_KEYWORD) && ($curr->value === 'END')) {
                    $lineEnded = true;
                    // An END without a recorded block start must not turn
                    // the indentation level into `null`.
                    $indent = empty($blocksIndentation)
                        ? 0
                        : array_pop($blocksIndentation);
                }

                // Formatting fragments delimited by comma.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === ',')) {
                    // Fragments delimited by a comma are broken into multiple
                    // pieces only if the clause is not inlined or this fragment
                    // is between brackets that are on new line.
                    if (((empty(self::$INLINE_CLAUSES[$lastClause]))
                        && !$shortGroup
                        && ($this->options['parts_newline']))
                        || (end($blocksLineEndings) === true)
                    ) {
                        $lineEnded = true;
                    }
                }

                // Handling brackets.
                // Brackets are indented only if the length of the fragment between
                // them is longer than 30 characters.
                // NOTE(review): for an empty pair `()` only the first branch
                // runs (the `)` is never seen as `$curr` with a non-`(`
                // `$prev`), so the stacks look like they are never popped for
                // it and `$shortGroup` stays set - confirm whether intended.
                if (($prev->type === Token::TYPE_OPERATOR) && ($prev->value === '(')) {
                    array_push($blocksIndentation, $indent);
                    $shortGroup = true;
                    if (static::getGroupLength($list) > 30) {
                        ++$indent;
                        $lineEnded = true;
                        $shortGroup = false;
                    }
                    array_push($blocksLineEndings, $lineEnded);
                } elseif (($curr->type === Token::TYPE_OPERATOR) && ($curr->value === ')')) {
                    // A closing bracket without a recorded opening one must
                    // not turn the indentation level into `null`.
                    $indent = empty($blocksIndentation)
                        ? 0
                        : array_pop($blocksIndentation);
                    $lineEnded |= array_pop($blocksLineEndings);
                    $shortGroup = false;
                }

                // Delimiter must be placed on the same line with the last
                // clause.
                if ($curr->type === Token::TYPE_DELIMITER) {
                    $lineEnded = false;
                }

                // Adding the token.
                $ret .= $this->toString($prev);

                // Finishing the line.
                if ($lineEnded) {
                    if ($indent < 0) {
                        // TODO: Make sure this never occurs and delete it.
                        $indent = 0;
                    }

                    // `$curr` can never be a comment here: comments are
                    // diverted at the top of the loop.
                    $ret .= $this->options['line_ending']
                        . str_repeat($this->options['indentation'], $indent);

                    $lineEnded = false;
                } else {
                    // If the line ended there is no point in adding whitespaces.
                    // Also, some tokens do not have spaces before or after them.
                    $noSpace =
                        // No space after . (
                        (($prev->type === Token::TYPE_OPERATOR)
                            && (($prev->value === '.') || ($prev->value === '(')))
                        // No space before . , ( )
                        || (($curr->type === Token::TYPE_OPERATOR)
                            && (($curr->value === '.') || ($curr->value === ',')
                                || ($curr->value === '(') || ($curr->value === ')')))
                        // No space before delimiters shorter than two
                        // characters; longer delimiters get a space.
                        || (($curr->type === Token::TYPE_DELIMITER)
                            && (mb_strlen((string) $curr->value, 'UTF-8') < 2));

                    // The DELIMITER keyword is always followed by a space.
                    if (!$noSpace || ($prev->value === 'DELIMITER')) {
                        $ret .= ' ';
                    }
                }
            }

            // Flushing the comments collected since the previous token.
            if ($comment !== '') {
                $ret .= $comment;
                $comment = '';
            }

            // Iteration finished, consider current token as previous.
            $prev = $curr;
        }

        if ($this->options['type'] === 'cli') {
            // Resetting the terminal attributes.
            return $ret . "\e[0m";
        }

        return $ret;
    }

    /**
     * Tries to print the query and returns the result.
     *
     * The first entry of the `formats` option whose type matches the token
     * and whose flags are all set on the token is applied: its transformation
     * function is run on the text and, depending on the output type, the text
     * is wrapped in a `<span>` (HTML) or prefixed with an escape sequence
     * (CLI). For HTML output the text is always escaped.
     *
     * @param Token $token The token to be printed.
     *
     * @return string
     */
    public function toString($token)
    {
        $text = $token->token;

        foreach ($this->options['formats'] as $format) {
            if (($token->type === $format['type'])
                && (($token->flags & $format['flags']) === $format['flags'])
            ) {
                // Running transformation function.
                if (!empty($format['function'])) {
                    $func = $format['function'];
                    $text = $func($text);
                }

                // Formatting HTML.
                if ($this->options['type'] === 'html') {
                    return '<span ' . $format['html'] . '>'
                        . htmlspecialchars($text, ENT_NOQUOTES)
                        . '</span>';
                } elseif ($this->options['type'] === 'cli') {
                    return $format['cli'] . $text;
                }

                // Only the first matching format is considered.
                break;
            }
        }

        if ($this->options['type'] === 'cli') {
            // Default foreground colour for tokens without a format.
            return "\e[39m" . $text;
        } elseif ($this->options['type'] === 'html') {
            // Tokens such as `<` or `>` must not break the markup.
            return htmlspecialchars($text, ENT_NOQUOTES);
        }

        return $text;
    }

    /**
     * Formats a query.
     *
     * Convenience wrapper that lexes the query and formats the resulting
     * list of tokens with a new formatter instance.
     *
     * @param string $query   The query to be formatted
     * @param array  $options The formatting options.
     *
     * @return string The formatted string.
     */
    public static function format($query, array $options = array())
    {
        $lexer = new Lexer($query);
        $formatter = new Formatter($options);

        return $formatter->formatList($lexer->list);
    }

    /**
     * Computes the length of a group.
     *
     * A group is delimited by a pair of brackets. The length is the sum of
     * the lengths of the values of all tokens from the list's current
     * position up to (but not including) the matching closing bracket, or up
     * to the end of the list if the bracket is never closed.
     *
     * @param TokensList $list The list of tokens.
     *
     * @return int
     */
    public static function getGroupLength($list)
    {
        /**
         * The number of opening brackets found.
         * This counter starts at one because by the time this function called,
         * the list already advanced one position and the opening bracket was
         * already parsed.
         *
         * @var int $count
         */
        $count = 1;

        /**
         * The length of this group.
         *
         * @var int $length
         */
        $length = 0;

        for ($idx = $list->idx; $idx < $list->count; ++$idx) {
            $token = $list->tokens[$idx];

            // Counting the brackets.
            if ($token->type === Token::TYPE_OPERATOR) {
                if ($token->value === '(') {
                    ++$count;
                } elseif ($token->value === ')') {
                    --$count;
                    if ($count === 0) {
                        break;
                    }
                }
            }

            // Keeping track of this group's length. The cast keeps tokens
            // without a value from being passed to `mb_strlen` as `null`.
            $length += mb_strlen((string) $token->value, 'UTF-8');
        }

        return $length;
    }

    /**
     * Checks if a token is a statement or a clause inside a statement.
     *
     * @param Token $token The token to be checked.
     *
     * @return int|bool 2 if the token starts a statement (or is the
     *                  `DELIMITER` pseudo-statement), 1 if it starts a
     *                  clause, false otherwise.
     */
    public static function isClause($token)
    {
        if ((($token->type === Token::TYPE_NONE) && (strtoupper($token->token) === 'DELIMITER'))
            || (($token->type === Token::TYPE_KEYWORD) && (isset(Parser::$STATEMENT_PARSERS[$token->value])))
        ) {
            return 2;
        } elseif (($token->type === Token::TYPE_KEYWORD) && (isset(Parser::$KEYWORD_PARSERS[$token->value]))) {
            return 1;
        }

        return false;
    }
}