diff --git a/src/InputSymbols/Block.php b/src/InputSymbols/Block.php index fbfaa6e..5c1d9f8 100644 --- a/src/InputSymbols/Block.php +++ b/src/InputSymbols/Block.php @@ -8,14 +8,16 @@ class Block private Delimiter $symbolClose; private bool $shouldTokenizeContent; + private bool $splitAffixIntoSymbols; /** * Block constructor. * @param string $symbolOpen * @param string|null $symbolClose * @param bool $shouldTokenizeContent + * @param bool $splitAffixIntoSymbols */ - public function __construct(string $symbolOpen, ?string $symbolClose, bool $shouldTokenizeContent) + public function __construct(string $symbolOpen, ?string $symbolClose, bool $shouldTokenizeContent, bool $splitAffixIntoSymbols = false) { $this->symbolOpen = new Delimiter($symbolOpen); @@ -26,6 +28,7 @@ public function __construct(string $symbolOpen, ?string $symbolClose, bool $shou } $this->shouldTokenizeContent = $shouldTokenizeContent; + $this->splitAffixIntoSymbols = $splitAffixIntoSymbols; } /** @@ -66,6 +69,14 @@ public function shouldTokenizeContent(): bool return $this->shouldTokenizeContent; } + /** + * @return bool + */ + public function splitAffixIntoSymbols(): bool + { + return $this->splitAffixIntoSymbols; + } + public function __toString() { return "{$this->open()}{$this->close()}"; diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 71ad9f3..9984068 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -4,6 +4,7 @@ use ricwein\Tokenizer\InputSymbols\Block; use ricwein\Tokenizer\InputSymbols\Delimiter; +use ricwein\Tokenizer\Result\BaseToken; use ricwein\Tokenizer\Result\TokenStream; use ricwein\Tokenizer\Result\BlockToken; use ricwein\Tokenizer\Result\Token; @@ -89,7 +90,7 @@ private function process(string $input, int $depth, int $line): array // abort tokenizing after reaching the max block depth // just return the input string as the remaining symbol if ($this->maxDepth > 0 && $depth >= $this->maxDepth) { - return [new Token($input, null)]; + return [new Token($input, null, $line)]; } /** @var BlockToken[]|Token[] $result */ @@ -98,6 +99,7 @@ private function process(string $input, int $depth, int $line): array /** @var array|null $openBlocks 'block' => BlockToken, 'startOffset' => int */ $openBlocks = []; + /** @var BaseToken|null $lastSymbol */ $lastSymbol = null; /** @var Delimiter|null $lastDelimiter */ @@ -176,9 +178,18 @@ private function process(string $input, int $depth, int $line): array $resultBlock = new BlockToken($block, $lastDelimiter, $line); if ($lastOffset < $offset) { - $prefix = ltrim(substr($input, $lastOffset, $offset - $lastOffset)); + $prefix = trim(substr($input, $lastOffset, $offset - $lastOffset)); if (!empty($prefix)) { - $resultBlock->withPrefix($prefix); + if ($block->splitAffixIntoSymbols()) { + $lastSymbol = new Token($prefix, $lastDelimiter, $line); + + $resultBlock->setDelimiter(null); + $lastDelimiter = null; + + $result[] = $lastSymbol; + } else { + $resultBlock->withPrefix($prefix); + } } } @@ -204,8 +215,15 @@ private function process(string $input, int $depth, int $line): array // encounter of symbol directly after an block (no delimiter in between) if ($lastSymbol instanceof BlockToken) { + if (!empty($content)) { - $lastSymbol->withSuffix($content); + + if ($lastSymbol->block()->splitAffixIntoSymbols()) { + $result[] = new Token($content, null, $line); + } else { + $lastSymbol->withSuffix($content); + } + } // we need to reset the last-symbol, since we processed the @@ -233,15 +251,21 @@ private function process(string $input, int $depth, int $line): array } } + // handle remaining tokens $remaining = ltrim($remaining, ' '); if (strlen($remaining) > 0) { if ($lastSymbol instanceof BlockToken) { - $lastSymbol->withSuffix($remaining); + + if ($lastSymbol->block()->splitAffixIntoSymbols()) { + $result[] = new Token(ltrim($remaining), null, $line); + } else { + $lastSymbol->withSuffix($remaining); + } + } else { $result[] = new Token($remaining, $lastDelimiter, $line); } } - return $result; } diff --git a/tests/TokenizerTest.php b/tests/TokenizerTest.php index 2bd27ba..0d42061 100644 --- a/tests/TokenizerTest.php +++ b/tests/TokenizerTest.php @@ -20,11 +20,14 @@ protected function setUp(): void $delimiter = [new Delimiter('.'), new Delimiter('|'), new Delimiter(',')]; $blocks = [ - new Block('[', ']', true), - new Block('(', ')', true), - new Block('{', '}', false), - new Block('\'', null, false), - new Block('"', null, false), + new Block('[', ']', true, false), + new Block('(', ')', true, false), + new Block('{', '}', false, false), + new Block('\'', null, false, false), + new Block('"', null, false, false), + + new Block('{{', '}}', false, true), + new Block('{%', '%}', true, true), ]; $this->tokenizer = new Tokenizer($delimiter, $blocks); @@ -291,7 +294,7 @@ public function testLineTracking() $testString = file_get_contents(__DIR__ . '/test.txt'); $expected = [ new Token('first', null), - (new BlockToken(new Block('(', ')', true), new Delimiter('.'), 2))->withPrefix('second' . PHP_EOL)->withSymbols([ + (new BlockToken(new Block('(', ')', true), new Delimiter('.'), 2))->withPrefix('second')->withSymbols([ new Token('line:2', null, 2), ])->withSuffix(PHP_EOL . 'end' . PHP_EOL), ]; @@ -307,7 +310,33 @@ public function testLineTracking() new Token('end', new Delimiter(PHP_EOL), 3), ]; $this->assertEquals(new TokenStream($expected), $customTokenizer->tokenize($testString)); + } + + public function testAffixSplitting() + { + $testString = "before {{ test }} after"; + $expected = [ + new Token('before', null), + (new BlockToken(new Block('{{', '}}', false, true), null))->withSymbols([ + new Token(' test ', null), + ]), + new Token('after', null), + ]; + $this->assertEquals(new TokenStream($expected), $this->tokenizer->tokenize($testString)); + $testString = "before.one {% test.first %} 'after'"; + $expected = [ + new Token('before', null), + new Token('one', new Delimiter('.')), + (new BlockToken(new Block('{%', '%}', true, true), null))->withSymbols([ + new Token(' test', null), + new Token('first ', new Delimiter('.')), + ]), + (new BlockToken(new Block('\'', '\'', false), null))->withSymbols([ + new Token('after', null), + ]), + ]; + $this->assertEquals(new TokenStream($expected), $this->tokenizer->tokenize($testString)); } }