cms11/app/Services/Markdown/Linter.php

<?php

namespace App\Services\Markdown;

/**
 * Normalises the source formatting of a markdown document.
 *
 * The text is cut into blocks (blank-line separated runs, with fenced code
 * blocks kept whole), each block is classified from its first characters and
 * handed to the formatter for that type. Only fenced code, headers and
 * paragraphs are rewritten; tables, blockquotes, lists and footnotes are
 * passed through untouched.
 */
class Linter
{
    /**
     * Delimiter placed on both sides of the numeric id that stands in for a
     * protected inline span. A control character is used so the marker holds
     * no underscore, whitespace or punctuation the rules could act on.
     */
    private const SPAN_MARK = "\x1A";

    /**
     * The markdown content to be formatted.
     *
     * @var string
     */
    private $markdown;

    /**
     * Whether format() runs the rules or hands the content back unchanged.
     *
     * @var bool
     */
    private $applyRules;

    /**
     * Characters that mark the end of a sentence.
     *
     * @var array
     */
    private $phraseEndingChars = ['.', '!', '?'];

    /**
     * Raw HTML fragments lifted out of the text, keyed by the comment
     * placeholder that replaces them while the rules run.
     *
     * @var array
     */
    private $htmlPlaceholders = [];

    /**
     * Constructor takes markdown content and prepares it for formatting.
     *
     * @param  string|null  $markdown  Markdown content to format; null is treated as an empty document.
     * @param  bool  $applyRules  When false (the default) format() returns the content unchanged,
     *                            which is the output callers have received so far. Pass true to
     *                            run the formatting rules.
     */
    public function __construct(?string $markdown = '', bool $applyRules = false)
    {
        // mb_convert_encoding() expects a string, so null is normalised first.
        $this->markdown   = mb_convert_encoding($markdown ?? '', 'UTF-8');
        $this->applyRules = $applyRules;
    }

    /**
     * Format the markdown content by applying various formatting rules.
     *
     * The stored content is never modified, so the method can be called
     * repeatedly and yields the same result each time.
     *
     * @return string The formatted markdown, or the original content when
     *                the rules are disabled or the content is empty.
     */
    public function format(): string
    {
        if (!$this->applyRules || $this->markdown === '') {
            return $this->markdown;
        }

        // Start from a clean map so placeholders of an earlier run cannot leak in.
        $this->htmlPlaceholders = [];

        // Work on "\n" line endings only; the segmenter splits on "\n".
        $text = str_replace(["\r\n", "\r"], "\n", $this->markdown);
        $text = $this->replaceHtmlWithPlaceholders($text);

        $formatted = array_map(function (string $block): string {
            return $this->formatBlock($block, $this->determineBlockType($block));
        }, $this->segmentMarkdown($text));

        return $this->restoreHtmlFromPlaceholders(implode("\n\n", $formatted));
    }

    /**
     * Put the HTML fragments back in place of their placeholders.
     *
     * @param  string  $text  Text containing placeholders.
     * @return string The text with the original HTML restored.
     */
    private function restoreHtmlFromPlaceholders(string $text): string
    {
        return str_replace(array_keys($this->htmlPlaceholders), array_values($this->htmlPlaceholders), $text);
    }

    /**
     * Swap every "<tag ...>...</tag>" fragment for an HTML comment placeholder
     * so the formatting rules never see (or alter) raw HTML.
     *
     * @param  string  $text  Markdown that may contain HTML elements.
     * @return string The text with each matched element replaced by a placeholder.
     */
    private function replaceHtmlWithPlaceholders(string $text): string
    {
        $replaced = preg_replace_callback('/<([a-zA-Z0-9\-]+)([^>]*)>(.*?)<\/\1>/s', function (array $matches): string {
            $placeholder                          = sprintf('<!-- html-placeholder-%d -->', count($this->htmlPlaceholders));
            $this->htmlPlaceholders[$placeholder] = $matches[0];

            return $placeholder;
        }, $text);

        // preg_replace_callback() yields null on a PCRE failure; keep the input in that case.
        return $replaced ?? $text;
    }

    /**
     * Split markdown into blocks.
     *
     * A fenced code block (opened and closed by a line starting with ```)
     * always forms one block, blank lines included. Everywhere else a blank
     * line ends the current block; runs of blank lines produce no empty blocks.
     *
     * @param  string  $markdown  The text to split, using "\n" line endings.
     * @return array List of blocks, each line terminated by "\n".
     */
    private function segmentMarkdown(string $markdown): array
    {
        $blocks  = [];
        $current = '';
        $inFence = false;

        foreach (explode("\n", $markdown) as $line) {
            $isFence = strncmp(trim($line), '```', 3) === 0;

            if ($inFence) {
                $current .= $line . "\n";

                if ($isFence) {
                    // Closing fence: the code block is complete.
                    $blocks[] = $current;
                    $current  = '';
                    $inFence  = false;
                }

                continue;
            }

            if ($isFence) {
                // Opening fence: whatever was collected so far is its own block.
                if ($current !== '') {
                    $blocks[] = $current;
                }
                $current = $line . "\n";
                $inFence = true;

                continue;
            }

            if (trim($line) === '') {
                // Blank line: close the running block, never start one with it.
                if ($current !== '') {
                    $blocks[] = $current;
                    $current  = '';
                }

                continue;
            }

            $current .= $line . "\n";
        }

        // Trailing block, including a code block whose fence was never closed.
        if ($current !== '') {
            $blocks[] = $current;
        }

        return $blocks;
    }

    /**
     * Determine the type of a markdown block.
     *
     * Only the first non-whitespace characters of the block are inspected,
     * and the first matching rule wins.
     *
     * @param  string  $block  The markdown block to analyze.
     * @return string One of code, header, table, blockquote, list, footnote or paragraph.
     */
    private function determineBlockType(string $block): string
    {
        $start = ltrim($block);

        $rules = [
            'code'       => '/^```/',
            'header'     => '/^#/',
            'table'      => '/^\|/',
            'blockquote' => '/^>/',
            // Bullet markers -, * and +, or an ordered marker such as "1." / "1)".
            'list'       => '/^(?:[-*+]|\d+[.)])\s/',
            'footnote'   => '/^\[\^[\w-]+\]:/',
        ];

        foreach ($rules as $type => $pattern) {
            if (preg_match($pattern, $start) === 1) {
                return $type;
            }
        }

        return 'paragraph';
    }

    /**
     * Apply formatting rules to a single markdown block based on its type.
     *
     * @param  string  $block  The markdown block to format.
     * @param  string  $type  The type of the block.
     * @return string The formatted block, without surrounding newlines.
     */
    private function formatBlock(string $block, string $type): string
    {
        $block = trim($block, "\n");

        switch ($type) {
            case 'code':
                return $this->formatCodeBlock($block);
            case 'header':
                return $this->formatHeaderBlock($block);
            case 'table':
                return $this->formatTableBlock($block);
            case 'blockquote':
                return $this->formatBlockquoteBlock($block);
            case 'list':
                return $this->formatListBlock($block);
            case 'footnote':
                return $this->formatFootnoteBlock($block);
            default:
                return $this->formatParagraphBlock($block);
        }
    }

    /**
     * Tidy the fences of a code block; the code lines themselves are kept as is.
     *
     * The opening fence loses surrounding whitespace and a trailing "{...}"
     * attribute list. The closing fence is reduced to a bare ```; when the
     * block has no closing fence one is appended instead of overwriting the
     * last line of code.
     *
     * @param  string  $block  The fenced code block.
     * @return string The block with normalised fences.
     */
    private function formatCodeBlock(string $block): string
    {
        $lines = explode("\n", $block);

        $opening  = trim($lines[0]);
        $lines[0] = preg_replace('/^(```\w*)\s*\{.*?\}$/', '$1', $opening) ?? $opening;

        $last = count($lines) - 1;

        if ($last > 0 && strncmp(trim($lines[$last]), '```', 3) === 0) {
            $lines[$last] = '```';
        } else {
            $lines[] = '```';
        }

        return implode("\n", $lines);
    }

    /**
     * Footnote definitions are currently passed through unchanged.
     */
    private function formatFootnoteBlock(string $block): string
    {
        return $block;
    }

    /**
     * Headers only get their underscore emphasis converted to asterisks.
     */
    private function formatHeaderBlock(string $block): string
    {
        return $this->replaceUnderscoresWithAsterisks($block);
    }

    /**
     * Tables are currently passed through unchanged.
     */
    private function formatTableBlock(string $block): string
    {
        return $block;
    }

    /**
     * Blockquotes are currently passed through unchanged.
     */
    private function formatBlockquoteBlock(string $block): string
    {
        return $block;
    }

    /**
     * Lists are currently passed through unchanged.
     */
    private function formatListBlock(string $block): string
    {
        return $block;
    }

    /**
     * Apply formatting rules to a paragraph block.
     *
     * Three dots become the ellipsis character (inline code spans excepted),
     * all whitespace runs collapse to one space, and the paragraph is then
     * written one sentence per line with underscore emphasis converted to
     * asterisks.
     *
     * @param  string  $block  The paragraph block to format.
     * @return string The formatted paragraph block.
     */
    private function formatParagraphBlock(string $block): string
    {
        // The code-span alternative is matched first and returned untouched,
        // which keeps "..." inside inline code from being rewritten.
        $block = preg_replace_callback('/`[^`]*`|\.{3}(?!\.)/', function (array $match): string {
            return $match[0][0] === '`' ? $match[0] : '…';
        }, $block) ?? $block;

        // Newlines are whitespace too, so this also joins the lines of the paragraph.
        $block = trim(preg_replace('/\s+/', ' ', $block) ?? $block);

        if ($block === '') {
            return '';
        }

        // Split on the whitespace that follows a sentence-ending character.
        $delimiter = sprintf('/(?<=[%s])\s+/u', preg_quote(implode('', $this->phraseEndingChars), '/'));
        $sentences = preg_split($delimiter, $block, -1, PREG_SPLIT_NO_EMPTY) ?: [$block];

        $sentences = array_map(function (string $sentence): string {
            return trim($this->replaceUnderscoresWithAsterisks($sentence));
        }, $sentences);

        return implode("\n", $sentences);
    }

    /**
     * Convert underscore emphasis (_text_) to asterisk emphasis (*text*).
     *
     * Inline code, [text](url) links, <...> spans and [^footnote] references
     * are set aside first so underscores inside them survive. An underscore
     * pair is only converted when it wraps the text like an emphasis
     * delimiter: the opening underscore does not follow a letter, digit or
     * underscore, the closing one does not precede one, and neither touches
     * whitespace on its inner side. Identifiers such as snake_case_name and
     * double-underscore runs such as __bold__ are therefore left alone.
     *
     * @param  string  $sentence  The sentence to process.
     * @return string The processed sentence.
     */
    private function replaceUnderscoresWithAsterisks(string $sentence): string
    {
        $protected = [];

        $guarded = preg_replace_callback(
            '/`[^`]+`|\[[^\]]+\]\([^\)]+\)|<[^>]+>|\[\^[^\]]+\]/',
            function (array $match) use (&$protected): string {
                // The closing mark keeps id 1 from being a prefix of id 10.
                $key             = self::SPAN_MARK . count($protected) . self::SPAN_MARK;
                $protected[$key] = $match[0];

                return $key;
            },
            $sentence
        );

        if ($guarded === null) {
            return $sentence;
        }

        $converted = preg_replace(
            '/(?<![\p{L}\p{N}_])_(?=\S)([^_]+)(?<=\S)_(?![\p{L}\p{N}_])/u',
            '*$1*',
            $guarded
        ) ?? $guarded;

        return str_replace(array_keys($protected), array_values($protected), $converted);
    }
}
Basic - although quite huge - infrastructure for rendering bundles 2024-04-20 23:27:47 +02:00			`<?php`

			`namespace App\Services\Markdown;`

			`/**`
			`* Linter class is responsible for formatting markdown content.`
			`*/`
			`class Linter`
			`{`
			`/**`
			`* The markdown content to be formatted.`
			`*`
			`* @var string`
			`*/`
			`private $markdown;`

			`/**`
			`* Characters that mark the end of a sentence.`
			`*`
			`* @var array`
			`*/`
			`private $phraseEndingChars = ['.', '!', '?'];`

Added almost everything needed to browse the website 2024-04-23 23:55:48 +02:00			`private $htmlPlaceholders = [];`

Basic - although quite huge - infrastructure for rendering bundles 2024-04-20 23:27:47 +02:00			`/**`
			`* Constructor takes markdown content and prepares it for formatting.`
			`*`
			`* @param string $markdown Markdown content to format.`
			`*/`
			`public function __construct(?string $markdown = '')`
			`{`
			`$this->markdown = mb_convert_encoding($markdown, 'UTF-8');`
			`}`

			`/**`
			`* Format the markdown content by applying various formatting rules.`
			`*`
			`* @return string The formatted markdown.`
			`*/`
			`public function format(): string`
			`{`
Added almost everything needed to browse the website 2024-04-23 23:55:48 +02:00			`return $this->markdown;`
			`$this->markdown = $this->replaceHtmlWithPlaceholders($this->markdown);`

Basic - although quite huge - infrastructure for rendering bundles 2024-04-20 23:27:47 +02:00			`if (empty($this->markdown)) {`
			`return $this->markdown;`
			`}`

			`$blocks = $this->segmentMarkdown();`

			`$processedBlocks = array_map(function ($block) {`
			`$type = $this->determineBlockType($block);`

			`return $this->formatBlock($block, $type);`
			`}, $blocks);`

Added almost everything needed to browse the website 2024-04-23 23:55:48 +02:00			`$result = implode("\n\n", $processedBlocks);`
			`$result = $this->restoreHtmlFromPlaceholders($result);`

			`return $result;`
			`}`

			`private function restoreHtmlFromPlaceholders($text)`
			`{`
			`return str_replace(array_keys($this->htmlPlaceholders), array_values($this->htmlPlaceholders), $text);`
			`}`

			`private function replaceHtmlWithPlaceholders($text)`
			`{`
			`$index = 0;`

			`return preg_replace_callback('/<([a-zA-Z0-9\-]+)([^>]*)>(.*?)<\/\1>/s', function ($matches) use (&$index) {`
			`$placeholder = "<!-- html-placeholder-{$index} -->";`
			`$this->htmlPlaceholders[$placeholder] = $matches[0]; // Store the whole match`
			`$index++;`

			`return $placeholder;`
			`}, $text);`
Basic - although quite huge - infrastructure for rendering bundles 2024-04-20 23:27:47 +02:00			`}`

			`private function segmentMarkdown(): array`
			`{`
			`$blocks = [];`
			`$currentBlock = '';`
			`$lines = explode("\n", $this->markdown);`
			`$inCodeBlock = false;`

			`foreach ($lines as $line) {`
			if (preg_match('/^```/', trim($line))) {
			`if ($inCodeBlock) {`
			`// End of a code block`
			`$currentBlock .= $line . "\n";`
			`$blocks[] = $currentBlock;`
			`$currentBlock = '';`
			`$inCodeBlock = false;`
			`} else {`
			`// Start of a code block`
			`if (!empty($currentBlock)) {`
			`$blocks[] = $currentBlock;`
			`$currentBlock = '';`
			`}`
			`$inCodeBlock = true;`
			`$currentBlock .= $line . "\n";`
			`}`
			`} elseif ($inCodeBlock) {`
			`// Inside a code block`
			`$currentBlock .= $line . "\n";`
			`} else {`
			`// Normal line processing`
			`if (trim($line) === '' && trim($currentBlock) !== '') {`
			`$blocks[] = $currentBlock;`
			`$currentBlock = '';`
			`} else {`
			`$currentBlock .= $line . "\n";`
			`}`
			`}`
			`}`

			`// Add the last block if not empty`
			`if (!empty(trim($currentBlock))) {`
			`$blocks[] = $currentBlock;`
			`}`

			`return $blocks;`
			`}`

			`/**`
			`* Determine the type of a markdown block.`
			`*`
			`* @param string $block The markdown block to analyze.`
			`* @return string The type of the block.`
			`*/`
			`private function determineBlockType(string $block): string`
			`{`
			if (preg_match('/^\s*```/', trim($block))) {
			`return 'code';`
			`}`

			`if (preg_match('/^\s*#/', trim($block))) {`
			`return 'header';`
			`}`

			`if (preg_match('/^\s*\|/', trim($block))) {`
			`return 'table';`
			`}`

			`if (preg_match('/^\s*>\s/', trim($block))) {`
			`return 'blockquote';`
			`}`

			`if (`
			`preg_match('/^\s*-\s/', trim($block))`
			`|| preg_match('/^\s*\d+\.\s/', trim($block))`
			`) {`
			`return 'list';`
			`}`

			`if (preg_match('/^\s*\[\^[\w-]+\]:/', trim($block))) {`
			`return 'footnote';`
			`}`

			`return 'paragraph'; // Default to paragraph if no other type matches`
			`}`

			`/**`
			`* Apply formatting rules to a single markdown block based on its type.`
			`*`
			`* @param string $block The markdown block to format.`
			`* @param string $type The type of the block.`
			`* @return string The formatted block.`
			`*/`
			`private function formatBlock(string $block, string $type): string`
			`{`
			`$block = trim($block, "\n");`

			`switch ($type) {`
			`case 'code':`
			`return $this->formatCodeBlock($block);`
			`case 'header':`
			`return $this->formatHeaderBlock($block);`
			`case 'table':`
			`return $this->formatTableBlock($block);`
			`case 'blockquote':`
			`return $this->formatBlockquoteBlock($block);`
			`case 'list':`
			`return $this->formatListBlock($block);`
			`case 'footnote':`
			`return $this->formatFootnoteBlock($block);`
			`default:`
			`return $this->formatParagraphBlock($block);`
			`}`
			`}`

			`private function formatCodeBlock(string $block): string`
			`{`
			`// Split the block into lines`
			`$lines = explode("\n", $block);`

			// Clean the first line if it starts with ```
			if (count($lines) > 0 && preg_match('/^```/', trim($lines[0]))) {
			$lines[0] = preg_replace('/^(```\w*)\s*{.*?}$/', '$1', trim($lines[0]));
			`}`

Fix: Last line of code block was wrong 2024-04-21 23:45:09 +02:00			`$last = count($lines) - 1;`

			$lines[$last] = '```';

Basic - although quite huge - infrastructure for rendering bundles 2024-04-20 23:27:47 +02:00			`// Reassemble the block`
			`return implode("\n", $lines);`
			`}`

			`private function formatFootnoteBlock(string $block): string`
			`{`
			`// HTML-specific formatting`
			`return $block;`
			`}`

			`private function formatHeaderBlock(string $block): string`
			`{`
			`// Header-specific formatting`
			`return $this->replaceUnderscoresWithAsterisks($block);`
			`}`

			`private function formatTableBlock(string $block): string`
			`{`
Added almost everything needed to browse the website 2024-04-23 23:55:48 +02:00			`// Table formatting`
Basic - although quite huge - infrastructure for rendering bundles 2024-04-20 23:27:47 +02:00			`return $block;`
			`}`

			`private function formatBlockquoteBlock(string $block): string`
			`{`
			`// Blockquote-specific formatting`
			`return $block;`
			`}`

			`private function formatListBlock(string $block): string`
			`{`
			`// List-specific formatting`
			`return $block;`
			`}`

			`/**`
			`* Apply formatting rules to a paragraph block.`
			`*`
			`* @param string $block The paragraph block to format.`
			`* @return string The formatted paragraph block.`
			`*/`
			`private function formatParagraphBlock(string $block): string`
			`{`
			`// Normalize three dots and variants to the ellipsis character`
			`$block = preg_replace('/\.{3}(?!\.)/', '…', $block);`

			`// Remove unnecessary new lines within the paragraph`
			`$block = str_replace("\n", ' ', $block);`

			`// Normalize spaces (replace multiple spaces with a single space)`
			`$block = preg_replace('/\s+/', ' ', $block);`

			`// Avoid adding space in markdown links by temporarily replacing them`
			`preg_match_all('/\[[^\]]+\]\([^\)]+\)/', $block, $links);`
			`foreach ($links[0] as $index => $link) {`
			`$block = str_replace($link, "link_placeholder_{$index}", $block);`
			`}`

			`// Add space after punctuation`
			`$block = preg_replace('/(\S)([.!?…])(\s|$)/', '$1$2 ', $block);`

			`// Restore links`
			`foreach ($links[0] as $index => $link) {`
			`$block = str_replace("link_placeholder_{$index}", $link, $block);`
			`}`

			`$delimiter = sprintf('/(?<=[%s])\s+/u', implode('', array_map('preg_quote', $this->phraseEndingChars)));`
			`$sentences = preg_split($delimiter, $block, -1, PREG_SPLIT_NO_EMPTY);`
			`$sentences = array_map(function ($sentence) {`
			`// Replace underscores by asterisks when they are used as pairs and not part of markdown links`
			`$sentence = $this->replaceUnderscoresWithAsterisks($sentence);`

			`return trim($sentence);`
			`}, $sentences);`

			`// Join sentences by new lines`
			`$formattedParagraph = implode("\n", $sentences);`

			`return $formattedParagraph;`
			`}`

			`/**`
			`* Replace underscores with asterisks when used in pairs, not affecting markdown links.`
			`*`
			`* @param string $sentence The sentence to process.`
			`* @return string The processed sentence.`
			`*/`
			`private function replaceUnderscoresWithAsterisks(string $sentence): string`
			`{`
			`// Temporarily remove Markdown links to avoid processing underscores within them`
			`$patterns = [`
			`'/\[[^\]]+\]\([^\)]+\)/', // Match links of the form [text](link)`
			`'/<[^>]+>/', // Match links of the form <link>`
			`'/\[\^[^\]]+\]/', // Match footnote references of the form [^footnote]`
			`];`

			`$links = [];`
			`foreach ($patterns as $pattern) {`
			`preg_match_all($pattern, $sentence, $matches);`
			`foreach ($matches[0] as $index => $match) {`
			`// Store the link with a unique placeholder`
			`$placeholder = sprintf('link-placeholder-%d-%d', count($links), $index);`
			`$links[$placeholder] = $match;`
			`$sentence = str_replace($match, $placeholder, $sentence);`
			`}`
			`}`

			`// Replace all non-link underscore pairs`
			`$sentence = preg_replace_callback('/(_[^_]+_)/', function ($matches) {`
			`// Replace underscores with asterisks, but keep the content`
			`return str_replace('_', '*', $matches[0]);`
			`}, $sentence);`

			`// Restore the links`
			`foreach ($links as $placeholder => $link) {`
			`$sentence = str_replace($placeholder, $link, $sentence);`
			`}`

			`return $sentence;`
			`}`
			`}`