<?php

namespace App\Services\Markdown;

/**
 * Normalises markdown content block by block.
 *
 * The document is split into blocks (paragraphs, fenced code, lists, headers, ...).
 * Paragraphs are reflowed to one sentence per line and underscore emphasis is
 * rewritten with asterisks; fenced code blocks get a clean opening and closing
 * fence; every other block type is passed through with its line structure intact.
 */
class Linter
{
    /**
     * Inline code span: a run of backticks closed by a run of the same length.
     */
    private const CODE_SPAN_PATTERN = '/(?<!`)(`+)(?!`).+?(?<!`)\1(?!`)/s';

    /**
     * Inline link or image of the form [text](target).
     */
    private const LINK_PATTERN = '/\[[^\]]+\]\([^\)]+\)/';

    /**
     * The markdown content to be formatted.
     *
     * @var string
     */
    private $markdown;

    /**
     * Characters that mark the end of a sentence.
     *
     * @var array
     */
    private $phraseEndingChars = ['.', '!', '?'];

    /**
     * Store the markdown content, converted to UTF-8, for later formatting.
     *
     * @param  string|null  $markdown  Markdown content to format; null is treated as an empty document.
     */
    public function __construct(?string $markdown = '')
    {
        // Coalesce null first: passing null to mb_convert_encoding() is deprecated since PHP 8.1.
        $this->markdown = (string) mb_convert_encoding($markdown ?? '', 'UTF-8');
    }

    /**
     * Format the markdown content by applying the rules of each block type.
     *
     * @return string The formatted markdown, blocks separated by one blank line.
     */
    public function format(): string
    {
        if ($this->markdown === '') {
            return '';
        }

        $processedBlocks = array_map(function (string $block): string {
            return $this->formatBlock($block, $this->determineBlockType($block));
        }, $this->segmentMarkdown());

        return implode("\n\n", $processedBlocks);
    }

    /**
     * Segment the markdown into blocks separated by blank lines, keeping fenced code blocks whole.
     *
     * @return array List of blocks; every line inside a block is terminated by "\n".
     */
    private function segmentMarkdown(): array
    {
        $blocks       = [];
        $currentBlock = '';
        $inCodeBlock  = false;

        // Normalise CRLF / CR so that no stray "\r" survives inside the blocks.
        $lines = explode("\n", str_replace(["\r\n", "\r"], "\n", $this->markdown));

        foreach ($lines as $line) {
            $isFence = strncmp(trim($line), '```', 3) === 0;

            if ($isFence && !$inCodeBlock) {
                // Opening fence: flush whatever text was being collected.
                if ($currentBlock !== '') {
                    $blocks[] = $currentBlock;
                }
                $currentBlock = $line . "\n";
                $inCodeBlock  = true;

                continue;
            }

            if ($inCodeBlock) {
                // Blank lines inside a code block belong to the code.
                $currentBlock .= $line . "\n";

                if ($isFence) {
                    // Closing fence ends the block.
                    $blocks[]     = $currentBlock;
                    $currentBlock = '';
                    $inCodeBlock  = false;
                }

                continue;
            }

            if (trim($line) === '') {
                // A blank line closes the open block; extra blank lines are skipped so
                // they can never produce an empty block of their own.
                if ($currentBlock !== '') {
                    $blocks[]     = $currentBlock;
                    $currentBlock = '';
                }

                continue;
            }

            $currentBlock .= $line . "\n";
        }

        // Strict comparison: empty() would wrongly discard a block whose text is "0".
        if (trim($currentBlock) !== '') {
            $blocks[] = $currentBlock;
        }

        return $blocks;
    }

    /**
     * Determine the type of a markdown block from the way it starts.
     *
     * @param  string  $block  The markdown block to analyze.
     * @return string One of: code, html, header, table, blockquote, list, footnote, paragraph.
     */
    private function determineBlockType(string $block): string
    {
        $trimmed = trim($block);

        if (strncmp($trimmed, '```', 3) === 0) {
            return 'code';
        }

        if (preg_match('/^<[^>]+>/', $trimmed)) {
            return 'html';
        }

        if (preg_match('/^#/', $trimmed)) {
            return 'header';
        }

        if (preg_match('/^\|/', $trimmed)) {
            return 'table';
        }

        // The space after ">" is optional in markdown.
        if (preg_match('/^>/', $trimmed)) {
            return 'blockquote';
        }

        // Bullets may be "-", "*" or "+"; ordered items may end with "." or ")".
        if (preg_match('/^(?:[-*+]|\d+[.)])\s/', $trimmed)) {
            return 'list';
        }

        if (preg_match('/^\[\^[\w-]+\]:/', $trimmed)) {
            return 'footnote';
        }

        // Setext heading (text underlined with "=" or "-"): reflowing it as a
        // paragraph would merge the underline into the title.
        if (preg_match('/\A[^\n]+\n[ \t]*(?:=+|-+)[ \t]*(?:\n|\z)/', $trimmed)) {
            return 'header';
        }

        return 'paragraph';  // Default to paragraph if no other type matches
    }

    /**
     * Apply formatting rules to a single markdown block based on its type.
     *
     * @param  string  $block  The markdown block to format.
     * @param  string  $type  The type of the block.
     * @return string The formatted block, without leading or trailing line breaks.
     */
    private function formatBlock(string $block, string $type): string
    {
        $block = trim($block, "\n");

        switch ($type) {
            case 'code':
                return $this->formatCodeBlock($block);
            case 'html':
                return $this->formatHtmlBlock($block);
            case 'header':
                return $this->formatHeaderBlock($block);
            case 'table':
                return $this->formatTableBlock($block);
            case 'blockquote':
                return $this->formatBlockquoteBlock($block);
            case 'list':
                return $this->formatListBlock($block);
            case 'footnote':
                return $this->formatFootnoteBlock($block);
            default:
                return $this->formatParagraphBlock($block);
        }
    }

    /**
     * Clean the fences of a fenced code block; the code itself is left untouched.
     *
     * @param  string  $block  The code block, starting with its opening fence.
     * @return string The block with a normalised opening fence and a bare closing fence.
     */
    private function formatCodeBlock(string $block): string
    {
        $lines = explode("\n", $block);

        // Strip a trailing "{...}" attribute list from the opening fence, keeping the language.
        if (strncmp(trim($lines[0]), '```', 3) === 0) {
            $lines[0] = preg_replace('/^(```\w*)\s*\{.*?\}$/', '$1', trim($lines[0])) ?? trim($lines[0]);
        }

        $last = count($lines) - 1;

        if ($last > 0 && strncmp(trim($lines[$last]), '```', 3) === 0) {
            // Normalise the existing closing fence.
            $lines[$last] = '```';
        } else {
            // Unterminated block: add the missing fence instead of overwriting
            // the last line of code (or the opening fence itself).
            $lines[] = '```';
        }

        return implode("\n", $lines);
    }

    /**
     * HTML blocks are passed through unchanged.
     */
    private function formatHtmlBlock(string $block): string
    {
        return $block;
    }

    /**
     * Footnote definitions are passed through unchanged.
     */
    private function formatFootnoteBlock(string $block): string
    {
        return $block;
    }

    /**
     * Headers keep their line structure; only underscore emphasis is rewritten.
     */
    private function formatHeaderBlock(string $block): string
    {
        return $this->replaceUnderscoresWithAsterisks($block);
    }

    /**
     * Tables are passed through unchanged.
     */
    private function formatTableBlock(string $block): string
    {
        return $block;
    }

    /**
     * Blockquotes are passed through unchanged.
     */
    private function formatBlockquoteBlock(string $block): string
    {
        return $block;
    }

    /**
     * Lists are passed through unchanged.
     */
    private function formatListBlock(string $block): string
    {
        return $block;
    }

    /**
     * Reflow a paragraph to one sentence per line.
     *
     * Whitespace runs are collapsed, "..." becomes "…", the text is split after
     * sentence-ending characters, and underscore emphasis is rewritten with
     * asterisks. Inline code spans and links are never split across lines.
     *
     * @param  string  $block  The paragraph block to format.
     * @return string The formatted paragraph block.
     */
    private function formatParagraphBlock(string $block): string
    {
        // Join the lines of the paragraph and collapse repeated whitespace.
        $block = trim(preg_replace('/\s+/', ' ', $block) ?? $block);

        // Hide inline code before touching punctuation so that `foo(...)` keeps its dots.
        [$block, $spans] = $this->protectSpans($block, [self::CODE_SPAN_PATTERN]);

        // Normalize three dots to the ellipsis character
        $block = preg_replace('/\.{3}(?!\.)/', '…', $block) ?? $block;

        // Hide links until after the split so "[Mr. Smith](url)" is not cut in two.
        [$block, $spans] = $this->protectSpans($block, [self::LINK_PATTERN], $spans);

        $endings = implode('', array_map(function (string $char): string {
            return preg_quote($char, '/');
        }, $this->phraseEndingChars));

        $sentences = preg_split(sprintf('/(?<=[%s])\s+/u', $endings), $block, -1, PREG_SPLIT_NO_EMPTY);
        if ($sentences === false) {
            // The split failed (e.g. malformed UTF-8); keep the paragraph on one line.
            $sentences = [$block];
        }

        $sentences = array_map(function (string $sentence) use ($spans): string {
            $sentence = $this->restoreSpans($sentence, $spans);

            return trim($this->replaceUnderscoresWithAsterisks($sentence));
        }, $sentences);

        // Join sentences by new lines
        return implode("\n", $sentences);
    }

    /**
     * Rewrite underscore emphasis (_text_, __text__) with asterisks.
     *
     * Inline code, links, <...> spans and footnote references are left untouched,
     * and so are underscores inside words such as snake_case identifiers.
     *
     * @param  string  $sentence  The sentence to process.
     * @return string The processed sentence.
     */
    private function replaceUnderscoresWithAsterisks(string $sentence): string
    {
        [$sentence, $spans] = $this->protectSpans($sentence, [
            self::CODE_SPAN_PATTERN,  // Inline code of the form `code`
            self::LINK_PATTERN,       // Links of the form [text](link)
            '/<[^>]+>/',              // Links of the form <link> and inline HTML tags
            '/\[\^[^\]]+\]/',         // Footnote references of the form [^footnote]
        ]);

        // A delimiter run of one or two underscores counts only when it is not glued
        // to a word character on its outer side and not followed/preceded by
        // whitespace on its inner side.
        $converted = preg_replace_callback(
            '/(?<![\p{L}\p{N}_])(__?)(?=\S)([^_]+)(?<=\S)\1(?![\p{L}\p{N}_])/u',
            function (array $matches): string {
                $marker = str_repeat('*', strlen($matches[1]));

                return $marker . $matches[2] . $marker;
            },
            $sentence
        );

        return $this->restoreSpans($converted ?? $sentence, $spans);
    }

    /**
     * Replace every match of the given patterns with an opaque placeholder token.
     *
     * Tokens are delimited by NUL bytes on both sides, so one token can never be
     * a prefix of another and tokens contain no markdown punctuation.
     *
     * @param  string  $text  Text to scan.
     * @param  array  $patterns  Regular expressions, applied in order.
     * @param  array  $spans  Token => original text map from an earlier call, extended here.
     * @return array Two elements: the text with tokens, and the updated token map.
     */
    private function protectSpans(string $text, array $patterns, array $spans = []): array
    {
        foreach ($patterns as $pattern) {
            $replaced = preg_replace_callback($pattern, function (array $matches) use (&$spans): string {
                $token         = sprintf("\x00span-%d\x00", count($spans));
                $spans[$token] = $matches[0];

                return $token;
            }, $text);

            if ($replaced !== null) {
                $text = $replaced;
            }
        }

        return [$text, $spans];
    }

    /**
     * Put the original text back in place of the tokens created by protectSpans().
     *
     * @param  string  $text  Text containing placeholder tokens.
     * @param  array  $spans  Token => original text map.
     * @return string The text with every token replaced by its original content.
     */
    private function restoreSpans(string $text, array $spans): string
    {
        // Newest first: a later span may contain the token of an earlier one.
        foreach (array_reverse($spans, true) as $token => $original) {
            $text = str_replace($token, $original, $text);
        }

        return $text;
    }
}