import fs = require("fs");
import { PerlDocument, PerlElem, PerlSymbolKind } from "./types";
import Uri from "vscode-uri";
import { isFile } from "./utils";

export async function getPod(elem: PerlElem, perlDoc: PerlDocument, modMap: Map<string, string>): Promise<string | undefined> {
    // File may not exists. Return nothing if it doesn't

    const absolutePath = await resolvePathForDoc(elem, perlDoc, modMap);

    if(!absolutePath) return;

    try {
        var fileContent = await fs.promises.readFile(absolutePath, "utf8");
    } catch {
        return;
    }

    // Initialize state variables
    let inPodBlock = false;
    let inRelevantBlock = true;
    let podContent = "";
    let podBuffer = ""; // We "buffer" pod when searching to avoid empty sections
    let meaningFullContent = false;
    let searchItem;
    if([PerlSymbolKind.Package, PerlSymbolKind.Module].includes(elem.type)){
        // Search all. Note I'm not really treating packages different from Modules
    } else if([PerlSymbolKind.ImportedSub, PerlSymbolKind.Method, PerlSymbolKind.Inherited, PerlSymbolKind.PathedField, 
                PerlSymbolKind.LocalMethod, PerlSymbolKind.LocalSub].includes(elem.type)){
        searchItem = elem.name;
        searchItem = searchItem.replace(/^[\w:]+::(\w+)$/, "$1"); // Remove package
    } else {
        return;
    }

    let markdown = "";

    // Quick search for leading comments of a very specific form with comment blocks the preceed a sub (and aren't simply get/set without docs)
    // These regexes are painful, but I didn't want to mix this with the line-by-line POD parsing which would overcomplicate that piece
    let match, match2;
    if(searchItem && (match = fileContent.match(`\\r?\\n#(?:####+| \-+) *(?:\\r?\\n# *)*${searchItem}\\r?\\n((?:(?:#.*| *)\\r?\\n)+)sub +${searchItem}\\b`))){
        // Ensure it's not an empty get/set pair.
        if(!( (match2 = searchItem.match(/^get_(\w+)$/)) && match[1].match(new RegExp(`^(?:# +set_${match2[1]}\\r?\\n)?[\\s#]*$`)))){
            let content = match[1].replace(/^ *#+ ?/gm,'');
            content = content.replace(/^\s+|\s+$/g,'');
            if(content){ // It may still be empty for non-get functions
                markdown += "```text\n" + content + "\n```\n"
            }
        }
    }

    // Split the file into lines and iterate through them
    const lines = fileContent.split(/\r?\n/);
    for (const line of lines) {
        if (line.startsWith("=cut")) {
            // =cut lines are not added.
            inPodBlock = false;
        }

         if (line.match(/^=(pod|head\d|over|item|back|begin|end|for|encoding)/)) {
            inPodBlock = true;
            meaningFullContent = false;
            if(searchItem && line.match(new RegExp(`^=(head\\d|item).*\\b${searchItem}\\b`))){
                // This is structured so if we hit two relevant block in a row, we keep them both
                inRelevantBlock = true;
            } else {
                inRelevantBlock = false;
                podBuffer = "";
            }
        } else if(line.match(/\w/)){
            // For this section, we found something that's not a header and has content
            meaningFullContent = true;
        }

        if(inPodBlock){
            if(searchItem){
                if(inRelevantBlock) {
                    podBuffer += line + "\n";   
                }
            }
            else {
                podContent += line + "\n";
            }
        }

        if(meaningFullContent && podBuffer != ""){
            podContent += podBuffer;
            podBuffer = "";
        }
    }
    
    markdown += convertPODToMarkdown(podContent);

    return markdown;
}


async function resolvePathForDoc(elem: PerlElem, perlDoc: PerlDocument, modMap: Map<string, string>): Promise<string | undefined> {
    let absolutePath = Uri.parse(elem.uri).fsPath;

    const foundPath = await fsPathOrAlt(absolutePath);
    if(foundPath){
        return foundPath;
    }

    if (elem.package) {
        let elemResolved = perlDoc.elems.get(elem.package);
        
        if(!elemResolved){

            // Looking up a module by the package name is only convention, but helps for things like POSIX
            const modUri = modMap.get(elem.package);
            if(modUri){
                let modPath = await fsPathOrAlt(Uri.parse(modUri).fsPath);
                if(modPath){
                    return modPath;
                }
            }
            return;
        }


        for (let potentialElem of elemResolved) {
            const potentialPath = Uri.parse(potentialElem.uri).fsPath;
            const foundPackPath = await fsPathOrAlt(potentialPath);
            if (foundPackPath) {
                return foundPackPath;
            }
        }
    }
    if(await badFile(absolutePath)){
        return;
    }

}

async function fsPathOrAlt(fsPath: string | undefined): Promise<string | undefined>{

    if(!fsPath){
        return;
    }

    if(/\.pm$/.test(fsPath)){
        let podPath = fsPath.replace(/\.pm$/, ".pod");
        if(!await badFile(podPath)){
            return podPath;
        }
    }
    if(!await badFile(fsPath)){
        return fsPath;
    }
    return;

}

async function badFile(fsPath: string): Promise<boolean> {

    if (!fsPath || fsPath.length <= 1) {
        return true;
    }

    if( /\w+\.c$/.test(fsPath) ){
        return true;
    }

    if(!(await isFile(fsPath))){
        return true;
    }

    return false;
}

type ConversionState = {
    inList: boolean;
    inVerbatim: boolean;
    inCustomBlock: boolean;
    markdown: string;
    encoding: string | null; // Currently processed, but not used
    waitingForListTitle: boolean;
};

const convertPODToMarkdown = (pod: string): string => {
    let finalMarkdown: string = "";
    let state: ConversionState = {
        inList: false,
        inVerbatim: false,
        inCustomBlock: false,
        markdown: "",
        encoding: null,
        waitingForListTitle: false,
    };

    const lines = pod.split("\n");

    for (let i = 0; i < lines.length; i++) {
        let line = lines[i];

        // Check for verbatim blocks first, perhaps ending a prior one
        if (shouldConsiderVerbatim(line) || state.inVerbatim) {
            state = processVerbatim(line, state);
            finalMarkdown += state.markdown;
            if (state.inVerbatim) {
                // Don't need to keep going if we're still in verbatim mode
                continue;
            }
        }

        // Inline transformations for code, bold, etc.
        line = processInlineElements(line);

        // Handling =pod to start documentation
        if (line.startsWith("=pod")) {
            continue; // Generally, we just skip this.
        }
        // Headings
        else if (line.startsWith("=head")) {
            const output = processHeadings(line);

            if(/\w/.test(finalMarkdown) || !/^\n##+ NAME\n$/.test(output)){
                // I find it a waste of space to include the headline "NAME". We're short on space in the hover 
                finalMarkdown += output;
            }
        }
        // List markers and items
        else if (line.startsWith("=over") || line.startsWith("=item") || line.startsWith("=back") || state.waitingForListTitle) {
            state = processList(line, state);
            finalMarkdown += state.markdown;
        }
        // Custom blocks like =begin and =end
        else if (line.startsWith("=begin") || line.startsWith("=end")) {
            state = processCustomBlock(line, state);
            finalMarkdown += state.markdown;
        }
        // Format-specific blocks like =for
        else if (line.startsWith("=for")) {
            finalMarkdown += processFormatSpecificBlock(line);
        }
        // Encoding
        else if (line.startsWith("=encoding")) {
            state = processEncoding(line, state);
        }

        else if(state.inList){
            if(line){
                finalMarkdown += ` ${line} `;
            }
        }
        // Generic text
        else {
            finalMarkdown += `${line}\n`;
        }
    }

    return finalMarkdown;
};

const processHeadings = (line: string): string => {
    // Extract the heading level from the line. This will be a number from 1-6.
    let level = parseInt(line.slice(5, 6));
    level = Math.min(level, 3); // Maximum 6 indentation levels in Markdown
    // Ensure that the heading level is valid.
    if (isNaN(level) || level < 1 || level > 6) {
        return "";
    }

    // Extract the actual text of the heading, which follows the =head command.
    const text = line.slice(7).trim();

    // Convert the heading to its Markdown equivalent. I marked head1 -> ### because I prefer the compact form.
    const markdownHeading = `\n##${"#".repeat(level)} ${text}\n`;

    return markdownHeading;
};

const processList = (line: string, state: ConversionState): ConversionState => {
    let markdown: string = "";

    // The =over command starts a list.
    if (line.startsWith("=over")) {
        state.inList = true;
        markdown = "\n";
    }

    // The =item command denotes a list item.
    else if (/^=item \*\s*$/.test(line)) {
        state.waitingForListTitle= true;
        markdown = "";
    } else if (state.waitingForListTitle && /[^\s]/.test(line)) {
        state.waitingForListTitle = false;
        markdown = `\n- ${line}  \n  `;
    }

    // The =item command denotes a list item.
    else if (line.startsWith("=item")) {
        state.inList = true;

        // Remove the '=item' part to get the actual text for the list item.
        let listItem = line.substring(6).trim();
	if (listItem.startsWith("* ")) // Doubled up list identifiers
		listItem = listItem.replace("*", "");
        markdown = `\n- ${listItem}  \n  `; // Unordered list
    }
    // The =back command ends the list.
    else if (line.startsWith("=back")) {
        state.inList = false;
        markdown = "\n";
    }

    return {
        ...state,
        markdown,
    };
};

const processCustomBlock = (line: string, state: ConversionState): ConversionState => {
    let markdown = "";

    // =begin starts a custom block
    if (line.startsWith("=begin")) {
        // Extract the format following =begin
        const format = line.slice(7).trim();
        state.inCustomBlock = true;

        // Choose Markdown representation based on the format
        switch (format) {
            case "code":
                markdown = "```perl\n";
                break;
            // Add cases for other formats as needed
            default:
                markdown = `<!-- begin ${format} -->\n`;
                break;
        }
    }
    // =end ends the custom block
    else if (line.startsWith("=end")) {
        // Extract the format following =end
        const format = line.slice(5).trim();
        state.inCustomBlock = false;

        // Close the Markdown representation
        switch (format) {
            case "code":
                markdown = "```\n";
                break;
            // Add cases for other formats as needed
            default:
                markdown = `<!-- end ${format} -->\n`;
                break;
        }
    }

    return {
        ...state,
        markdown,
    };
};

const processFormatSpecificBlock = (line: string): string => {
    // The `=for` command itself is followed by the format and then the text.
    const parts = line.split(" ").slice(1);

    if (parts.length < 2) {
        return "";
    }

    // Extract the format and the actual text.
    const format = parts[0].trim();
    const text = parts.slice(1).join(" ").trim();

    // Choose the Markdown representation based on the format.
    let markdown = "";
    switch (format) {
        case "text":
            // Plain text, just add it.
            markdown = `${text}\n`;
            break;
        case "html":
            // If it's HTML, encapsulate it within comments for safety.
            markdown = `<!-- HTML: ${text} -->\n`;
            break;
        // Add more cases as you find the need for other specific formats.
        default:
            // For unsupported or custom formats, wrap it in a comment.
            markdown = `<!-- ${format} block: ${text} -->\n`;
            break;
    }

    return markdown;
};

// Mapping backticks to the Unicode non-character U+FFFF which is not allowed to appear in text
const tempPlaceholder = '\uFFFF';

const processInlineElements = (line: string): string => {

    line = line.replaceAll('`', tempPlaceholder);

    // WWW::Mechanize is a good test for this one. Code blocks with embedded link
    line = line.replace(/C<([^<>]*)L<< (?:.+?\|\/?)?(.+?) >>([^<>]*)>/g, "C<< $1 $2 $3 >>");

    // Handle code (C<code>), while allowing E<> replacements
    line = line.replace(/C<((?:[^<>]|[EL]<[^<>]+>)+?)>/g, (match, code) => escapeBackticks(code));

    // Unfortunately doesn't require the <<< to be matched in quantity. E<> is allowed automatically
    line = line.replace(/C<< (.+?) >>/g, (match, code) => escapeBackticks(code));
    line = line.replace(/C<<<+ (.+?) >+>>/g, (match, code) => escapeBackticks(code));

    // Handle special characters (E<entity>)
    line = line.replace(/E<([^>]+)>/g, (match, entity) => convertE(entity));

    // Mapping the Unicode non-character U+FFFF back to escaped backticks
    line = line.replace(new RegExp(tempPlaceholder, 'g'), '\\`');

    // Handle bold (B<bold>)
    line = line.replace(/B<([^<>]+)>/g, "**$1**");
    line = line.replace(/B<< (.+?) >>/g, "**$1**");

    // Handle italics (I<italic>)
    line = line.replace(/I<([^<>]+)>/g, "*$1*");
    line = line.replace(/I<< (.+?) >>/g, "*$1*");

    // Handle links (L<name>), URLS auto-link in vscode's markdown
    line = line.replace(/L<(http[^>]+)>/g, " $1 ");

    line = line.replace(/L<([^<>]+)>/g, "`$1`");
    line = line.replace(/L<< (.*?) >>/g, "`$1`");

    // Handle non-breaking spaces (S<text>)
    line = line.replace(/S<([^<>]+)>/g, "$1");

    // Handle file names (F<name>), converting to italics
    line = line.replace(/F<([^<>]+)>/g, "*$1*");

    // Handle index entries (X<entry>), ignoring as Markdown doesn't have an index
    line = line.replace(/X<([^<>]+)>/g, "");

    // Escape HTML entities last since we use them above
    line = escapeHTML(line);

    return line;
};


function escapeRegExp(str: string): string {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
  }
  


const escapeHTML = (str: string): string => {
    const map: { [key: string]: string } = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        '"': "&quot;",
        "'": "&#039;",
        "\\\\": "\\", // Two backslashes become one

        // These are required for the regex to consume & to ensure they don't get mapped to amp style.
        "\\&": "\\&", 
        "\\<": "\\<", 
        '\\"': '\\"', 
        "\\'": "\\'", 
    };

    // If the number of backticks is odd, it means backticks are unbalanced
    const backtickCount = (str.match(/`/g) || []).length;
    const segments = str.split("`");

    if (backtickCount % 2 !== 0 || segments.length % 2 === 0) {
        // Handle the unbalanced backticks here
        str = str.replaceAll("`", "");
    }

    // Escape special characters and create a regex pattern
    const pattern = new RegExp( Object.keys(map).map(escapeRegExp).join('|'), 'g' );

    for (let i = 0; i < segments.length; i += 2) {
        segments[i] = segments[i].replace(pattern, (m) => map[m]);
    }

    return segments.join("`");
};

const escapeBackticks = (str: string): string => {
    let count = (str.match(new RegExp(tempPlaceholder, 'g')) || []).length;
    str = str.replace(new RegExp(tempPlaceholder, 'g'), '`'); // Backticks inside don't need to be escaped.
    let delimiters = "`".repeat(count + 1);
    return `${delimiters}${str}${delimiters}`;
};

const convertE = (content: string): string => {
    
    switch (content) {
        case "lt":
            return "<";
        case "gt":
            return ">";
        case "verbar":
            return "|";
        case "sol":
            return "/";
        default:
            if (/^0x[\da-fA-F]+$/.test(content)) {
                return String.fromCodePoint(parseInt(content.substring(2), 16));
            } else if (/^0[0-7]+$/.test(content)) {
                return String.fromCodePoint(parseInt(content.substring(1), 8));
            } else if (/^\d+$/.test(content)) {
                return String.fromCodePoint(parseInt(content, 10));
            } else {
                return `&${content};`;
            }
    }
};

// Determine if the line should start a verbatim text block
const shouldConsiderVerbatim = (line: string): boolean => {
    // A verbatim block starts with a whitespace but isn't part of a list
    return /^\s+/.test(line);
};

// Process verbatim text blocks
const processVerbatim = (line: string, state: ConversionState): ConversionState => {
    let markdown = "";
    if (/^\s+/.test(line)) {
        // If this is the start of a new verbatim block, add Markdown code fence
        if (!state.inVerbatim) {
            markdown += "\n```\n";
        }
        state.inVerbatim = true;

        // Trim some starting whitespace and add the line to the block
        // Most pod code has 4 spaces or a tab, but I find 2 space indents most readable in the space constrained pop-up
        markdown += line.replace(/^(?:\s{4}|\t)/, "  ") + "\n";
    }
    // } else if(/^\s+/.test(line)){
    //     // Verbatim blocks in lists are tricky. Let's just do one line at a time for now so we don't need to keep track of indentation
    //     markdown = "```\n" + line + "```\n";
    //     state.isLineVerbatim = true;
    // }
    else if (state.inVerbatim) {
        // This line ends the verbatim block
        state.inVerbatim = false;
        markdown += "```\n"; // End the Markdown code fence
    }

    return {
        ...state,
        markdown,
    };
};

const processEncoding = (line: string, state: ConversionState): ConversionState => {
    // Extract the encoding type from the line
    const encodingType = line.split(" ")[1]?.trim();

    if (encodingType) {
        return {
            ...state,
            encoding: encodingType,
        };
    }

    return state;
};
