/**
 * Parses the article HTML to extract information about its h2 headers. This information
 * is cleaned up and returned as headline data containing each h2's text and its 'slugified' id.
 * The ids are also assigned to the <h2> tags in the article body.
 *
 * The stored header information is used later to construct an interactive table of contents.
 * @param text - article body as an HTML string (may be null)
 * @param type - kind of content being parsed: "article" or "district"
 */
import {DomUtils, parseDocument} from "htmlparser2";

import {deburr} from "@pg-mono/nodash";

import {ITableOfContentsElement} from "../actions/fetch_article_detail";

/**
 * Shape of the value returned by `extractHeadlines`.
 */
interface IParsedArticleText {
    /** Article HTML serialized back from the parsed document; an empty string when the input text was empty or null. */
    articleText: string;
    /** Flat list of ids gathered from the collected headlines; `null` when there was no input text or no headline was collected. */
    articleHeaderIds: string[] | null;
    /** Headline entries collected from the article; an empty array when none were collected. */
    tableOfContents: ITableOfContentsElement[];
}

// Value of the `class` attribute that identifies the table of contents wrapper created by the TinyMCE plugin.
const TABLE_OF_CONTENTS_TINYMCE_CLASS = "mce-toc";
/**
 * Parses article HTML, assigns a slug-based `id` to every `<h2>` and collects those
 * headings so an interactive table of contents can be built from them.
 *
 * Processing steps:
 *  1. The first element whose `class` attribute equals `TABLE_OF_CONTENTS_TINYMCE_CLASS`
 *     (the table of contents embedded by the TinyMCE plugin) is removed from the document.
 *  2. Every `<h2>` whose text is not exactly "Spis treści" (Polish for "Table of contents")
 *     gets an `id` derived from its text: diacritics stripped, trimmed, whitespace runs turned
 *     into `-`, all characters other than word characters and `-` removed, lower-cased.
 *  3. Ids are kept unique within the document: when a slug has already been used, a numeric
 *     suffix (`-2`, `-3`, ...) is appended so anchors never collide.
 *
 * @param text - Article body as HTML. `null` or an empty string yields an empty result.
 * @param type - Content kind. For `"district"` a leading `<digits>-` prefix is stripped from each slug.
 * @returns The re-serialized HTML (with ids set on the `<h2>` tags), the collected h2 entries
 * in document order and the list of their ids (`null` when no heading was collected).
 */
export const extractHeadlines = (text: string | null, type: "article" | "district"): IParsedArticleText => {
    if (!text) {
        return {
            articleText: "",
            tableOfContents: [],
            articleHeaderIds: null
        };
    }

    const parsedArticleTextDOM = parseDocument(text, {decodeEntities: true});

    // need to remove table of contents created by tinymce plugin
    const tableOfContentsElementWrapper = DomUtils.filter((element: Element) => {
        return DomUtils.getAttributeValue(element, "class") === TABLE_OF_CONTENTS_TINYMCE_CLASS;
    }, parsedArticleTextDOM);

    // only the first element with class="mce-toc" is removed
    if (tableOfContentsElementWrapper.length > 0) {
        DomUtils.removeElement(tableOfContentsElementWrapper[0]);
    }

    // Select the h2 elements first, so that text extraction runs only for headings
    // instead of for every element of the document.
    const h2Elements = DomUtils.findAll((childNode: {tagName: string; attribs: {id: string}}) => {
        return childNode.tagName === "h2";
    }, parsedArticleTextDOM.childNodes);

    const headlinesList: ITableOfContentsElement[] = [];
    const headerIds: string[] = [];
    const usedIds = new Set<string>();

    for (const h2Element of h2Elements) {
        const headingText = DomUtils.getText(h2Element);

        // A heading that merely titles a table of contents is not a content section.
        if (headingText === "Spis treści") {
            continue;
        }

        let slugifiedText = deburr(headingText)
            .trim()
            .replace(/\s+/g, "-")
            .replace(/[^\w-]+/g, "")
            .toLowerCase();

        if (type === "district") {
            slugifiedText = slugifiedText.replace(/^\d+-/gm, "");
        }

        // Two headings with the same text would otherwise share one id, which makes
        // anchors ambiguous; keep the first as-is and suffix the following ones.
        let uniqueId = slugifiedText;
        for (let suffix = 2; usedIds.has(uniqueId); suffix++) {
            uniqueId = `${slugifiedText}-${suffix}`;
        }
        usedIds.add(uniqueId);

        h2Element.attribs.id = uniqueId;

        headlinesList.push({id: uniqueId, text: headingText, elements: []});
        headerIds.push(uniqueId);
    }

    return {
        articleText: DomUtils.getOuterHTML(parsedArticleTextDOM),
        tableOfContents: headlinesList,
        articleHeaderIds: headerIds.length > 0 ? headerIds : null
    };
};
/**
 * Parses the article HTML to extract information about its h2 headers. This information
 * is cleaned up and returned as headline data containing each h2's text and its 'slugified' id.
 * The ids are also assigned to the <h2> tags in the article body.
 *
 * The stored header information is used later to construct an interactive table of contents.
 * @param text - article body as an HTML string (may be null)
 * @param type - kind of content being parsed: "article" or "district"
 */
import {DomUtils, parseDocument} from "htmlparser2";

import {deburr} from "@pg-mono/nodash";

import {ITableOfContentsElement} from "../actions/fetch_article_detail";

/**
 * Shape of the value returned by `extractHeadlines`.
 */
interface IParsedArticleText {
    /** Article HTML serialized back from the parsed document; an empty string when the input text was empty or null. */
    articleText: string;
    /** Flat list of ids gathered from the collected headlines; `null` when there was no input text or no headline was collected. */
    articleHeaderIds: string[] | null;
    /** Headline entries collected from the article; an empty array when none were collected. */
    tableOfContents: ITableOfContentsElement[];
}

// Value of the `class` attribute that identifies the table of contents wrapper created by the TinyMCE plugin.
const TABLE_OF_CONTENTS_TINYMCE_CLASS = "mce-toc";
/**
 * Parses article HTML, assigns a slug-based `id` to every `<h2>` and collects those
 * headings so an interactive table of contents can be built from them.
 *
 * Processing steps:
 *  1. The first element whose `class` attribute equals `TABLE_OF_CONTENTS_TINYMCE_CLASS`
 *     (the table of contents embedded by the TinyMCE plugin) is removed from the document.
 *  2. Every `<h2>` whose text is not exactly "Spis treści" (Polish for "Table of contents")
 *     gets an `id` derived from its text: diacritics stripped, trimmed, whitespace runs turned
 *     into `-`, all characters other than word characters and `-` removed, lower-cased.
 *  3. Ids are kept unique within the document: when a slug has already been used, a numeric
 *     suffix (`-2`, `-3`, ...) is appended so anchors never collide.
 *
 * @param text - Article body as HTML. `null` or an empty string yields an empty result.
 * @param type - Content kind. For `"district"` a leading `<digits>-` prefix is stripped from each slug.
 * @returns The re-serialized HTML (with ids set on the `<h2>` tags), the collected h2 entries
 * in document order and the list of their ids (`null` when no heading was collected).
 */
export const extractHeadlines = (text: string | null, type: "article" | "district"): IParsedArticleText => {
    if (!text) {
        return {
            articleText: "",
            tableOfContents: [],
            articleHeaderIds: null
        };
    }

    const parsedArticleTextDOM = parseDocument(text, {decodeEntities: true});

    // need to remove table of contents created by tinymce plugin
    const tableOfContentsElementWrapper = DomUtils.filter((element: Element) => {
        return DomUtils.getAttributeValue(element, "class") === TABLE_OF_CONTENTS_TINYMCE_CLASS;
    }, parsedArticleTextDOM);

    // only the first element with class="mce-toc" is removed
    if (tableOfContentsElementWrapper.length > 0) {
        DomUtils.removeElement(tableOfContentsElementWrapper[0]);
    }

    // Select the h2 elements first, so that text extraction runs only for headings
    // instead of for every element of the document.
    const h2Elements = DomUtils.findAll((childNode: {tagName: string; attribs: {id: string}}) => {
        return childNode.tagName === "h2";
    }, parsedArticleTextDOM.childNodes);

    const headlinesList: ITableOfContentsElement[] = [];
    const headerIds: string[] = [];
    const usedIds = new Set<string>();

    for (const h2Element of h2Elements) {
        const headingText = DomUtils.getText(h2Element);

        // A heading that merely titles a table of contents is not a content section.
        if (headingText === "Spis treści") {
            continue;
        }

        let slugifiedText = deburr(headingText)
            .trim()
            .replace(/\s+/g, "-")
            .replace(/[^\w-]+/g, "")
            .toLowerCase();

        if (type === "district") {
            slugifiedText = slugifiedText.replace(/^\d+-/gm, "");
        }

        // Two headings with the same text would otherwise share one id, which makes
        // anchors ambiguous; keep the first as-is and suffix the following ones.
        let uniqueId = slugifiedText;
        for (let suffix = 2; usedIds.has(uniqueId); suffix++) {
            uniqueId = `${slugifiedText}-${suffix}`;
        }
        usedIds.add(uniqueId);

        h2Element.attribs.id = uniqueId;

        headlinesList.push({id: uniqueId, text: headingText, elements: []});
        headerIds.push(uniqueId);
    }

    return {
        articleText: DomUtils.getOuterHTML(parsedArticleTextDOM),
        tableOfContents: headlinesList,
        articleHeaderIds: headerIds.length > 0 ? headerIds : null
    };
};
