import { SourceType } from "../config";

/**
 * Placeholder for turning under-dotted ("pointed") characters into EpiDoc
 * `<unclear>` markup. Every conversion rule is currently disabled, so the
 * input is handed back untouched.
 *
 * @param text content to convert
 * @returns the input text, unchanged
 */
function pointedCharacters2Epidoc(text: string): string {
    // Disabled rules, kept for reference:
    //   text.replace(/([aA-zZ])\x{323}/g, '<unclear>$1</unclear>');    // a
    //   text.replace(/\u1E5B\/u/g, '<unclear>r</unclear>');            // r
    const unchanged = text;
    return unchanged;
}

/**
 * Convert an input text using different variants of the Leiden conventions
 * into EpiDoc XML.
 * Adopted from the [Ausohnum Library](https://gitlab.huma-num.fr/estudium/ausohnum-library/-/blob/master/resources/scripts/teiEditor/ancientTextImportRules.js).
 *
 * The conversion is a long chain of regular-expression rewrites applied
 * strictly in sequence: later rules operate on (and clean up after) the
 * markup produced by earlier ones, so the order of the statements matters.
 * Most rules use the character class `[aA-zZ]`, which spans the ASCII range
 * `A`..`z` and therefore also matches the characters between `Z` and `a`,
 * including `[` and `]`; several rules depend on that, so it is kept as is.
 *
 * @param text content to convert
 * @param importSource the type of the input; `SourceType.edcs` and
 *   `SourceType.phi` enable additional source-specific rules
 * @returns resulting epiDoc XML
 */
export function ancientText2XML(text: string, importSource: SourceType): string {
    // Only the "new text" import mode is used here.
    const textImportMode = 'newText';
    const startingLineNumber = 1;

    // Remove tabs and carriage returns.
    text = text.toString().replace(/\t+/g, '');
    text = text.replace(/\r/g, '');

    // Drop a single trailing blank.
    if (text.endsWith(' ')) {
        text = text.slice(0, -1);
    }

    if (textImportMode === 'newText') {
        /*
         *****************************
         *        Line breaks        *
         *****************************/

        // First line
        text = '<lb n="' + startingLineNumber.toString() + '"/>' + text;

        // Every newline starts a new <lb/>; a trailing "-" or "=" marks a word
        // that continues on the next line (break="no").
        const regexLine = /(\-|-?|\=?)\s?\n/g;
        let index = startingLineNumber - 1;
        text = text.replace(regexLine, (_match: string, selection: string) => {
            const lineNumber = index++ + 2;
            return (selection === '-' || selection === '=')
                ? '\n<lb n="' + lineNumber + '" break="no"/>'
                : '\n<lb n="' + lineNumber + '"/>';
        });

        // Whole line in lacuna [— — — — — — — — — — —]
        text = text.replace(/(\<lb n=\"[0-9]*\"\/\>)\[(\s?—\s?){3,50}\]\n(\<lb n=\"[0-9]*\"\/\>)/g, '$1<gap reason="illegible" quantity="1" unit="line"/>\n$3');

        if (importSource === SourceType.edcs) {
            // "//" is a column break, "/" a line break. A separator that is not
            // preceded by whitespace splits a word (break="no").
            index = 0;
            text = text.replace(/(\s)?(\/\/)(?!\>)(\s)?/g, (match: string) => {
                const breakNo = match.startsWith('/') ? ' break="no"' : '';
                return '\n<cb n="' + (index++ + 2) + '"' + breakNo + '/>';
            });

            const regexSlashLine = /(\s)?(\/{1})(?!\>)(\s)?/g;
            index = 0;
            text = text.replace(regexSlashLine, (match: string) => {
                const breakNo = match.startsWith('/') ? ' break="no"' : '';
                return '\n<lb n="' + (index++ + 2) + '"' + breakNo + '/>';
            });
        }

        if (importSource === SourceType.phi) {
            // The three-character run has to be handled before the single marker,
            // otherwise the single-marker rule consumes every occurrence first.
            text = text.replace(/#⁷#⁷#⁷/g, '<gap reason="illegible" quantity="3" unit="character"/>');
            text = text.replace(/#⁷/g, '<gap reason="illegible" quantity="1" unit="character"/>');
            text = text.replace(/#⁵⁶/g, '<g type="interpunct">▴</g>');
        } // End of features specific to PHI

        // Lines 5, 10, 15, etc.
        // NOTE(review): this pattern expects a single-quoted n attribute, whereas
        // the <lb/> tags generated above use double quotes - confirm it is still needed.
        const regexLines5 = /\n<lb n=\'([0-9])\'\/>\1\s/g;
        const substLines5 = '\n<lb n="$1"/>';
        text = text.replace(regexLines5, substLines5);

        // Remove an original line number printed right after a generated tag.
        const regexLineClean = /(\"[0-9]{1,3}\"\/>)([0-9]{1,3})/g;
        text = text.replace(regexLineClean, '$1');
    } // End of line breaks for the newText mode

    /*
     *****************************
     *       Corrections         *
     *****************************/
    // <text> and ⟨text⟩: characters omitted by the stonecutter
    text = text.replace(/\<([^\x00-\x7F]*[aA-zZ]*)(?!\=)(?!\/)\>/g, '<supplied reason="omitted">$1</supplied>');
    text = text.replace(/⟨([^\x00-\x7F]*[aA-zZ]*)(?!\=)(?!\/)⟩/g, '<supplied reason="omitted">$1</supplied>');

    /*
     *****************************
     *     EDCS-only features    *
     *****************************/
    if (importSource === SourceType.edcs) {
        // <x=Y>: corrected reading x for the inscribed Y
        const regexCorrection2EDCS = /\<([^\x00-\x7F]*[aA-zZ]*)(\=)([^\x00-\x7F]*[aA-zZ]*)(?!\/)\>/g;
        text = text.replace(regexCorrection2EDCS, '<choice><corr>$1</corr><sic>$3</sic></choice>');

        // [6]: lost line; [3]: lacuna of unknown extent
        text = text.replace(/\[6\]/g,
            '<gap reason="lost" quantity="1" unit="line"/>');
        text = text.replace(/\[3\]/g,
            '<gap reason="lost" extent="unknown" unit="character"/>');
        text = text.replace(/\[3\s([^\x00-\x7F]*[aA-zZ]*)\]/g,
            '<gap reason="lost" extent="unknown" unit="character"/>'
            + '<supplied reason="lost">$1</supplied>');
        text = text.replace(/\[3\s/g,
            '<gap reason="lost" extent="unknown" unit="character"/>'
            + '<supplied reason="lost">');
        // Remaining "3" markers are rewritten to dashes so that the generic
        // lacuna rules further down pick them up.
        text = text.replace(/\s3\s/g, ' - - - ');
        text = text.replace(/\s3\]/g, ' - - -]');
        text = text.replace(/a\(\)/g, '<abbr>a</abbr>');
    }
    // End of EDCS-specific features

    // <note>: (!), (sic)
    text = text.replace(/\(\!\)/g, '<note>!</note>');
    text = text.replace(/\(sic\)/g, '<note>sic</note>');

    /*
     *****************************
     *          Lacunae          *
     *****************************/

    // [– – –]
    text = text.replace(/\[(\–|\-)\s?(\–|\-)\s?(\–|\-)\]/g,
        '<gap reason="lost" extent="unknown" unit="character"/>');

    // Lacuna with an explicit number of letters: [...6...]
    text = text.replace(/\[(?:(?:\s?\.\s?){1,99})([1-9][0-9]*)(?:(?:\s?\.\s?){1,99})\]/g,
        '<gap reason="lost" quantity="$1" unit="character"/>');
    // [...6...SPACEtext
    text = text.replace(/\[(?:(?:\s?\.\s?){1,99})([1-9][0-9]*)(?:(?:\s?\.\s?){1,99})\s/g,
        '<gap reason="lost" quantity="$1" unit="character"/><supplied reason="lost">');
    // textSPACE...6...]
    text = text.replace(/\s(?:(?:\s?\.\s?){1,99})([1-9][0-9]*)(?:(?:\s?\.\s?){1,99})\]/g,
        '</supplied><gap reason="lost" quantity="$1" unit="character"/>');
    // textSPACE...6...SPACEtext
    text = text.replace(/\s(?:(?:\s?\.\s?){1,99})([1-9][0-9]*)(?:(?:\s?\.\s?){1,99})\s/g,
        '</supplied><gap reason="lost" quantity="$1" unit="character"/><supplied reason="lost">');

    // Lacuna whose length is given by the number of dots. In each of the four
    // patterns the match is the dots plus exactly two delimiter characters.
    // [....]
    text = text.replace(/\[((\.){1,99})\]/g, (match: string) =>
        '<gap reason="lost" quantity="' + (match.length - 2) + '" unit="character"/>');
    // [...SPACEtext
    text = text.replace(/\[((\.){1,99})\s/g, (match: string) =>
        '<gap reason="lost" quantity="' + (match.length - 2) + '" unit="character"/><supplied reason="lost">');
    // textSPACE...]
    text = text.replace(/\s((\.){1,99})\]/g, (match: string) =>
        '</supplied><gap reason="lost" quantity="' + (match.length - 2) + '" unit="character"/>');
    // textSPACE...SPACEtext
    text = text.replace(/\s((\.){1,99})\s/g, (match: string) =>
        '</supplied><gap reason="lost" quantity="' + (match.length - 2) + '" unit="character"/><supplied reason="lost">');

    // [ca. 5-7]
    text = text.replace(/\[(-|–|\.\s?){1,20}ca\.(\s?)([1-9][0-9]*)((-)([1-9][0-9]*))(\s?)(-|–|\.\s?){1,20}\]/g,
        '<gap reason="lost" atLeast="$3" atMost="$6" '
        + 'unit="character"/>');
    // [ca. 5]
    text = text.replace(/\[(-|–|\.\s?){1,20}ca\.(\s?)([1-9][0-9]*)(\s?)(-|–|\.\s?){1,20}\]/g,
        '<gap reason="lost" quantity="$3" '
        + 'unit="character" precision="low"/>');

    // [...c.5-7...]
    text = text.replace(
        /\[(?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:(?:-)([1-9][0-9]*))(?:\s?)(?:[\.․]){1,20}(?:\s?)\]/g,
        '<gap reason="lost" atLeast="$1" atMost="$2" unit="character"/>');
    // [...c.5...]
    text = text.replace(
        /\[(?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:\s?)(?:[\.․]){1,20}(?:\s?)\]/g,
        '<gap reason="lost" quantity="$1" unit="character" precision="low"/>');

    // [...c.5-7... text
    text = text.replace(
        /\[(?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:(?:-)([1-9][0-9]*))(?:\s?)(?:[\.․]){1,20}(?:\s)/g,
        '<gap reason="lost" atLeast="$1" atMost="$2" unit="character"/><supplied reason="lost">');
    // [...c.5... text
    text = text.replace(
        /\[(?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:\s?)(?:[\.․]){1,20}(?:\s)/g,
        '<gap reason="lost" quantity="$1" unit="character" precision="low"/><supplied reason="lost">');

    // [text ...c.5-7...]
    text = text.replace(
        /\s(?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:(?:-)([1-9][0-9]*))(?:\s?)(?:[\.․]){1,20}(?:\s?)\]/g,
        '</supplied><gap reason="lost" atLeast="$1" atMost="$2" unit="character"/>');
    // [text ...c.5...]
    text = text.replace(
        /\s(?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:\s?)(?:[\.․]){1,20}(?:\s?)\]/g,
        '</supplied><gap reason="lost" quantity="$1" unit="character" precision="low"/>');

    // text ...c.5-7... text
    text = text.replace(
        /\s(?:[\.․]){2,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:(?:-)([1-9][0-9]*))(?:\s?)(?:[\.․]){1,20}(?:\s)/g,
        '</supplied><gap reason="lost" atLeast="$1" atMost="$2" unit="character"/><supplied reason="lost">');
    // text ...c.5... text
    text = text.replace(
        /\s(?:[\.․]){2,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:\s?)(?:[\.․]){1,20}(?:\s)/g,
        '</supplied><gap reason="lost" quantity="$1" unit="character" precision="low"/><supplied reason="lost">');

    // Symbol (centurio): a parenthesis that directly follows whitespace
    text = text.replace(/(?:\s|\n)\(([^\x00-\x7F]*[aA-zZ]*)\)/g,
        ' <expan><ex>$1</ex></expan>');

    // Normalise double brackets to ⟦ ⟧
    text = text.replace(/\[{2}/g, '⟦');
    text = text.replace(/\]{2}/g, '⟧');

    // Abbreviation starting in a lacuna: [Q]ui(rina)
    text = text.replace(/\[([^\x00-\x7F]*[aA-zZ]*[^\]])\]([^\x00-\x7F]*[aA-zZ]*[^\]])\(([^\x00-\x7F]*?[aA-zZ]*?)\)/g,
        '<expan><abbr><supplied reason="lost">$1</supplied>$2</abbr><ex>$3</ex></expan>');
    // Abbreviation starting in an erasure: ⟦Q⟧ui(rina)
    // Groups: $1 opener, $2 erased text, $3 closer, $4 rest of the abbreviation,
    // $5 expansion. (The replacement used to be a copy of the lacuna variant and
    // wrapped the bracket characters while dropping $4 and $5.)
    text = text.replace(/(〚|⟦|\[\[)([^\x00-\x7F]*[aA-zZ]*[^\]])(〛|⟧|]])([^\x00-\x7F]*[aA-zZ]*[^\]])\(([^\x00-\x7F]*?[aA-zZ]*?)\)/g,
        '<expan><abbr><del rend="erasure">$2</del>$4</abbr><ex>$5</ex></expan>');

    // Hermes [Augusti - - - libertus] ==> supplied text with a gap inside
    // (the bracket neither starts nor ends inside an abbreviation)
    text = text.replace(/\[(\w*?\s?)(?:(?:(?:\-|\–|\—)\s?){1,20})(\w*?)\](?![^\x00-\x7F]*?[aA-zZ]*?\()/g,
        '<supplied reason="lost">$1</supplied><gap reason="lost" extent="unknown" unit="character"/><supplied reason="lost">$2</supplied>');
    // Cleaning empty <supplied/>
    text = text.replace(/<supplied reason=\"lost\"><\/supplied>/g, '');

    // Same as above for an erasure
    text = text.replace(/(?:⟦|〚)(.*?\s?)(?:(?:(?:\-|\–|\—)\s?){1,20})(.*?)(?:⟧|〛)(?![^\x00-\x7F]*?[aA-zZ]*?\()/g,
        '<del rend="erasure">$1</del><gap reason="lost" extent="unknown" unit="character"/><del rend="erasure">$2</del>');

    // Supplied: BLANK[...]NEWLINE
    text = text.replace(/(?:\s)\[(.*?)\](?:\n)/g, ' <supplied reason="lost">$1</supplied>\n');
    // Supplied: BLANK[...]BLANK
    text = text.replace(/(?:\s)\[(.*)\](?:\s)/g, ' <supplied reason="lost">$1</supplied> ');
    // Erased: BLANK⟦...⟧NEWLINE
    text = text.replace(/(?:\s)(?:⟦|〚)(.*?)(?:⟧|〛)(?:\n)/g, ' <del rend="erasure">$1</del>\n');
    // Erased: BLANK⟦...⟧BLANK (the opening tag used to be misspelled "<del256")
    text = text.replace(/(?:\s)(?:⟦|〚)(.*?)(?:⟧|〛)(?:\s)/g, ' <del rend="erasure">$1</del> ');

    // Abbreviation partly in lacuna, nothing after ")": Au[g(usti)]
    text = text.replace(/([^\x00-\x7F]*[aA-zZ]*)\[([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)(\])/g,
        '<expan><abbr>$1<supplied reason="lost">$2</supplied></abbr><ex>$3</ex></expan>');
    // Abbreviation partly in erasure, nothing after ")"
    text = text.replace(/([^\x00-\x7F]*[aA-zZ]*)(?:⟦|〚)([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)(?:⟧|〛)/g,
        '<expan><abbr>$1<del rend="erasure">$2</del></abbr><ex>$3</ex></expan>');

    // SUPPLIED: Au[g(usti) followed by words, "]" not inside an abbreviation
    text = text.replace(/([^\x00-\x7F]*?[aA-zZ]*?[^>])?\[([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*?[aA-zZ]*?)\)(.*?[^\-\–\—])\](?![^\x00-\x7F]*?[aA-zZ]*\()/g,
        '<expan><abbr>$1<supplied reason="lost">$2</supplied></abbr><ex>$3</ex></expan> <supplied reason="lost">$4</supplied>');

    // DEL: Au⟦g(usti) followed by words, "⟧" not inside an abbreviation
    text = text.replace(/([^\x00-\x7F]*?[aA-zZ]*?[^\s][^>])?(?:\s?)(?:⟦|〚)([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*?[aA-zZ]*?)\)(.*?[^\-\–\—])(?:⟧|〛)(?![^\x00-\x7F]*?[aA-zZ]*\()/g,
        '<expan><abbr>$1<del rend="erasure">$2</del></abbr><ex>$3</ex></expan> <del rend="erasure">$4</del>');

    // SUPPLIED: Hermes Au[g(usti) libertus pr]oc(urator) - bracket starts and ends inside an abbreviation
    text = text.replace(/([^\x00-\x7F]*?[aA-zZ]+)\[([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*?[aA-zZ]*?)\)(.*?[^\-\–\—])([^\x00-\x7F]*?[aA-zZ]*?)\](?:([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*?[aA-zZ]*?)\))/g,
        '<expan><abbr>$1<supplied reason="lost">$2</supplied></abbr><ex>$3</ex></expan> <supplied reason="lost">$4</supplied><expan><abbr><supplied reason="lost">$5</supplied>$6</abbr><ex>$7</ex></expan>');
    // DEL: same for an erasure
    text = text.replace(/([^\x00-\x7F]*?[aA-zZ]*?)(?:⟦|〚)([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*?[aA-zZ]*?)\)(.*?[^\-\–\—])([^\x00-\x7F]*?[aA-zZ]*?)(?:⟧|〛)(?:([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*?[aA-zZ]*?)\))/g,
        '<expan><abbr>$1<del rend="erasure">$2</del></abbr><ex>$3</ex></expan> <del rend="erasure">$4</del><expan><abbr><del rend="erasure">$5</del>$6</abbr><ex>$7</ex></expan>');

    // <supplied reason="lost"> ․․․c.5-7․․․ </supplied>
    text = text.replace(/<supplied reason="lost"> (?:[\.․]){1,20}c(?:a?)\.(?:\s?)([1-9][0-9]*)(?:(?:-)([1-9][0-9]*))(?:\s?)(?:[\.․]){1,20} <\/supplied>/g,
        '<gap reason="lost" atLeast="$1" atMost="$2" unit="character"/>');
    // Cleaning <supplied reason="lost"><gap .../></supplied>
    // NOTE(review): atMost only accepts a single digit here and in the similar
    // cleaning rules near the end - confirm whether multi-digit values should match.
    text = text.replace(/<supplied reason=\"lost\">(<gap reason=\"lost\" atLeast=\"[0-9]*\" atMost=\"[0-9]\" unit="character"\/>)<\/supplied>/g,
        '$1');

    // Supplied: [szs szsz sszss(f) sss]
    text = text.replace(/\[(.[^\[\-\–\—]*)\](?![^\x00-\x7F]*[aA-zZ]*\()/g, '<supplied reason="lost">$1</supplied>');
    // DEL: ⟦szs szsz sszss(f) sss] (the element used to be closed with </supplied>)
    text = text.replace(/(?:⟦|〚)(.[^\[]*)\](?![^\x00-\x7F]*[aA-zZ]*\()/g, '<del rend="erasure">$1</del>');

    // A former rule for "Herm[es Aug(usti) libertus pr]oc(urator)" (bracket ending
    // inside an abbreviation) was disabled upstream because it was too slow.

    // SUPPLIED: [Aug(usti)
    text = text.replace(/(?:\s)\[((.[^\.<])*)\s/g,
        ' <supplied reason="lost"><expan><abbr>$1</abbr><ex>$2</ex></expan></supplied>');

    // SUPPLIED: ddd(t)]
    // NOTE(review): the leading negative lookahead wraps a pattern that can match
    // the empty string, so this rule appears to never match - confirm the intent.
    text = text.replace(/(?!([^\x00-\x7F]*[aA-zZ]*))(\s)([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)\]/g,
        '$1<supplied reason="lost"><expan><abbr>$2</abbr><ex>$3</ex></expan></supplied>');

    // SUPPLIED: dd]d(t)
    text = text.replace(/(\s)([^\x00-\x7F]*?[aA-zZ]*?)\]([^\x00-\x7F]*?[aA-zZ]*?)\(([^\x00-\x7F]*[aA-zZ]*)\)/g,
        '$1</supplied><expan><abbr><supplied reason="lost">$2</supplied>$3</abbr><ex>$4</ex></expan>');

    // SUPPLIED: A[ug(usti)
    text = text.replace(/(\s)([^\x00-\x7F]+[aA-zZ]+)\[([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)/g,
        ' <expan><abbr>$2<supplied reason="lost">$3</supplied></abbr><ex>$4</ex></expan> <supplied reason="lost">');

    // SUPPLIED: xx[frfrfrf(?) - - - - - - ]
    text = text.replace(/\[([^\x00-\x7F]*[aA-zZ]*)\(\?\)((\s?(\-|\–|\—)\s?){1,20})\]/g,
        '<supplied reason="lost" cert="low">$1</supplied><gap reason="lost" extent="unknown" unit="character"/>');

    // SUPPLIED: xx[frfrfrf - - - - - - ]
    text = text.replace(/\[([^\x00-\x7F]*[aA-zZ]*)((\s?(\-|\–|\—)\s?){1,20})\]/g,
        '<supplied reason="lost">$1</supplied><gap reason="lost" extent="unknown" unit="character"/>');

    /*
     *****************************
     *      Abbreviations        *
     *****************************/

    // Abbreviation partly in lacuna with text after ")"
    text = text.replace(/([^\x00-\x7F]*[aA-zZ]*)\[([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)(\s)?([^\x00-\x7F]*[aA-zZ]*)?(\s)?(\])/g,
        '<expan><abbr>$1<supplied reason="lost">$2</supplied></abbr><ex>$3</ex></expan><supplied reason="lost">$4$5</supplied>');

    // Abbreviation in erasure
    text = text.replace(/(〚|⟦|\[\[)([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)([^\x00-\x7F]*[aA-zZ]*)?(〛|⟧|]])/g,
        '<del rend="erasure"><expan><abbr>$2</abbr><ex>$3</ex></expan>$4</del>');

    // Abbreviation in lacuna
    text = text.replace(/\[([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)(])/g,
        '<supplied reason="lost"><expan><abbr>$1</abbr><ex>$2</ex></expan></supplied>');

    // Abbreviation with uncertain resolution: abc(def?)
    text = text.replace(/([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\?\)/g,
        '<expan><abbr>$1</abbr><ex cert="low">$2</ex></expan>');

    // Double abbreviation: a(b)c(d)
    text = text.replace(/([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)([^\x00-\x7F]*[aA-zZ]*)*\(([^\x00-\x7F]*[aA-zZ]*)\)/g,
        '<expan><abbr>$1</abbr><ex>$2</ex><abbr>$3</abbr><ex>$4</ex></expan>');

    // Basic abbreviation: a(b)c
    text = text.replace(/([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)([^\x00-\x7F]*[aA-zZ]*)*/g,
        '<expan><abbr>$1</abbr><ex>$2</ex>$3</expan>');
    // Merge directly adjacent expansions into one. A string pattern would only
    // rewrite the first occurrence, hence the global regex.
    text = text.replace(/<\/expan><expan><abbr>/g, '<abbr>');

    /*
     *****************************
     *        Line in lacuna     *
     *****************************/
    // Line in lacuna [------] (global regex for the same reason as above)
    text = text.replace(/\n\[------\]/g, '<gap unit="line" />');
    // Line in lacuna ------
    text = text.replace(/(-){6}/g, '<gap unit="line" />');

    // Part of line [— — — — — — — — — — —]
    text = text.replace(/\[(—\s?)*\]/g, '<gap reason="lost" extent="unknown" unit="character"/>');

    // [---]
    text = text.replace(/\[---\]/g, '<gap reason="lost" extent="unknown" unit="character"/>');

    // [— — —ca.x-y— — —]
    text = text.replace(/\[— — —ca\.([1-9][0-9]*)((-)([1-9][0-9]*))?— — —\]/g, '<gap reason="lost" quantity="$1" '
        + 'unit="character" precision="low"/>');

    // [— — — —ca.x-y— — — —]
    text = text.replace(/\[— — — —ca\.([1-9][0-9]*)((-)([1-9][0-9]*))?— — — —\]/g, '<gap reason="lost" quantity="$1" '
        + 'unit="character" precision="low"/>');

    // [—ca. x— ]
    text = text.replace(/\[(-|–\s?){1,20}ca\.(\s?)([1-9][0-9]*)?(\s?)(-|–\s?){1,20}\]/g, '<gap reason="lost" quantity="$3" '
        + 'unit="character" precision="low"/>');

    // Line lost of unknown extent: ------?
    // NOTE(review): runs of six hyphens have already been rewritten above, so the
    // six-hyphen variants below can presumably no longer match - confirm the order.
    text = text.replace(/\------\?/g, '<gap reason="lost" extent="unknown" '
        + 'unit="line"><certainty match=".." locus="name"/></gap>');
    // Line lost of unknown extent: [------?]
    text = text.replace(/\[\------\?\]/g, '<gap reason="lost" extent="unknown" '
        + 'unit="line"><certainty match=".." locus="name"/></gap>');
    // Line lost of unknown extent: [---?]
    text = text.replace(/\[\---\?\]/g, '<gap reason="lost" extent="unknown" '
        + 'unit="line"><certainty match=".." locus="name"/></gap>');

    // Beginning of line lost, unknown extent, with a restored word at the end: [------word]
    // e.g. [---γυμνα]
    // (An identical pattern used to run just before this one and replaced the
    // whole bracket with a bare <gap/>, discarding the restored word.)
    text = text.replace(/\[(-|–|\—\s?){1,20}([^\x00-\x7F]*[aA-zZ]*)\]/g, '<gap reason="lost" extent="unknown" '
        + 'unit="character"/><supplied reason="lost">$2</supplied>');

    // Same for an erasure: ⟦------word⟧ (the element used to be closed with </supplied>)
    text = text.replace(/(?:⟦|〚)(-|–|\—\s?){1,20}([^\x00-\x7F]*[aA-zZ]*)(?:⟧|〛)/g, '<gap reason="lost" extent="unknown" '
        + 'unit="character"/><del rend="erasure">$2</del>');

    // End of line lost, unknown extent, with restored words at the beginning: [word word ---]
    text = text.replace(
        /\[(?!([\-\–\—]))(((?!([\-\–\—\[\]]))[^\x00-\x7F]*[aA-zZ]*(\s?)){1,10})(([\-\—\–])\s?){1,20}\]/g,
        '<supplied reason="lost">$2</supplied><gap reason="lost" extent="unknown" '
        + 'unit="character"/>');
    // End of line lost, unknown extent: ---]
    text = text.replace(
        /(([\-\—\–])\s?){1,20}\]/g,
        '<gap reason="lost" extent="unknown" '
        + 'unit="character"/>');

    /*
     *****************************
     *         Clean-up          *
     *****************************/
    // <expan><abbr>[
    text = text.replace(/<expan><abbr>\[/g, '<supplied reason="lost"><expan><abbr>');
    // </expan> </supplied><
    text = text.replace(/<\/expan> <\/supplied></g, '</expan></supplied> <');
    // <supplied reason="lost"><expan><abbr>TEXT]
    text = text.replace(/<supplied reason=\"lost\"><expan><abbr>([^\x00-\x7F]*?[aA-zZ]*?)\]/g,
        '<expan><abbr><supplied reason="lost">$1</supplied>');
    // <expan><abbr></abbr><ex>
    text = text.replace(/<expan><abbr><\/abbr><ex>/g, '<expan><ex>');

    // Any bracket not handled by a previous rule simply opens/closes <supplied>.
    text = text.replace(/\[/g, '<supplied reason="lost">');
    text = text.replace(/\]/g, '</supplied>');

    // </ex></supplied></expan>
    text = text.replace(/<\/ex><\/supplied><\/expan>/g, '</ex></expan></supplied>');

    // - - - </supplied> (applied twice, as in the original rule set)
    text = text.replace(/((\s?(\-|\—|\–)\s?){1,20})<\/supplied>/g,
        '</supplied><gap reason="lost" extent="unknown" unit="character"/>');
    text = text.replace(/((\s?(\-|\—|\–)\s?){1,20})<\/supplied>/g,
        '</supplied><gap reason="lost" extent="unknown" unit="character"/>');
    // <supplied reason="lost">- - -
    text = text.replace(/<supplied reason=\"lost\">((\s?(\-|\—|\–)\s?){1,20})/g,
        '<gap reason="lost" extent="unknown" unit="character"/><supplied reason="lost">');
    // <supplied reason="lost"></supplied><gap .../><supplied reason="lost"></supplied>
    text = text.replace(/<supplied reason=\"lost\"><\/supplied><gap reason=\"lost\" extent=\"unknown\" unit=\"character\"\/><supplied reason=\"lost\"><\/supplied>/g
        , '</supplied><gap reason="lost" extent="unknown" unit="character"/><supplied reason="lost">');
    // <supplied reason="lost"> </supplied>
    text = text.replace(/<supplied reason=\"lost\"> <\/supplied>/g, '');
    // <del rend="erasure">\s?</del>
    text = text.replace(/<del rend=\"erasure\">\s?<\/del>/g, '');
    // </supplied>\s?</supplied>
    text = text.replace(/<\/supplied>\s?<\/supplied>/g, '</supplied>');
    // <lb /></supplied><gap
    text = text.replace(/\/><\/supplied><gap/g, '/><gap');

    // Supplied: [szs - - - sss]
    text = text.replace(/\[(?!([\-\–\—]))((?:\s?(?:(?!(?:[\-\–\—\[\]]))[^\x00-\x7F]*[aA-zZ](?!\s)*)){1,10})(?:(?:\s?[\-\—\–])\s?){1,20}((?:(?!([\-\–\—\[\]]))[^\x00-\x7F]*[aA-zZ]*))\]/g,
        '<supplied reason="lost">$2</supplied><gap reason="lost" extent="unknown" unit="character"/><supplied reason="lost">$3</supplied>');
    // Any remaining run of dashes is treated as a gap in the middle of a lacuna.
    text = text.replace(
        /(\s?([\-\—\–])\s?){1,20}/g,
        '</supplied><gap reason="lost" extent="unknown" '
        + 'unit="character"/><supplied reason="lost">');

    // Supplied with "]" inside an abbreviation
    text = text.replace(/\[(.[^\[]*)(\s)([^\x00-\x7F]*[aA-zZ]*)\]([^\x00-\x7F]*[aA-zZ]*)\(([^\x00-\x7F]*[aA-zZ]*)\)/g,
        '<supplied reason="lost">$1</supplied><expan><abbr><supplied reason="lost">$3</supplied>$4</abbr><ex>$5</ex></expan>');

    // Illegible characters: one <gap/> per run of "+", sized by the run length
    const regexIllegibleCharacter = /([+])+/g;
    text = text.replace(regexIllegibleCharacter, (match: string) =>
        '<gap reason="illegible" quantity="' + match.length + '" unit="character"/>');

    // Gap written with one-dot leaders [․․․]; the match is the dots plus two brackets
    const regexGapCharacter = /\[(․{1,20})\]/g;
    text = text.replace(regexGapCharacter, (match: string) =>
        '<gap reason="illegible" quantity="' + (match.length - 2) + '" unit="character"/>');

    // Opening <supplied> directly followed by a gap
    text = text.replace(/<supplied reason=\"lost\"> <gap/g,
        ' <gap');

    // Hedera written as "hed."
    // NOTE(review): the pattern requires a combining dot below (U+0323) right
    // after "hed."; that character was embedded invisibly in the original regex
    // literal and is written as an escape here - confirm it is intended.
    const regexHed = /(hed\.)\u0323/gi;
    text = text.replace(regexHed, '<g type="hedera">❦</g>');

    // vac.
    const regexVac = /vac\./gi;
    text = text.replace(regexVac, '<space extent="unknown" unit="character"/>');

    /*
     *****************************
     *       Superfluous         *
     *****************************/
    const regexSuperfluous = /\{([^\x00-\x7F]*[aA-zZ]*)\}/g;
    text = text.replace(regexSuperfluous, '<surplus>$1</surplus>');

    /*
     *****************************
     *          Erased           *
     *****************************/
    // 〚 〛 (U+301A / U+301B)
    text = text.replace(/(〚)(([^\x00-\x7F]*[aA-zZ]*)([\s\,\.]([^\x00-\x7F]*[aA-zZ]*))*)(〛)/gm,
        '<del rend="erasure">$2</del>');
    // ⟦ ⟧ (U+27E6 / U+27E7)
    text = text.replace(/(⟦)(([^\x00-\x7F]*[aA-zZ]*)([\s\,\.]([^\x00-\x7F]*[aA-zZ]*))*)(⟧)/gm,
        '<del rend="erasure">$2</del>');
    // [[ ]]
    text = text.replace(/(\[){2}(([^\x00-\x7F]*[aA-zZ]*)([\s\,\.]([^\x00-\x7F]*[aA-zZ]*))*)(\]){2}/gm,
        '<del rend="erasure">$2</del>');

    /*
     *****************************
     *       Restitutions        *
     *****************************/
    // Restored text
    const regexTextInLacuna = /(\[)(([^\x00-\x7F]*[aA-zZ]*)([\s\,\.]([^\x00-\x7F]*[aA-zZ]*))*)(\])/gm;
    text = text.replace(regexTextInLacuna, '<supplied reason="lost">$2</supplied>');

    // Restored text followed by --- in a lacuna
    const regexTextandUnkInLacuna = /(\[)([^\x00-\x7F]*[aA-zZ]*\s?[^\x00-\x7F]*[aA-zZ]*)(---)?(\])/g;
    text = text.replace(regexTextandUnkInLacuna, '<supplied reason="lost">$2</supplied><gap reason="lost" />');

    // Dotted characters: a character followed by a combining dot below (U+0323)
    const regexDotted = /([^\x00-\x7F]?[aA-zZ]?)\u0323/g;
    text = text.replace(regexDotted, '<unclear>$1</unclear>');
    // Merge consecutive <unclear> elements
    text = text.replace(/(\<\/unclear\>\<unclear\>)/g, '');

    // <gap atLeast atMost/><expan><abbr>li</supplied>
    text = text.replace(/(<gap reason=\"lost\" atLeast=\"[0-9]*\" atMost=\"[0-9]\" unit=\"character\"\/>)<expan><abbr>((?:[^\x00-\x7F]*?[aA-zZ]*?)*)<\/supplied>/g,
        '$1<expan><abbr><supplied reason="lost">$2</supplied>');
    // <gap atLeast atMost/></supplied>
    text = text.replace(/(<gap reason=\"lost\" atLeast=\"[0-9]*\" atMost=\"[0-9]\" unit=\"character\"\/>)<\/supplied>/g,
        '$1');
    // NOTE(review): same pattern as two statements above; the original comment
    // describes the quantity/precision="low" form of <gap/> instead - confirm.
    text = text.replace(/(<gap reason=\"lost\" atLeast=\"[0-9]*\" atMost=\"[0-9]\" unit=\"character\"\/>)<expan><abbr>((?:[^\x00-\x7F]*?[aA-zZ]*?)*)<\/supplied>/g,
        '$1<expan><abbr><supplied reason="lost">$2</supplied>');
    // <gap quantity precision="low"/></supplied>
    text = text.replace(/(<gap reason=\"lost\" quantity=\"[0-9]*\" unit=\"character\" precision=\"low\"\/>)<\/supplied>/g,
        '$1');
    // <lb/> not preceded by a newline
    text = text.replace(/(\/\>)\s(\<lb n=\"[0-9]*\"\/\>)/g, "$1\n$2");
    // Line gap followed by text on the same line
    text = text.replace(/unit=\"line\"\/>\s(\w)/g, 'unit="line"/>$1');

    // Symbols: interpunct, denarius, hedera
    text = text.replace(/ ?∙ ?/g, ' <g type="interpunct">▴</g> ');
    text = text.replace(/ ?𐆖 ?/g, ' <g type="denarius"/> ');
    text = text.replace(/❦/g, ' <g type="hedera">❦</g> ');

    // Collapse pairs of whitespace characters into one blank
    text = text.replace(/\s\s/g, ' ');

    // Centered words: remove any remaining runs of blanks
    text = text.replace(/ {2,99}/g, "");

    return text;
}