/*
 * Decompiled with CFR 0.152.
 */
package org.opendataloader.pdf.utils;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.verapdf.wcag.algorithms.entities.SemanticTextNode;
import org.verapdf.wcag.algorithms.entities.content.TextLine;

/**
 * Static helpers that decide whether a text line or text node starts with a
 * list label (a bullet glyph, a connected line-art label, or a text prefix
 * matching one of the known label patterns).
 */
public class BulletedParagraphUtils {
    /**
     * Every character accepted as a bullet glyph. The tail of the string holds
     * supplementary characters written as surrogate pairs, so the string must be
     * read by code point, not by {@code char}.
     */
    private static final String POSSIBLE_LABELS = "\u2218*+-.=\u2010\u2011\u2012\u2013\u2014\u2015\u2022\u2023\u2024\u2027\u203b\u2043\u204e\u2192\u21b3\u21d2\u21e8\u21fe\u2219\u25a0\u25a1\u25a2\u25a3\u25a4\u25a5\u25a6\u25a7\u25a8\u25a9\u25aa\u25ac\u25ad\u25ae\u25af\u25b0\u25b1\u25b2\u25b3\u25b4\u25b5\u25b6\u25b7\u25b8\u25b9\u25ba\u25bb\u25bc\u25bd\u25be\u25bf\u25c0\u25c1\u25c2\u25c3\u25c4\u25c5\u25c6\u25c7\u25c8\u25c9\u25ca\u25cb\u25cc\u25cd\u25ce\u25cf\u25d0\u25d1\u25d2\u25d3\u25d4\u25d5\u25d6\u25d7\u25d8\u25d9\u25e2\u25e3\u25e4\u25e5\u25e6\u25e7\u25e8\u25e9\u25ea\u25eb\u25ec\u25ed\u25ee\u25ef\u25f0\u25f1\u25f2\u25f3\u25f4\u25f5\u25f6\u25f7\u25f8\u25f9\u25fa\u25fb\u25fc\u25fd\u25fe\u25ff\u2605\u2606\u2610\u2611\u2612\u2613\u261b\u261e\u2660\u2661\u2662\u2663\u2664\u2665\u2666\u2667\u26aa\u26ab\u26ac\u2713\u2714\u2715\u2716\u2717\u2718\u2719\u271a\u271b\u271c\u271d\u271e\u271f\u2726\u2727\u2728\u274d\u274f\u2750\u2751\u2752\u2756\u2794\u2799\u279b\u279c\u279d\u279e\u279f\u27a0\u27a1\u27a2\u27a3\u27a4\u27a5\u27a6\u27a7\u27a8\u27a9\u27aa\u27ad\u27ae\u27af\u27b1\u2b1b\u2b1c\u2b1d\u2b1e\u2b1f\u2b20\u2b21\u2b22\u2b23\u2b24\u2b25\u2b26\u2b27\u2b28\u2b29\u2b2a\u2b2b\u2b2c\u2b2d\u2b2e\u2b2f\u2b50\u2b51\u2b52\u2b53\u2b54\u2b55\u2b56\u2b57\u2b58\u2b59\u2bc0\u2bc1\u2bc2\u2bc3\u2bc4\u2bc5\u2bc6\u2bc7\u2bc8\u2bcc\u2bcd\u2bce\u2bcf\u2bd0\u3007\uf046\uf06c\uf06d\uf06e\uf06f\uf070\uf071\uf072\uf073\uf074\uf075\uf076\uf077\uf09e\uf09f\uf0a0\uf0a1\uf0a2\uf0a3\uf0a4\uf0a5\uf0a6\uf0a7\uf0a8\uf0a9\uf0aa\uf0ab\uf0ac\uf0ad\uf0ae\uf0af\uf0b2\uf0b6\uf0d8\uf0dc\uf0e0\uf0e8\uf0fc\uf0fe\udb80\udc7e\udb80\udeea\udb80\udeeb\udb80\udeec\udb80\udeed\udb80\udeee\udb80\udeef\udb80\udef0\udb80\udef1\udb80\udef2\udb80\udef3\udb80\udef4\udb80\udef5\udb80\udef6\udb80\udef7\udb80\udef8\udb80\udef9\udb80\udefa\udb80\udefb\udb80\udefc";

    /** Code points of {@link #POSSIBLE_LABELS}, computed once for exact lookups. */
    private static final Set<Integer> LABEL_CODE_POINTS = toCodePointSet(POSSIBLE_LABELS);

    /** Label patterns, compiled once; each must match the whole line value. */
    private static final List<Pattern> BULLET_PATTERNS = new ArrayList<Pattern>();

    /** Filled by the static initializer; no method of this class reads it. */
    private static final Set<String> ARABIC_NUMBER_REGEXES = new HashSet<String>();

    /** Character class of the Hangul syllables accepted as a list label. */
    private static final String KOREAN_NUMBERS_REGEX = "[\uac00\ub098\ub2e4\ub77c\ub9c8\ubc14\uc0ac\uc544\uc790\ucc28\uce74\ud0c0\ud30c\ud558\uac70\ub108\ub354\ub7ec\uba38\ubc84\uc11c\uc5b4\uc800\ucc98\ucee4\ud130\ud37c\ud5c8\uace0\ub178\ub3c4\ub85c\ubaa8\ubcf4\uc18c\uc624\uc870\ucd08\ucf54\ud1a0\ud3ec\ud638\uad6c\ub204\ub450\ub8e8\ubb34\ubd80\uc218\uc6b0\uc8fc\ucd94\ucfe0\ud22c\ud478\ud6c4\uadf8\ub290\ub4dc\ub974\ubbc0\ube0c\uc2a4\uc73c\uc988\uce20\ud06c\ud2b8\ud504\ud750\uae30\ub2c8\ub514\ub9ac\ubbf8\ube44\uc2dc\uc774\uc9c0\uce58\ud0a4\ud2f0\ud53c\ud788]";

    /**
     * Pattern for a line that starts with a Hangul prefix syllable, one or more
     * digits and one of three Hangul suffix syllables. It is also registered as
     * one of the label patterns.
     */
    public static final String KOREAN_CHAPTER_REGEX = "^(\uc81c\\d+[\uc7a5\uc870\uc808]).*";

    /**
     * Returns the first character of the node's text as the label.
     *
     * @param semanticTextNode node whose text is inspected
     * @return the first code point of the text (two {@code char}s when it is a
     *         surrogate pair), or an empty string when the text is null or empty
     */
    public static String getLabel(SemanticTextNode semanticTextNode) {
        String value = semanticTextNode.getValue();
        if (value == null || value.isEmpty()) {
            return "";
        }
        // Take the whole code point so a surrogate pair is never split in half.
        return value.substring(0, Character.charCount(value.codePointAt(0)));
    }

    /**
     * Tells whether the first line of the node is a bulleted line.
     *
     * @param textNode node to inspect
     * @return {@code true} when the first line is labeled; {@code false}
     *         otherwise, including when the node reports no first line
     */
    public static boolean isBulletedParagraph(SemanticTextNode textNode) {
        TextLine firstLine = textNode.getFirstLine();
        return firstLine != null && BulletedParagraphUtils.isBulletedLine(firstLine);
    }

    /**
     * Tells whether the line is bulleted; delegates to {@link #isLabeledLine(TextLine)}.
     *
     * @param textLine line to inspect
     * @return {@code true} when the line is labeled
     */
    public static boolean isBulletedLine(TextLine textLine) {
        return BulletedParagraphUtils.isLabeledLine(textLine);
    }

    /**
     * Tells whether the line carries a label: its first character is a known
     * bullet glyph, it has a connected line-art label, or its text matches one
     * of the label patterns.
     *
     * @param textLine line to inspect
     * @return {@code true} when any of the three conditions holds
     */
    public static boolean isLabeledLine(TextLine textLine) {
        String value = textLine.getValue();
        boolean hasText = value != null && !value.isEmpty();
        // Compare the whole first code point: a char-based lookup would accept any
        // character that merely shares a high surrogate with a listed glyph.
        if (hasText && LABEL_CODE_POINTS.contains(value.codePointAt(0))) {
            return true;
        }
        if (textLine.getConnectedLineArtLabel() != null) {
            return true;
        }
        return hasText && findBulletPattern(value) != null;
    }

    /**
     * Tells whether the first line of the node has a connected line-art label.
     *
     * @param textNode node to inspect
     * @return {@code true} when the first line exists and its line-art label is non-null
     */
    public static boolean isBulletedLineArtParagraph(SemanticTextNode textNode) {
        TextLine firstLine = textNode.getFirstLine();
        return firstLine != null && firstLine.getConnectedLineArtLabel() != null;
    }

    /**
     * Finds the label pattern matched by the first line of the node.
     *
     * @param textNode node to inspect
     * @return the regex string that matches the first line's text, or
     *         {@code null} when none matches or the node has no first line or text
     */
    public static String getLabelRegex(SemanticTextNode textNode) {
        TextLine firstLine = textNode.getFirstLine();
        if (firstLine == null) {
            return null;
        }
        String value = firstLine.getValue();
        if (value == null) {
            return null;
        }
        Pattern matched = findBulletPattern(value);
        return matched == null ? null : matched.pattern();
    }

    /** Returns the first registered pattern matching the whole value, or null. */
    private static Pattern findBulletPattern(String value) {
        for (Pattern pattern : BULLET_PATTERNS) {
            if (pattern.matcher(value).matches()) {
                return pattern;
            }
        }
        return null;
    }

    /** Compiles the regex once and registers it as a label pattern. */
    private static void addBulletRegex(String regex) {
        BULLET_PATTERNS.add(Pattern.compile(regex));
    }

    /** Collects the code points of the text, reading surrogate pairs as one entry. */
    private static Set<Integer> toCodePointSet(String text) {
        Set<Integer> codePoints = new HashSet<Integer>();
        int index = 0;
        while (index < text.length()) {
            int codePoint = text.codePointAt(index);
            codePoints.add(codePoint);
            index += Character.charCount(codePoint);
        }
        return codePoints;
    }

    static {
        ARABIC_NUMBER_REGEXES.add("^\\d+[ \\.\\]\\)>].*");
        ARABIC_NUMBER_REGEXES.add("^<\\d+>.*");
        ARABIC_NUMBER_REGEXES.add("^\\[\\d+\\].*");
        // Braces sit inside character classes: a bare opening brace is parsed as a
        // repetition quantifier and is rejected by the regex compiler.
        ARABIC_NUMBER_REGEXES.add("^[{]\\d+[}].*");
        ARABIC_NUMBER_REGEXES.add("^\u3010\\d+\u3011.*");

        // Digits in parentheses, or digits followed by '.' or ')' and whitespace.
        addBulletRegex("^\\(\\d+\\).*");
        addBulletRegex("^\\d+[\\.\\)]\\s+.*");

        // A single Hangul jamo followed by a closing delimiter.
        addBulletRegex("^[\u3131\u3134\u3137\u3139\u3141\u3142\u3145\u3147\u3148\u314a\u314b\u314c\u314d\u314e][\\.\\)\\]>].*");

        // A single Hangul syllable, bare or wrapped in brackets.
        addBulletRegex("^" + KOREAN_NUMBERS_REGEX + "\\..+");
        addBulletRegex("^" + KOREAN_NUMBERS_REGEX + "[)\\]>].*");
        addBulletRegex("^" + KOREAN_NUMBERS_REGEX + "(-\\d+).*");
        addBulletRegex("^\\(" + KOREAN_NUMBERS_REGEX + "\\).*");
        addBulletRegex("^<" + KOREAN_NUMBERS_REGEX + ">.*");
        addBulletRegex("^\\[" + KOREAN_NUMBERS_REGEX + "\\].*");
        addBulletRegex("^[{]" + KOREAN_NUMBERS_REGEX + "[}].*");

        addBulletRegex(KOREAN_CHAPTER_REGEX);
        addBulletRegex("^\ubc95\\.(\uc81c\\d+\uc870).*");
        addBulletRegex("^[I]\\..*");

        // Single-character labels, one pattern per contiguous code-point range.
        addBulletRegex("^[\u2160-\u216b].*");
        addBulletRegex("^[\u2170-\u217b].*");
        addBulletRegex("^[\u2460-\u2473].*");
        addBulletRegex("^[\u2474-\u2487].*");
        addBulletRegex("^[\u2488-\u249b].*");
        addBulletRegex("^[\u249c-\u24b5].*");
        addBulletRegex("^[\u24b6-\u24cf].*");
        addBulletRegex("^[\u24d0-\u24e9].*");
        addBulletRegex("^[\u24f5-\u24fe].*");
        addBulletRegex("^[\u2776-\u277f].*");
        addBulletRegex("^[\u2780-\u2789].*");
        addBulletRegex("^[\u278a-\u2793].*");
        addBulletRegex("^[\u326e-\u327b].*");
        addBulletRegex("^[\uf081-\uf08a].*");
        addBulletRegex("^[\uf08c-\uf095].*");
    }
}

