package org.alicebot.ab;

import java.lang.Character;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.reduls.sanmoku.Morpheme;
import net.reduls.sanmoku.Tagger;

/* loaded from: classes5.dex */
/**
 * Helpers that insert spaces between Japanese morphemes so that downstream
 * code can treat the text as whitespace-separated tokens.
 *
 * <p>Morphological analysis is delegated to {@link Tagger#parse}; tag-like
 * spans ({@code <x>...</x>} or {@code <x/>}) are copied through untouched.
 */
public class JapaneseTokenizer {

    /**
     * Matches a tag-like span: either {@code <...>...</...>} or {@code <.../>}.
     * The quantifiers are greedy, so on a single line the match runs from the
     * first {@code <} to the last closing {@code >} that satisfies the pattern.
     */
    /* renamed from: a, reason: collision with root package name */
    static final Pattern f7447a = Pattern.compile("(<.*>.*</.*>)|(<.*/>)");

    /**
     * Unicode blocks for hiragana, katakana and CJK unified ideographs.
     * Not read anywhere in this class; kept mutable and non-final because
     * other classes in the package may depend on it.
     */
    static Set<Character.UnicodeBlock> b = new HashSet<Character.UnicodeBlock>();

    static {
        b.add(Character.UnicodeBlock.HIRAGANA);
        b.add(Character.UnicodeBlock.KATAKANA);
        b.add(Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
    }

    /**
     * Runs the morphological analyser over {@code str} and joins the surface
     * form of every morpheme with single spaces.
     *
     * @param str text to analyse
     * @return the morpheme surfaces separated by spaces, trimmed at both ends
     */
    public static String buildFragment(String str) {
        StringBuilder joined = new StringBuilder();
        for (Morpheme morpheme : Tagger.parse(str)) {
            joined.append(morpheme.surface).append(' ');
        }
        return joined.toString().trim();
    }

    /**
     * Space-separates the morphemes of {@code str} while leaving tag-like
     * spans (see {@link #f7447a}) exactly as written.
     *
     * <p>When {@code MagicBooleans.jp_morphological_analysis} is false the
     * input is returned unchanged.
     *
     * @param str sentence to process
     * @return the processed sentence with {@code "$ "} collapsed to
     *         {@code "$"}, runs of spaces collapsed to one, and outer
     *         whitespace trimmed
     */
    public static String morphSentence(String str) {
        if (!MagicBooleans.jp_morphological_analysis) {
            return str;
        }
        Matcher matcher = f7447a.matcher(str);
        StringBuilder out = new StringBuilder();
        // Offset in the ORIGINAL input just past the previous tag. Matcher
        // offsets always refer to the original input, so the input must not be
        // truncated while iterating (the earlier code did so and mis-sliced or
        // threw on a second match).
        int consumed = 0;
        while (matcher.find()) {
            int start = matcher.start();
            int end = matcher.end();
            // Analyse all text between the previous tag and this one. The end
            // index is 'start' (exclusive), not 'start - 1', which used to drop
            // the character immediately before the tag.
            out.append(' ')
               .append(buildFragment(str.substring(consumed, start)))
               .append(' ')
               .append(str, start, end);
            consumed = end;
        }
        // Remaining text after the last tag (or the whole input if no tag matched).
        out.append(' ').append(buildFragment(str.substring(consumed)));

        String result = out.toString();
        // Re-attach '$' to the token that follows it; loop because one pass of
        // replace() can leave a new "$ " behind when several spaces followed.
        while (result.contains("$ ")) {
            result = result.replace("$ ", "$");
        }
        // Collapse runs of spaces introduced by the joins above.
        while (result.contains("  ")) {
            result = result.replace("  ", " ");
        }
        return result.trim();
    }
}
