package com.samsung.sr.nmt.t2t.translator.core.pipeline.engine.vocab.endecoder;

import com.samsung.sr.nmt.t2t.translator.core.utils.UtilityKt;
import java.util.Iterator;
import java.util.Set;
import javax.inject.Inject;
import kotlin.Metadata;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.IntRange;
import kotlin.sequences.SequencesKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.apache.commons.lang3.StringUtils;

/* compiled from: UnknownCharacterFilter.kt */
@Metadata(d1 = {"\u00002\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\"\n\u0002\u0010\f\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\u0007\n\u0002\b\u0002\u0018\u00002\u00020\u0001B\u0007\b\u0007¢\u0006\u0002\u0010\u0002J\u001c\u0010\u0006\u001a\u00020\u00072\f\u0010\b\u001a\b\u0012\u0004\u0012\u00020\n0\t2\u0006\u0010\u000b\u001a\u00020\fJ\u001c\u0010\r\u001a\u00020\u000e2\u0006\u0010\u000f\u001a\u00020\f2\f\u0010\b\u001a\b\u0012\u0004\u0012\u00020\n0\tR\u000e\u0010\u0003\u001a\u00020\u0004X\u0082\u0004¢\u0006\u0002\n\u0000R\u000e\u0010\u0005\u001a\u00020\u0004X\u0082\u0004¢\u0006\u0002\n\u0000¨\u0006\u0010"}, d2 = {"Lcom/samsung/sr/nmt/t2t/translator/core/pipeline/engine/vocab/endecoder/UnknownCharacterFilter;", "", "()V", "punctuationRegex", "Lkotlin/text/Regex;", "punctuationRegexForFiltering", "filterUnknownCharacter", "Lcom/samsung/sr/nmt/t2t/translator/core/pipeline/engine/vocab/endecoder/FilteredResult;", "alphabets", "", "", "sourceText", "", "getUnknownCharsRatio", "", "input", "translator-core"}, k = 1, mv = {1, 6, 0}, xi = 48)
/* loaded from: classes3.dex */
/**
 * Removes characters that are outside a given alphabet from a text and reports how large a
 * share of the text those characters make up.
 *
 * <p>NOTE(review): this class was recovered from decompiler output whose loop structure and
 * int-to-float conversions were damaged. The control flow below is a reconstruction; confirm it
 * against the original {@code UnknownCharacterFilter.kt} where that source is available.
 */
public final class UnknownCharacterFilter {
    /** Matches runs of Unicode letters; despite the field name it does not match punctuation. */
    private final Regex punctuationRegex = new Regex("\\p{L}+");

    /** Matches runs of punctuation, symbols and plain spaces. */
    private final Regex punctuationRegexForFiltering = new Regex("((\\p{P}|\\p{S}|( ))+)");

    @Inject
    public UnknownCharacterFilter() {
    }

    /**
     * Copies {@code sourceText} while dropping characters that are not contained in
     * {@code alphabets}, and counts the dropped characters.
     *
     * <p>For every run of characters outside the alphabet:
     * <ul>
     *   <li>each such character is counted and appended to an internal buffer;</li>
     *   <li>if a punctuation/symbol/space run starts directly after the character, that whole run
     *       is consumed together with it and remembered (only the most recent run is kept);</li>
     *   <li>once the run ends, the internal buffer is copied to the output only if it holds
     *       exactly one character, followed by either the remembered separator run or, when a
     *       separator run was consumed earlier in the run but none is remembered any more, a
     *       single space.</li>
     * </ul>
     * Characters contained in the alphabet are copied to the output unchanged.
     *
     * @param alphabets  the characters considered known
     * @param sourceText the text to filter
     * @return the filtered text together with the ratio of unknown characters to the length of
     *         {@code sourceText}; the ratio is {@code 0} for an empty text
     */
    public final FilteredResult filterUnknownCharacter(Set<Character> alphabets, String sourceText) {
        Intrinsics.checkNotNullParameter(alphabets, "alphabets");
        Intrinsics.checkNotNullParameter(sourceText, "sourceText");

        final int length = sourceText.length();
        Iterator<MatchResult> separators =
                this.punctuationRegexForFiltering.findAll(sourceText, 0).iterator();
        MatchResult separator = (MatchResult) UtilityKt.nextOrNull(separators);

        // NOTE(review): this buffer is never cleared in the recovered code, so the
        // "exactly one character" check below can only succeed for the first unknown
        // character of the whole text. Preserved as found; confirm that this is intended.
        StringBuilder unknownChars = new StringBuilder();
        StringBuilder trailingSeparators = new StringBuilder();
        StringBuilder filtered = new StringBuilder();
        int unknownCount = 0;
        int index = 0;

        while (index < length) {
            // Set when any separator run is consumed while skipping the current unknown run.
            boolean sawSeparator = false;

            while (index < length && !alphabets.contains(sourceText.charAt(index))) {
                unknownChars.append(sourceText.charAt(index));
                unknownCount++;
                int next = index + 1;

                // Discard separator matches that start at or before the character just skipped.
                while (separator != null && separator.getRange().getFirst() < next) {
                    separator = (MatchResult) UtilityKt.nextOrNull(separators);
                }

                trailingSeparators.setLength(0);
                if (separator != null && separator.getRange().getFirst() == next) {
                    // A separator run directly follows the unknown character: swallow it whole.
                    IntRange range = separator.getRange();
                    int endExclusive = range.getLast() + 1;
                    trailingSeparators.append(sourceText, range.getFirst(), endExclusive);
                    index = endExclusive;
                    if (separator.getGroups().get(2) != null) {
                        sawSeparator = true;
                    }
                } else {
                    index = next;
                }
            }

            if (unknownChars.length() == 1) {
                filtered.append(unknownChars);
            }
            if (trailingSeparators.length() == 0 && sawSeparator) {
                filtered.append(StringUtils.SPACE);
            } else {
                filtered.append(trailingSeparators);
            }
            trailingSeparators.setLength(0);

            // Copy the following run of known characters verbatim.
            while (index < length && alphabets.contains(sourceText.charAt(index))) {
                filtered.append(sourceText.charAt(index));
                index++;
            }
        }

        // Float division: an int division would truncate the ratio to 0 or 1 and would throw
        // for an empty text. Assumes FilteredResult takes the ratio as a float - TODO confirm.
        float unknownRatio = length == 0 ? 0.0f : (float) unknownCount / length;
        return new FilteredResult(filtered.toString(), unknownRatio);
    }

    /**
     * Computes the share of letters in {@code input} that are not contained in
     * {@code alphabets}. Only characters matched by {@code \p{L}+} are considered; digits,
     * punctuation and whitespace are ignored.
     *
     * @param input     the text to inspect
     * @param alphabets the characters considered known
     * @return unknown letters divided by all letters; {@code 0} when {@code alphabets} is empty
     *         or when {@code input} contains no letters
     */
    public final float getUnknownCharsRatio(String input, Set<Character> alphabets) {
        Intrinsics.checkNotNullParameter(input, "input");
        Intrinsics.checkNotNullParameter(alphabets, "alphabets");
        if (alphabets.isEmpty()) {
            return 0.0f;
        }

        int letterCount = 0;
        int unknownCount = 0;
        Iterator<MatchResult> letterRuns = this.punctuationRegex.findAll(input, 0).iterator();
        while (letterRuns.hasNext()) {
            String letters = letterRuns.next().getValue();
            letterCount += letters.length();
            for (int k = 0; k < letters.length(); k++) {
                if (!alphabets.contains(letters.charAt(k))) {
                    unknownCount++;
                }
            }
        }

        if (letterCount == 0) {
            // No letters at all: avoid dividing by zero.
            return 0.0f;
        }
        return (float) unknownCount / letterCount;
    }
}
