/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.ml.inference.nlp.tokenizers;

import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BpeTokenizer;

/**
 * Lucene {@link Analyzer} whose token stream is produced by a {@link BpeTokenizer}.
 *
 * <p>The analyzer only holds the configuration needed to build the tokenizer; the
 * tokenizer itself is created in {@link #createComponents(String)}. A reference to
 * the most recently created tokenizer is kept so that {@link #getTokens()} can
 * expose its tokenized values.
 */
public class BpeAnalyzer extends Analyzer {
    private final List<String> vocabulary;
    private final List<String> merges;
    private final List<String> neverSplit;
    private final boolean isPrefixSpace;
    private final String unknownToken;

    // Not final: assigned (and re-assigned) each time createComponents runs.
    private BpeTokenizer innerTokenizer;

    /**
     * Stores the tokenizer configuration. Nothing is built here; every argument is
     * kept as-is (no copy, no validation) and handed to {@code BpeTokenizer.build}
     * when components are created.
     *
     * @param vocabulary    vocabulary entries passed to the tokenizer
     * @param merges        merge entries passed to the tokenizer
     * @param neverSplit    entries passed to the tokenizer as its never-split list
     * @param isPrefixSpace prefix-space flag passed to the tokenizer
     * @param unknownToken  unknown-token string passed to the tokenizer
     */
    public BpeAnalyzer(List<String> vocabulary, List<String> merges, List<String> neverSplit, boolean isPrefixSpace, String unknownToken) {
        this.vocabulary = vocabulary;
        this.merges = merges;
        this.neverSplit = neverSplit;
        this.isPrefixSpace = isPrefixSpace;
        this.unknownToken = unknownToken;
    }

    /**
     * Builds a fresh {@link BpeTokenizer} from the stored configuration, remembers it
     * for {@link #getTokens()}, and wraps it in token stream components.
     *
     * @param fieldName not used by this implementation
     * @return components backed by the newly built tokenizer
     */
    protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
        BpeTokenizer tokenizer = BpeTokenizer.build(neverSplit, vocabulary, merges, unknownToken, isPrefixSpace);
        // Record the tokenizer before wrapping it so getTokens() always sees the latest one.
        innerTokenizer = tokenizer;
        return new Analyzer.TokenStreamComponents(tokenizer);
    }

    /**
     * Returns the tokenized values held by the most recently created tokenizer.
     *
     * @return the tokenizer's tokenized values, or an empty list when
     *         {@link #createComponents(String)} has not yet produced a tokenizer
     */
    public List<BpeTokenizer.BpeToken> getTokens() {
        if (innerTokenizer == null) {
            return List.of();
        }
        return innerTokenizer.getTokenizedValues();
    }
}

