/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.esql.expression.function.scalar.string;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.inference.ChunkingSettings;
import org.elasticsearch.inference.ChunkingStrategy;
import org.elasticsearch.xpack.core.inference.chunking.Chunker;
import org.elasticsearch.xpack.core.inference.chunking.ChunkerBuilder;
import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsBuilder;
import org.elasticsearch.xpack.core.inference.chunking.ChunkingSettingsOptions;
import org.elasticsearch.xpack.core.inference.chunking.SentenceBoundaryChunkingSettings;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.MapExpression;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.Node;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo;
import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.MapParam;
import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.string.ChunkBytesRefEvaluator;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;

public class Chunk
extends EsqlScalarFunction
implements OptionalArgument {
    /** Registry entry that maps the writeable name {@code "Chunk"} to the stream constructor. */
    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Chunk", Chunk::new);

    /** Chunk size used by {@link #DEFAULT_CHUNKING_SETTINGS}. */
    static final int DEFAULT_CHUNK_SIZE = 300;

    /** Sentence-boundary settings used when no {@code chunking_settings} argument is supplied. */
    public static final ChunkingSettings DEFAULT_CHUNKING_SETTINGS = new SentenceBoundaryChunkingSettings(
        Integer.valueOf(DEFAULT_CHUNK_SIZE),
        Integer.valueOf(0)
    );

    /** The expression producing the text to chunk. */
    private final Expression field;

    /** The optional chunking settings expression; {@code null} when the argument was omitted. */
    private final Expression chunkingSettings;

    /** Option names accepted inside {@code chunking_settings}, mapped to the data type listed for each. */
    public static final Map<String, DataType> ALLOWED_CHUNKING_SETTING_OPTIONS = Map.of(
        ChunkingSettingsOptions.STRATEGY.toString(), DataType.KEYWORD,
        ChunkingSettingsOptions.MAX_CHUNK_SIZE.toString(), DataType.INTEGER,
        ChunkingSettingsOptions.OVERLAP.toString(), DataType.INTEGER,
        ChunkingSettingsOptions.SENTENCE_OVERLAP.toString(), DataType.INTEGER,
        ChunkingSettingsOptions.SEPARATOR_GROUP.toString(), DataType.KEYWORD,
        ChunkingSettingsOptions.SEPARATORS.toString(), DataType.KEYWORD
    );

    /**
     * Creates a {@code CHUNK} function node.
     *
     * @param source           the source location of the function call
     * @param field            the expression producing the text to chunk
     * @param chunkingSettings the optional settings expression; when {@code null} it is left out of
     *                         the child list passed to the superclass
     */
    @FunctionInfo(appliesTo={@FunctionAppliesTo(lifeCycle=FunctionAppliesToLifecycle.PREVIEW, version="9.3.0")}, returnType={"keyword"}, preview=true, description="Use `CHUNK` to split a text field into smaller chunks.", detailedDescription="    Chunk can be used on fields from the text family like <<text, text>> and <<semantic-text, semantic_text>>.\n    Chunk will split a text field into smaller chunks, using a sentence-based chunking strategy.\n    The number of chunks returned, and the length of the sentences used to create the chunks can be specified.\n", examples={@Example(file="chunk", tag="chunk-with-field", applies_to="stack: preview 9.3.0"), @Example(file="chunk", tag="chunk-with-chunking-settings", applies_to="stack: preview 9.3.0")})
    public Chunk(Source source, @Param(name="field", type={"keyword", "text"}, description="The input to chunk.") Expression field, @MapParam(name="chunking_settings", description="Options to customize chunking behavior. Defaults to {\"strategy\":\"sentence\",\"max_chunk_size\":300,\"sentence_overlap\":0}.", optional=true, params={@MapParam.MapParamEntry(name="strategy", type={"keyword"}, description="The chunking strategy to use. Default value is `sentence`.", valueHint={"sentence", "word", "none", "recursive"}), @MapParam.MapParamEntry(name="max_chunk_size", type={"integer"}, description="The maximum size of a chunk in words. This value cannot be lower than `20` (for `sentence` strategy)\nor `10` (for `word` or `recursive` strategies). This value should not exceed the window size for any\nassociated models using the output of this function.\n", valueHint={"300"}), @MapParam.MapParamEntry(name="overlap", type={"integer"}, description="The number of overlapping words for chunks. It is applicable only to a `word` chunking strategy.\nThis value cannot be higher than half the `max_chunk_size` value.\n", valueHint={"0"}), @MapParam.MapParamEntry(name="sentence_overlap", type={"integer"}, description="The number of overlapping sentences for chunks. It is applicable only for a `sentence` chunking strategy.\nIt can be either `1` or `0`.\n", valueHint={"1", "0"}), @MapParam.MapParamEntry(name="separator_group", type={"keyword"}, description="Sets a predefined list of separators based on the selected text type. Values may be `markdown` or `plaintext`.\nOnly applicable to the `recursive` chunking strategy. When using the `recursive` chunking strategy one of\n`separators` or `separator_group` must be specified.\n", valueHint={"markdown", "plaintext"}), @MapParam.MapParamEntry(name="separators", type={"keyword"}, description="A list of strings used as possible split points when chunking text. Each string can be a plain string or a\nregular expression (regex) pattern. The system tries each separator in order to split the text, starting from\nthe first item in the list. After splitting, it attempts to recombine smaller pieces into larger chunks that stay\nwithin the `max_chunk_size` limit, to reduce the total number of chunks generated. Only applicable to the\n`recursive` chunking strategy. When using the `recursive` chunking strategy one of `separators` or `separator_group`\nmust be specified.\n", valueHint={"(?<!\\n)\\n\\n(?!\\n)", "(?<!\\n)\\n(?!\\n)"})}) Expression chunkingSettings) {
        // The optional argument is only registered as a child when it was actually supplied.
        super(source, chunkingSettings == null ? List.of(field) : List.of(field, chunkingSettings));
        this.field = field;
        this.chunkingSettings = chunkingSettings;
    }

    /**
     * Reads a {@code Chunk} from a stream: the source, then the field expression, then the
     * optional chunking settings expression, mirroring the order used by {@code writeTo}.
     *
     * @param in the stream to read from; it is cast to {@link PlanStreamInput} to read the source
     * @throws IOException if reading from the stream fails
     */
    public Chunk(StreamInput in) throws IOException {
        this(
            Source.readFrom((PlanStreamInput) in),
            in.readNamedWriteable(Expression.class),
            in.readOptionalNamedWriteable(Expression.class)
        );
    }

    /**
     * Writes this function to a stream: the source, then the field expression, then the
     * optional chunking settings expression.
     *
     * @param out the stream to write to
     * @throws IOException if writing to the stream fails
     */
    public void writeTo(StreamOutput out) throws IOException {
        source().writeTo(out);
        out.writeNamedWriteable(field);
        // Written as optional because the settings argument may be absent (null).
        out.writeOptionalNamedWriteable(chunkingSettings);
    }

    /**
     * Returns the name under which this function is registered in {@link #ENTRY}.
     *
     * @return the writeable name of this function
     */
    public String getWriteableName() {
        return ENTRY.name;
    }

    /**
     * Returns the data type produced by this function.
     *
     * @return always {@link DataType#KEYWORD}
     */
    public DataType dataType() {
        return DataType.KEYWORD;
    }

    /**
     * Resolves the type of this function: the field must be a string type and, when present,
     * the chunking settings must pass {@link #validateChunkingSettings()}.
     *
     * @return the resolution result, or an "Unresolved children" failure when the children are not yet resolved
     */
    protected Expression.TypeResolution resolveType() {
        if (childrenResolved() == false) {
            return new Expression.TypeResolution("Unresolved children");
        }
        Expression.TypeResolution fieldResolution = TypeResolutions.isString(
            field(),
            sourceText(),
            TypeResolutions.ParamOrdinal.FIRST
        );
        return fieldResolution.and(this::validateChunkingSettings);
    }

    /**
     * Validates the optional chunking settings argument.
     *
     * <p>The argument is accepted when it is absent, or when it is a {@link MapExpression} whose
     * values are all {@link Literal}s and which can be converted by {@code toChunkingSettings}
     * without an {@link IllegalArgumentException}.
     *
     * @return {@code TYPE_RESOLVED} on success, otherwise a resolution carrying the error message
     */
    private Expression.TypeResolution validateChunkingSettings() {
        if (chunkingSettings == null) {
            return Expression.TypeResolution.TYPE_RESOLVED;
        }
        if (chunkingSettings instanceof MapExpression settingsMap) {
            // Collect one message per option whose value is not a literal, joined with "; ".
            String nonConstantOptions = settingsMap.keyFoldedMap()
                .entrySet()
                .stream()
                .filter(option -> (option.getValue() instanceof Literal) == false)
                .map(
                    option -> "invalid option for ["
                        + option.getKey()
                        + "], expected a constant, found ["
                        + option.getValue().dataType()
                        + "]"
                )
                .collect(Collectors.joining("; "));
            if (nonConstantOptions.isEmpty() == false) {
                return new Expression.TypeResolution(nonConstantOptions);
            }
            try {
                // Conversion is run only for its validation side effect; the result is discarded.
                Chunk.toChunkingSettings(settingsMap);
                return Expression.TypeResolution.TYPE_RESOLVED;
            } catch (IllegalArgumentException invalidSettings) {
                return new Expression.TypeResolution(invalidSettings.getMessage());
            }
        }
        return new Expression.TypeResolution("invalid chunking_settings, found [" + chunkingSettings.sourceText() + "]");
    }

    /**
     * Reports whether this function can be folded.
     *
     * @return {@code true} when the field is foldable and the chunking settings are either absent or foldable
     */
    public boolean foldable() {
        if (field().foldable() == false) {
            return false;
        }
        Expression settings = chunkingSettings();
        return settings == null || settings.foldable();
    }

    /**
     * Creates a copy of this function with new children.
     *
     * @param newChildren the replacement children; the first is the field and the second, if present, the chunking settings
     * @return a new {@code Chunk} with the same source and the given children
     */
    public Expression replaceChildren(List<Expression> newChildren) {
        Source origin = source();
        Expression newField = newChildren.get(0);
        // A single-element list means the optional settings argument was not supplied.
        Expression newSettings = newChildren.size() > 1 ? newChildren.get(1) : null;
        return new Chunk(origin, newField, newSettings);
    }

    /**
     * Builds the {@link NodeInfo} for this node from its constructor and its two properties.
     *
     * @return node info created from the field and the chunking settings
     */
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, Chunk::new, field, chunkingSettings);
    }

    /**
     * Returns the expression producing the text to chunk.
     *
     * @return the field expression
     */
    Expression field() {
        return field;
    }

    /**
     * Returns the optional chunking settings expression.
     *
     * @return the settings expression, or {@code null} when none was supplied
     */
    Expression chunkingSettings() {
        return chunkingSettings;
    }

    /**
     * Chunks one input value and appends the trimmed chunks to the builder.
     *
     * <p>A single chunk is appended as a plain value; several chunks are wrapped in a position
     * entry so they form one multi-valued result. When the chunker yields no chunks at all, a
     * null is appended instead of leaving the builder untouched.
     *
     * @param builder          the block builder receiving the result for this input
     * @param str              the UTF-8 encoded text to chunk
     * @param chunkingSettings the settings controlling how the text is split
     */
    static void process(BytesRefBlock.Builder builder, BytesRef str, ChunkingSettings chunkingSettings) {
        List<String> chunks = Chunk.chunkText(str.utf8ToString(), chunkingSettings);
        if (chunks.isEmpty()) {
            // NOTE(review): presumably every call must contribute exactly one position to the
            // builder; appending nothing would leave the output short by one row - confirm
            // against the generated evaluator.
            builder.appendNull();
            return;
        }
        boolean multivalued = chunks.size() > 1;
        if (multivalued) {
            builder.beginPositionEntry();
        }
        for (String chunk : chunks) {
            builder.appendBytesRef(new BytesRef((CharSequence) chunk.trim()));
        }
        if (multivalued) {
            builder.endPositionEntry();
        }
    }

    /**
     * Splits text into chunks using the chunker that matches the strategy of the given settings.
     *
     * @param content          the text to split
     * @param chunkingSettings the settings selecting the strategy and passed on to the chunker
     * @return the substrings of {@code content} covered by each offset the chunker returned, in order
     */
    public static List<String> chunkText(String content, ChunkingSettings chunkingSettings) {
        ChunkingStrategy strategy = chunkingSettings.getChunkingStrategy();
        Chunker chunker = ChunkerBuilder.fromChunkingStrategy(strategy);
        return chunker.chunk(content, chunkingSettings)
            .stream()
            .map(span -> content.substring(span.start(), span.end()))
            .toList();
    }

    /**
     * Compares this function with another object.
     *
     * @param o the object to compare with
     * @return {@code true} when {@code o} has exactly this class and an equal field and equal chunking settings
     */
    @Override
    public boolean equals(Object o) {
        if (o == null) {
            return false;
        }
        if (getClass() != o.getClass()) {
            return false;
        }
        Chunk other = (Chunk) o;
        if (Objects.equals(field(), other.field()) == false) {
            return false;
        }
        return Objects.equals(chunkingSettings(), other.chunkingSettings());
    }

    /**
     * Computes a hash code from the same two properties that {@code equals} compares.
     *
     * @return the combined hash of the field and the chunking settings
     */
    @Override
    public int hashCode() {
        Expression fieldExpression = field();
        Expression settingsExpression = chunkingSettings();
        return Objects.hash(fieldExpression, settingsExpression);
    }

    /**
     * Builds the evaluator factory for this function.
     *
     * <p>The chunking settings are resolved once here: the defaults when no settings expression
     * was supplied, otherwise the settings converted from the map expression.
     *
     * @param toEvaluator maps the field expression to its evaluator factory
     * @return a factory for the chunk evaluator
     */
    @Override
    public EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) {
        ChunkingSettings resolvedSettings = chunkingSettings() == null
            ? DEFAULT_CHUNKING_SETTINGS
            : Chunk.toChunkingSettings((MapExpression) chunkingSettings());
        return new ChunkBytesRefEvaluator.Factory(source(), toEvaluator.apply(field), resolvedSettings);
    }

    /**
     * Converts a settings map expression into {@link ChunkingSettings}.
     *
     * <p>Every value is folded and any {@link BytesRef} (alone or inside a list) is converted to a
     * {@code String} before the map is handed to {@link ChunkingSettingsBuilder#fromMap}.
     *
     * @param map the map expression holding the chunking options
     * @return the chunking settings built from the folded options
     * @throws IllegalArgumentException if an option folds to {@code null}; exceptions thrown by
     *                                  {@code ChunkingSettingsBuilder.fromMap} propagate unchanged
     */
    private static ChunkingSettings toChunkingSettings(MapExpression map) {
        Map<String, Object> chunkingSettingsMap = map.keyFoldedMap().entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, option -> {
            Object value = unwrapBytesRefs(option.getValue().fold(FoldContext.small()));
            if (value == null) {
                // Collectors.toMap rejects null values with a NullPointerException; fail with a
                // descriptive IllegalArgumentException instead so validation can report it.
                throw new IllegalArgumentException("invalid option for [" + option.getKey() + "], expected a non-null value");
            }
            return value;
        }));
        return ChunkingSettingsBuilder.fromMap(chunkingSettingsMap);
    }

    /** Converts a folded value to plain Java: a BytesRef becomes a String, including inside lists. */
    private static Object unwrapBytesRefs(Object value) {
        if (value instanceof BytesRef bytesRef) {
            return bytesRef.utf8ToString();
        }
        if (value instanceof List<?> list) {
            return list.stream().<Object>map(item -> item instanceof BytesRef ref ? ref.utf8ToString() : item).toList();
        }
        return value;
    }
}

