/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.esql.expression.function.aggregate;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.compute.aggregation.AggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.CountDistinctBooleanAggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.CountDistinctBytesRefAggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.CountDistinctDoubleAggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.CountDistinctIntAggregatorFunctionSupplier;
import org.elasticsearch.compute.aggregation.CountDistinctLongAggregatorFunctionSupplier;
import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.Node;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.EsqlTypeResolutions;
import org.elasticsearch.xpack.esql.expression.Foldables;
import org.elasticsearch.xpack.esql.expression.SurrogateExpression;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.FunctionType;
import org.elasticsearch.xpack.esql.expression.function.OptionalArgument;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvCount;
import org.elasticsearch.xpack.esql.expression.function.scalar.multivalue.MvDedupe;
import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.esql.planner.ToAggregator;

public class CountDistinct
extends AggregateFunction
implements OptionalArgument,
ToAggregator,
SurrogateExpression {
    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "CountDistinct", CountDistinct::new);
    private static final Map<DataType, Function<Integer, AggregatorFunctionSupplier>> SUPPLIERS = Map.ofEntries(Map.entry(DataType.BOOLEAN, precision -> new CountDistinctBooleanAggregatorFunctionSupplier()), Map.entry(DataType.LONG, CountDistinctLongAggregatorFunctionSupplier::new), Map.entry(DataType.DATETIME, CountDistinctLongAggregatorFunctionSupplier::new), Map.entry(DataType.DATE_NANOS, CountDistinctLongAggregatorFunctionSupplier::new), Map.entry(DataType.INTEGER, CountDistinctIntAggregatorFunctionSupplier::new), Map.entry(DataType.DOUBLE, CountDistinctDoubleAggregatorFunctionSupplier::new), Map.entry(DataType.KEYWORD, CountDistinctBytesRefAggregatorFunctionSupplier::new), Map.entry(DataType.IP, CountDistinctBytesRefAggregatorFunctionSupplier::new), Map.entry(DataType.VERSION, CountDistinctBytesRefAggregatorFunctionSupplier::new), Map.entry(DataType.TEXT, CountDistinctBytesRefAggregatorFunctionSupplier::new), Map.entry(DataType.TSID_DATA_TYPE, CountDistinctBytesRefAggregatorFunctionSupplier::new));
    private static final int DEFAULT_PRECISION = 3000;
    private final Expression precision;

    @FunctionInfo(returnType={"long"}, description="Returns the approximate number of distinct values.", note="[Counts are approximate](/reference/query-languages/esql/functions-operators/aggregation-functions.md#esql-agg-count-distinct-approximate).", appendix="### Counts are approximate [esql-agg-count-distinct-approximate]\n\nComputing exact counts requires loading values into a set and returning its\nsize. This doesn\u2019t scale when working on high-cardinality sets and/or large\nvalues as the required memory usage and the need to communicate those\nper-shard sets between nodes would utilize too many resources of the cluster.\n\nThis `COUNT_DISTINCT` function is based on the\n[HyperLogLog++](https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf)\nalgorithm, which counts based on the hashes of the values with some interesting\nproperties:\n\n:::{include} /reference/aggregations/_snippets/search-aggregations-metrics-cardinality-aggregation-explanation.md\n:::\n\nThe `COUNT_DISTINCT` function takes an optional second parameter to configure\nthe precision threshold. The `precision_threshold` options allows to trade memory\nfor accuracy, and defines a unique count below which counts are expected to be\nclose to accurate. Above this value, counts might become a bit more fuzzy. The\nmaximum supported value is `40000`, thresholds above this number will have the\nsame effect as a threshold of `40000`. The default value is `3000`.\n", type=FunctionType.AGGREGATE, examples={@Example(file="stats_count_distinct", tag="count-distinct"), @Example(description="With the optional second parameter to configure the precision threshold", file="stats_count_distinct", tag="count-distinct-precision"), @Example(description="The expression can use inline functions. This example splits a string into multiple values using the `SPLIT` function and counts the unique values", file="stats_count_distinct", tag="docsCountDistinctWithExpression")})
    public CountDistinct(Source source, @Param(name="field", type={"boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version", "_tsid"}, description="Column or literal for which to count the number of distinct values.") Expression field, @Param(optional=true, name="precision", type={"integer", "long", "unsigned_long"}, description="Precision threshold. Refer to <<esql-agg-count-distinct-approximate>>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000.") Expression precision) {
        this(source, field, (Expression)Literal.TRUE, (Expression)NO_WINDOW, precision);
    }

    public CountDistinct(Source source, Expression field, Expression filter, Expression window, Expression precision) {
        this(source, field, filter, window, (List<Expression>)(precision != null ? List.of(precision) : List.of()));
    }

    private CountDistinct(Source source, Expression field, Expression filter, Expression window, List<Expression> params) {
        super(source, field, filter, window, params);
        this.precision = params.size() > 0 ? params.get(0) : null;
    }

    private CountDistinct(StreamInput in) throws IOException {
        this(Source.readFrom((StreamInput)((PlanStreamInput)in)), (Expression)in.readNamedWriteable(Expression.class), (Expression)in.readNamedWriteable(Expression.class), CountDistinct.readWindow(in), in.readNamedWriteableCollectionAsList(Expression.class));
    }

    public String getWriteableName() {
        return CountDistinct.ENTRY.name;
    }

    protected NodeInfo<CountDistinct> info() {
        return NodeInfo.create((Node)this, CountDistinct::new, (Object)this.field(), (Object)this.filter(), (Object)this.window(), (Object)this.precision);
    }

    public CountDistinct replaceChildren(List<Expression> newChildren) {
        Expression precision = newChildren.size() > 3 ? newChildren.get(3) : null;
        return new CountDistinct(this.source(), newChildren.get(0), newChildren.get(1), newChildren.get(2), precision);
    }

    @Override
    public CountDistinct withFilter(Expression filter) {
        return new CountDistinct(this.source(), this.field(), filter, this.window(), this.precision);
    }

    public DataType dataType() {
        return DataType.LONG;
    }

    @Override
    protected Expression.TypeResolution resolveType() {
        if (!this.childrenResolved()) {
            return new Expression.TypeResolution("Unresolved children");
        }
        Expression.TypeResolution resolution = EsqlTypeResolutions.isExact(this.field(), this.sourceText(), TypeResolutions.ParamOrdinal.DEFAULT).and(TypeResolutions.isType((Expression)this.field(), SUPPLIERS::containsKey, (String)this.sourceText(), (TypeResolutions.ParamOrdinal)TypeResolutions.ParamOrdinal.DEFAULT, (String[])new String[]{"any exact type except unsigned_long, _source, or counter types"}));
        if (resolution.unresolved() || this.precision == null) {
            return resolution;
        }
        return TypeResolutions.isWholeNumber((Expression)this.precision, (String)this.sourceText(), (TypeResolutions.ParamOrdinal)TypeResolutions.ParamOrdinal.SECOND).and(TypeResolutions.isFoldable((Expression)this.precision, (String)this.sourceText(), (TypeResolutions.ParamOrdinal)TypeResolutions.ParamOrdinal.SECOND));
    }

    @Override
    public AggregatorFunctionSupplier supplier() {
        int precision;
        DataType type = this.field().dataType();
        int n = precision = this.precision == null ? 3000 : this.precisionValue();
        if (!SUPPLIERS.containsKey(type)) {
            throw EsqlIllegalArgumentException.illegalDataType(type);
        }
        return SUPPLIERS.get(type).apply(precision);
    }

    private int precisionValue() {
        return Foldables.intValueOf(this.precision, this.source().text(), "Precision");
    }

    @Override
    public Expression surrogate() {
        Source s = this.source();
        Expression field = this.field();
        return field.foldable() ? new ToLong(s, (Expression)new Coalesce(s, (Expression)new MvCount(s, (Expression)new MvDedupe(s, field)), List.of(new Literal(s, (Object)0, DataType.INTEGER)))) : null;
    }

    Expression precision() {
        return this.precision;
    }
}

