"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.fetchSignificantCategories = void 0;
var _lodash = require("lodash");
var _mlChi2test = require("@kbn/ml-chi2test");
var _mlAggUtils = require("@kbn/ml-agg-utils");
var _constants = require("../constants");
var _fetch_categories = require("./fetch_categories");
var _fetch_category_counts = require("./fetch_category_counts");
var _get_normalized_score = require("./get_normalized_score");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

/**
 * Converts raw category counts into the per-category records consumed by the
 * significance check in `fetchSignificantCategories`.
 *
 * @param {Array<{key: *, count: number}>} categories - Categories with their
 *   document counts for a single time window.
 * @returns {Array<{key: *, doc_count: number, percentage: number}>} One record
 *   per input category, where `percentage` is the category's share of the
 *   summed count (a fraction, not multiplied by 100).
 */
const getCategoriesTestData = categories => {
  const totalCount = getCategoriesTotalCount(categories);
  return categories.map(d => ({
    key: d.key,
    doc_count: d.count,
    // When the window holds no documents at all, `count / total` would be
    // `0 / 0` (NaN). NaN is not replaced by the nullish fallbacks downstream
    // and makes every `observed > expected` comparison false, so report a
    // share of 0 instead.
    percentage: totalCount > 0 ? d.count / totalCount : 0
  }));
};
/**
 * Adds up the `count` of every category in the list.
 *
 * @param {Array<{count: number}>} categories - Categories to total.
 * @returns {number} Sum of all `count` values; 0 for an empty list.
 */
const getCategoriesTotalCount = categories => {
  let runningTotal = 0;
  categories.forEach(category => {
    runningTotal += category.count;
  });
  return runningTotal;
};
/**
 * Indexes items by their stringified `key` so lookups are constant time.
 *
 * When several items share a key the first one wins, which matches what a
 * linear `find` over the same list would return.
 *
 * @param {Array<{key: *}>} items - Objects exposing a `key` property.
 * @returns {Map<string, Object>} Lookup from `String(item.key)` to the item.
 */
const indexCategoriesByKey = items => {
  const index = new Map();
  for (const item of items) {
    const key = String(item.key);
    if (!index.has(key)) {
      index.set(key, item);
    }
  }
  return index;
};

/**
 * Reads `source[prop]`, returning `fallback` when either `source` or the
 * property value is `null` or `undefined`.
 *
 * @param {Object|Array|null|undefined} source - Container to read from.
 * @param {string|number} prop - Property name or array index.
 * @param {*} fallback - Value used when nothing is found.
 * @returns {*} The property value or `fallback`.
 */
const readOrDefault = (source, prop, fallback) => {
  const value = source === null || source === undefined ? undefined : source[prop];
  return value === null || value === undefined ? fallback : value;
};

/**
 * Finds log pattern categories whose share of documents is significantly
 * higher in the deviation time range than in the baseline time range.
 *
 * For every field name the overall categories are fetched once, then counted
 * separately for the baseline and the deviation window. Each category's share
 * in the two windows is compared with a chi-squared statistic (1 degree of
 * freedom); a category is reported when its p-value is at or below
 * `LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD` and its share increased.
 *
 * @param {Object} options
 * @param {Object} options.esClient - Client handed through to the fetch helpers.
 * @param {AbortSignal} [options.abortSignal] - Handed through to the fetch helpers.
 * @param {Function} options.emitError - Error callback handed through to the fetch helpers.
 * @param {Object} options.logger - Logger handed through to the fetch helpers.
 * @param {Object} options.arguments - Query arguments: `fieldNames`, optional
 *   `sampleProbability` (defaults to 1) and the remaining params, which must
 *   include `baselineMin`, `baselineMax`, `deviationMin` and `deviationMax`.
 * @returns {Promise<Array<Object>>} The significant categories; empty when the
 *   overall category results do not line up with `fieldNames`.
 */
const fetchSignificantCategories = async ({
  esClient,
  abortSignal,
  emitError,
  logger,
  arguments: args
}) => {
  // The default value of 1 means no sampling will be used
  const {
    fieldNames,
    sampleProbability = 1,
    ...params
  } = args;
  const categoriesOverall = await (0, _fetch_categories.fetchCategories)(esClient, params, fieldNames, logger, sampleProbability, emitError, abortSignal);

  // Results are paired with field names by position below, so a length
  // mismatch means they cannot be attributed reliably.
  if (categoriesOverall.length !== fieldNames.length) return [];
  const significantCategories = [];

  // Using for...of to allow `await` within the loop.
  for (const [i, fieldName] of fieldNames.entries()) {
    const overallCategories = categoriesOverall[i].categories;
    if (overallCategories.length === 0) {
      continue;
    }
    const categoriesBaseline = await (0, _fetch_category_counts.fetchCategoryCounts)(esClient, params, fieldName, categoriesOverall[i], params.baselineMin, params.baselineMax, logger, emitError, abortSignal);
    const categoriesDeviation = await (0, _fetch_category_counts.fetchCategoryCounts)(esClient, params, fieldName, categoriesOverall[i], params.deviationMin, params.deviationMax, logger, emitError, abortSignal);
    const categoriesBaselineTotalCount = getCategoriesTotalCount(categoriesBaseline.categories);
    const categoriesDeviationTotalCount = getCategoriesTotalCount(categoriesDeviation.categories);

    // Build each lookup once instead of scanning the lists for every key.
    // All three indexes use the same stringified key, so categories are
    // matched consistently even if a source reports non-string keys.
    const overallByKey = indexCategoriesByKey(overallCategories);
    const baselineByKey = indexCategoriesByKey(getCategoriesTestData(categoriesBaseline.categories));
    const deviationByKey = indexCategoriesByKey(getCategoriesTestData(categoriesDeviation.categories));

    // Get all unique keys from both windows: baseline keys first, then any
    // keys that only occur in the deviation window.
    const allKeys = new Set([...baselineByKey.keys(), ...deviationByKey.keys()]);
    for (const key of allKeys) {
      const baselineTerm = baselineByKey.get(key);
      const deviationTerm = deviationByKey.get(key);

      // A category missing from a window contributes a share of 0.
      const observed = readOrDefault(deviationTerm, 'percentage', 0);
      const expected = readOrDefault(baselineTerm, 'percentage', 0);
      const chiSquared = Math.pow(observed - expected, 2) / (expected > 0 ? expected : 1e-6); // Prevent divide by zero

      const pValue = (0, _mlChi2test.criticalTableLookup)(chiSquared, 1);
      const score = Math.log(pValue);

      // Only increases are of interest; a significant drop is ignored.
      if (pValue <= _constants.LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD && observed > expected) {
        const examples = readOrDefault(overallByKey.get(key), 'examples', []);
        significantCategories.push({
          key,
          fieldName,
          fieldValue: readOrDefault(examples, 0, ''),
          doc_count: readOrDefault(deviationTerm, 'doc_count', 0),
          bg_count: readOrDefault(baselineTerm, 'doc_count', 0),
          total_doc_count: categoriesDeviationTotalCount,
          total_bg_count: categoriesBaselineTotalCount,
          score,
          pValue,
          normalizedScore: (0, _get_normalized_score.getNormalizedScore)(score),
          type: _mlAggUtils.SIGNIFICANT_ITEM_TYPE.LOG_PATTERN
        });
      }
    }
  }
  return significantCategories;
};
// Publish the query on the CommonJS `exports` object (initialised to `void 0` at the top of the file).
exports.fetchSignificantCategories = fetchSignificantCategories;