"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.getLogPatterns = getLogPatterns;
exports.runCategorizeTextAggregation = runCategorizeTextAggregation;
var _categorization_analyzer = require("@kbn/aiops-log-pattern-analysis/categorization_analyzer");
var _calculateAuto = require("@kbn/calculate-auto");
var _lodash = require("lodash");
var _moment = _interopRequireDefault(require("moment"));
var _esQuery = require("@kbn/es-query");
var _p_value_to_label = require("../../utils/p_value_to_label");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

/**
 * Builds the per-field aggregation map that is nested under the
 * `random_sampler` aggregation.
 *
 * Every field gets a `categorize_text` aggregation with these sub-aggregations:
 * min/max of `@timestamp`, a single `top_hits` sample, and - only when
 * `includeChanges` is truthy - a `date_histogram` named `timeseries` plus a
 * `change_point` aggregation named `changes` that reads `timeseries>_count`.
 *
 * @param {object} options
 * @param {string[]} options.fields Fields to categorize.
 * @param {string[]} options.metadata Extra fields requested for the sample hit.
 * @param {boolean} options.useMlStandardTokenizer Selects the inline
 *   `ml_standard` analyzer instead of the imported categorization analyzer.
 * @param {boolean} options.includeChanges Adds the histogram/change point aggs.
 * @param {number} options.size Passed through as the `categorize_text` size.
 * @param {number} options.start Lower histogram bound (`end - start` is
 *   treated as milliseconds).
 * @param {number} options.end Upper histogram bound.
 * @returns {object} Aggregation definitions keyed by field name.
 */
function buildCategorizeTextAggs({
  fields,
  metadata,
  useMlStandardTokenizer,
  includeChanges,
  size,
  start,
  end
}) {
  const categorizationAnalyzer = useMlStandardTokenizer ? {
    tokenizer: 'ml_standard',
    char_filter: [{
      type: 'pattern_replace',
      pattern: '\\\\n',
      replacement: ''
    }]
  } : _categorization_analyzer.categorizationAnalyzer;

  // The interval depends only on the time range, so it is computed once rather
  // than once per field, and only when the histogram is actually requested.
  const fixedInterval = includeChanges ? `${_calculateAuto.calculateAuto.atLeast(30, _moment.default.duration(end - start, 'ms')).asMilliseconds()}ms` : undefined;
  return Object.fromEntries(fields.map(field => [field, {
    categorize_text: {
      field,
      min_doc_count: 1,
      size,
      categorization_analyzer: categorizationAnalyzer
    },
    aggs: {
      minTimestamp: {
        min: {
          field: '@timestamp'
        }
      },
      maxTimestamp: {
        max: {
          field: '@timestamp'
        }
      },
      ...(includeChanges ? {
        timeseries: {
          date_histogram: {
            field: '@timestamp',
            min_doc_count: 0,
            extended_bounds: {
              min: start,
              max: end
            },
            fixed_interval: fixedInterval
          }
        },
        changes: {
          change_point: {
            buckets_path: 'timeseries>_count'
          }
        }
      } : {}),
      sample: {
        top_hits: {
          size: 1,
          _source: false,
          fields: [field, ...metadata],
          sort: {
            _score: {
              order: 'desc'
            }
          },
          highlight: {
            fields: {
              '*': {}
            }
          }
        }
      }
    }
  }]));
}

/**
 * Converts the `changes` sub-aggregation of a category bucket into a single
 * change descriptor.
 *
 * Only the first entry of `bucket.changes.type` is used; `undefined` is
 * returned when that object has no entries.
 *
 * @param {object} bucket Category bucket that contains `timeseries` and `changes`.
 * @returns {object|undefined} The change descriptor, if any.
 */
function toChangePoint(bucket) {
  const firstChange = Object.entries(bucket.changes.type)[0];
  if (firstChange === undefined) {
    return undefined;
  }
  const [changePointType, change] = firstChange;
  return {
    type: changePointType,
    significance: change.p_value !== undefined ? (0, _p_value_to_label.pValueToLabel)(change.p_value) : null,
    change_point: change.change_point,
    p_value: change.p_value,
    // `change_point` is used as an index into the histogram buckets.
    timestamp: change.change_point !== undefined ? bucket.timeseries.buckets[change.change_point].key_as_string : undefined
  };
}

/**
 * Maps one `categorize_text` bucket to the pattern object returned to callers.
 *
 * @param {string} fieldName Field the bucket was computed for.
 * @param {object} bucket Category bucket from the search response.
 * @returns {object} Pattern with count, regex, sample, highlight, metadata and
 *   first/last occurrence; `timeseries` and `change` are added only when the
 *   bucket has a `timeseries` sub-aggregation.
 */
function toLogPattern(fieldName, bucket) {
  const sampleHit = bucket.sample.hits.hits[0];
  const pattern = {
    field: fieldName,
    count: bucket.doc_count,
    pattern: bucket.key,
    regex: bucket.regex,
    sample: sampleHit.fields[fieldName][0],
    highlight: sampleHit.highlight != null ? sampleHit.highlight : {},
    metadata: sampleHit.fields,
    firstOccurrence: new Date(bucket.minTimestamp.value).toISOString(),
    lastOccurrence: new Date(bucket.maxTimestamp.value).toISOString()
  };
  if (!('timeseries' in bucket)) {
    return pattern;
  }
  return {
    ...pattern,
    timeseries: bucket.timeseries.buckets.map(dateBucket => ({
      x: dateBucket.key,
      y: dateBucket.doc_count
    })),
    change: toChangePoint(bucket)
  };
}

/**
 * Runs a sampled `categorize_text` search for the given fields and flattens
 * the resulting category buckets into a list of pattern objects.
 *
 * The search is issued through `esClient.search('get_log_patterns', request)`
 * with `size: 0`, a `10s` timeout, and a filter made of `query` plus the date
 * range for `start`/`end`.
 *
 * @param {object} options
 * @param {object} options.esClient Client exposing `search(operationName, request)`.
 * @param {string[]} options.fields Fields to categorize.
 * @param {string[]} [options.metadata=[]] Extra fields to fetch for each
 *   sample hit. Optional: omitting it requests only the categorized field.
 * @param {string|string[]} options.index Index (pattern) to search.
 * @param {object} options.query Query clause added to the bool filter.
 * @param {number} options.samplingProbability `random_sampler` probability.
 * @param {boolean} options.useMlStandardTokenizer Use the inline `ml_standard`
 *   analyzer instead of the imported categorization analyzer.
 * @param {boolean} options.includeChanges Also compute a time series and a
 *   change point per pattern.
 * @param {number} options.size Maximum categories requested per field.
 * @param {number} options.start Start of the time range.
 * @param {number} options.end End of the time range.
 * @returns {Promise<object[]>} One entry per category bucket across all
 *   fields; an empty array when the response has no aggregations.
 */
async function runCategorizeTextAggregation({
  esClient,
  fields,
  metadata = [],
  index,
  query,
  samplingProbability,
  useMlStandardTokenizer,
  includeChanges,
  size,
  start,
  end
}) {
  const aggs = buildCategorizeTextAggs({
    fields,
    metadata,
    useMlStandardTokenizer,
    includeChanges,
    size,
    start,
    end
  });
  const response = await esClient.search('get_log_patterns', {
    index,
    size: 0,
    track_total_hits: false,
    timeout: '10s',
    query: {
      bool: {
        filter: [query, ...(0, _esQuery.dateRangeQuery)(start, end)]
      }
    },
    aggregations: {
      sampler: {
        random_sampler: {
          probability: samplingProbability
        },
        aggs
      }
    }
  });
  if (!response.aggregations) {
    return [];
  }

  // Everything left on the sampler aggregate after removing its own
  // bookkeeping keys is a per-field categorize_text result.
  const fieldAggregates = (0, _lodash.omit)(response.aggregations.sampler, 'seed', 'doc_count', 'probability');
  return Object.entries(fieldAggregates).flatMap(([fieldName, aggregate]) => aggregate.buckets.map(bucket => toLogPattern(fieldName, bucket)));
}
/**
 * Finds log message patterns for the text fields of an index within a time
 * range.
 *
 * Steps, in order:
 *  1. Ask field caps which of `fields` are `text` / `match_only_text`; return
 *     an empty list when none are reported.
 *  2. Count the documents matching `kql` in the time range; return an empty
 *     list when the count is 0.
 *  3. Derive a sampling probability of `100_000 / totalHits`, snapped to 1
 *     once it reaches 0.5.
 *  4. For every field group, run a sampled categorization (size 100), keep the
 *     patterns with a count of at least 50, then run a second, unsampled
 *     categorization (size 1000, ML standard tokenizer) that excludes those
 *     patterns through `must_not` match queries.
 *  5. Merge all groups, order by count descending and drop entries whose
 *     `sample` was already seen.
 *
 * When `includeChanges` is truthy every field is analyzed in its own group;
 * otherwise all fields share a single group.
 *
 * @param {object} options
 * @param {object} options.esClient Client exposing `fieldCaps` and `search`,
 *   both called as `(operationName, request)`.
 * @param {number} options.start Start of the time range.
 * @param {number} options.end End of the time range.
 * @param {string|string[]} options.index Index (pattern) to analyze.
 * @param {string} [options.kql] KQL filter handed to `kqlQuery`.
 * @param {boolean} [options.includeChanges] Forwarded to the categorization calls.
 * @param {string[]} [options.metadata=[]] Extra fields forwarded to the
 *   categorization calls.
 * @param {string[]} options.fields Candidate fields passed to field caps.
 * @returns {Promise<object[]>} Patterns ordered by count (descending), unique
 *   by sample.
 */
async function getLogPatterns({
  esClient,
  start,
  end,
  index,
  kql,
  includeChanges,
  metadata = [],
  fields
}) {
  const {
    dateRangeQuery,
    kqlQuery
  } = _esQuery;
  const {
    orderBy,
    uniqBy
  } = _lodash;
  const textFieldCaps = await esClient.fieldCaps('get_field_caps_for_log_pattern_analysis', {
    fields,
    index_filter: {
      bool: {
        filter: [...dateRangeQuery(start, end)]
      }
    },
    index,
    types: ['text', 'match_only_text']
  });
  const candidateFields = Object.keys(textFieldCaps.fields);
  if (candidateFields.length === 0) {
    return [];
  }
  const countResponse = await esClient.search('get_total_docs_for_log_pattern_analysis', {
    index,
    size: 0,
    track_total_hits: true,
    query: {
      bool: {
        filter: [...kqlQuery(kql), ...dateRangeQuery(start, end)]
      }
    }
  });
  const totalHits = countResponse.hits.total.value;
  if (totalHits === 0) {
    return [];
  }

  // Aim for roughly 100k sampled documents; from 0.5 upwards sampling is
  // switched off by using a probability of exactly 1.
  const unclampedProbability = 100_000 / totalHits;
  const samplingProbability = unclampedProbability >= 0.5 ? 1 : unclampedProbability;
  let fieldGroups;
  if (includeChanges) {
    fieldGroups = candidateFields.map(fieldName => [fieldName]);
  } else {
    fieldGroups = [candidateFields];
  }

  // Runs the frequent-then-rare categorization passes for one group of fields.
  const analyzeFieldGroup = async fieldGroup => {
    const sharedArgs = {
      esClient,
      index,
      fields: fieldGroup,
      start,
      end,
      includeChanges,
      metadata
    };
    const frequentCandidates = await runCategorizeTextAggregation({
      ...sharedArgs,
      query: {
        bool: {
          filter: kqlQuery(kql)
        }
      },
      samplingProbability,
      useMlStandardTokenizer: false,
      size: 100
    });
    if (frequentCandidates.length === 0) {
      return [];
    }

    // Patterns seen at least 50 times are kept as-is and filtered out of the
    // second pass so that it can surface the rarer ones.
    const frequentPatterns = [];
    const excludeQueries = [];
    for (const candidate of frequentCandidates) {
      if (candidate.count >= 50) {
        frequentPatterns.push(candidate);
        excludeQueries.push({
          match: {
            [candidate.field]: {
              query: candidate.pattern,
              fuzziness: 0,
              operator: 'and',
              auto_generate_synonyms_phrase_query: false
            }
          }
        });
      }
    }
    const rarePatterns = await runCategorizeTextAggregation({
      ...sharedArgs,
      query: {
        bool: {
          filter: kqlQuery(kql),
          must_not: excludeQueries
        }
      },
      size: 1000,
      samplingProbability: 1,
      useMlStandardTokenizer: true
    });
    return [...frequentPatterns, ...rarePatterns];
  };
  const patternsPerGroup = await Promise.all(fieldGroups.map(fieldGroup => analyzeFieldGroup(fieldGroup)));
  const sortedByCount = orderBy(patternsPerGroup.flat(), pattern => pattern.count, 'desc');
  return uniqBy(sortedByCount, pattern => pattern.sample);
}