"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.getAnchorLogsForTimeRange = getAnchorLogsForTimeRange;
var _lodash = require("lodash");
var _crypto = require("crypto");
var _ecs_otel_fields = require("../../../utils/ecs_otel_fields");
var _get_typed_search = require("../../../utils/get_typed_search");
var _dsl_filters = require("../../../utils/dsl_filters");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

/**
 * Searches the given log indices for "anchor" logs: sampled documents, one per distinct
 * value of each correlation field, within a time range.
 *
 * A single aggregation-only search (`size: 0`) is issued as the current user. The
 * aggregations are built per correlation field so that one sequence with many anchors
 * cannot starve the discovery of other sequences.
 *
 * @param {object} params
 * @param {object} params.esClient - Client wrapper; its `asCurrentUser` member is used for the search.
 * @param {*} params.logsIndices - Passed through unchanged as the search `index`.
 * @param {*} params.startTime - Range start, handed to `timeRangeFilter` for `@timestamp`.
 * @param {*} params.endTime - Range end, handed to `timeRangeFilter` for `@timestamp`.
 * @param {*} params.kqlFilter - Handed to the `kqlFilter` DSL helper to produce extra filter clauses.
 * @param {boolean} params.errorLogsOnly - When truthy, the clause from `warningAndAboveLogFilter()` is added.
 * @param {string[]} params.correlationFields - Fields to correlate on; a document must have at least one of them.
 * @param {object} params.logger - Only `debug` is used.
 * @param {number} params.maxSequences - Upper bound on the number of anchors returned; also sizes the aggregations.
 * @returns {Promise<Array<object>>} At most `maxSequences` anchors as produced by `parseAnchorLogsFromAggregations`.
 */
async function getAnchorLogsForTimeRange({
  esClient,
  logsIndices,
  startTime,
  endTime,
  kqlFilter,
  errorLogsOnly,
  correlationFields,
  logger,
  maxSequences
}) {
  // Anchors are only ever derived per correlation field, so with no fields the result
  // is always empty. Return early instead of paying for a search round trip.
  if (correlationFields.length === 0) {
    logger.debug('No correlation fields provided, skipping anchor log search');
    return [];
  }
  const search = (0, _get_typed_search.getTypedSearch)(esClient.asCurrentUser);

  // must have at least one correlation field
  const hasAnyCorrelationField = {
    bool: {
      should: correlationFields.map(field => ({
        exists: {
          field
        }
      })),
      minimum_should_match: 1
    }
  };

  // Use aggregation-based approach to get diverse samples across all correlation fields.
  // This prevents "starvation" where a single sequence with many anchors would prevent other sequences from being discovered.
  const searchRequest = {
    size: 0,
    track_total_hits: false,
    index: logsIndices,
    query: {
      bool: {
        filter: [...(0, _dsl_filters.timeRangeFilter)('@timestamp', {
          start: startTime,
          end: endTime
        }), ...(0, _dsl_filters.kqlFilter)(kqlFilter), hasAnyCorrelationField,
        // restrict by severity only when requested, otherwise include all logs
        ...(errorLogsOnly ? [(0, _ecs_otel_fields.warningAndAboveLogFilter)()] : [])]
      }
    },
    aggs: buildDiversifiedSamplerAggregations(correlationFields, maxSequences)
  };
  const response = await search(searchRequest);
  const anchorLogs = parseAnchorLogsFromAggregations(response.aggregations, correlationFields);
  logger.debug(`Found ${anchorLogs.length} unique anchor logs across correlation fields`);
  return anchorLogs.slice(0, maxSequences);
}

/**
 * Derives the aggregation name used for a correlation field.
 *
 * The name has the shape `field_<readable>_<hash>` where `<readable>` is the field name
 * with every non-alphanumeric character replaced by `_`, and `<hash>` is the first six
 * hex characters of the SHA-256 digest of the raw field name. The hash suffix keeps
 * fields that sanitize to the same text apart,
 * e.g., 'trace.id' -> 'field_trace_id_a1b2c3', 'trace_id' -> 'field_trace_id_d4e5f6'
 */
function getAggNameForField(field) {
  const readable = field.replace(/[^a-zA-Z0-9]/g, '_');
  const digest = (0, _crypto.createHash)('sha256').update(field).digest('hex');
  return ['field', readable, digest.slice(0, 6)].join('_');
}

/**
 * Builds one aggregation per correlation field, keyed by `getAggNameForField(field)`,
 * for diverse sampling of anchor logs.
 *
 * Each per-field aggregation is layered to cope with high-cardinality fields:
 * 1. Filter (exists): only documents that have the field are considered
 * 2. Diversified Sampler: limits scope, keeping at most one document per field value
 * 3. Terms with execution_hint 'map': avoids Global Ordinals memory overhead
 * 4. Top Hits: fetches one document (with `@timestamp`) per value in the same pass
 *
 * @param {string[]} correlationFields - Fields to build aggregations for.
 * @param {number} maxSequences - Number of terms buckets per field; also drives the sampler size.
 * @returns {object} Map of aggregation name to aggregation definition.
 */
function buildDiversifiedSamplerAggregations(correlationFields, maxSequences) {
  // Oversample to improve diversity
  const shardSize = Math.max(100, maxSequences * 10);
  const namedAggs = correlationFields.map(field => {
    const uniqueValuesAgg = {
      terms: {
        field,
        size: maxSequences,
        // Disables "Global Ordinals". This is critically important for high-cardinality fields
        execution_hint: 'map'
      },
      aggs: {
        anchor_doc: {
          top_hits: {
            size: 1,
            _source: ['@timestamp']
          }
        }
      }
    };
    const samplerAgg = {
      diversified_sampler: {
        shard_size: shardSize,
        field,
        max_docs_per_value: 1
      },
      aggs: {
        unique_values: uniqueValuesAgg
      }
    };
    return [getAggNameForField(field), {
      filter: {
        exists: {
          field
        }
      },
      aggs: {
        diverse_sampler: samplerAgg
      }
    }];
  });
  return Object.fromEntries(namedAggs);
}
/**
 * Converts the per-field aggregation results into a flat, de-duplicated list of anchors.
 *
 * For every correlation field (in the order given) the buckets under
 * `<aggName>.diverse_sampler.unique_values.buckets` are read; each bucket yields one anchor
 * built from the bucket key and the first hit of its `anchor_doc` top-hits result.
 *
 * @param {object|undefined} aggregations - The `aggregations` part of the search response.
 * @param {string[]} correlationFields - Correlation fields, ordered by priority (earlier wins on duplicates).
 * @returns {Array<{'@timestamp': *, correlation: {field: string, value: string, anchorLogId: string}}>}
 *   Anchors in field order. `@timestamp` falls back to `''` and `anchorLogId` to `'unknown'`
 *   when the bucket has no hit to read them from.
 */
function parseAnchorLogsFromAggregations(aggregations, correlationFields) {
  if (!aggregations) return [];
  const candidates = correlationFields.flatMap(field => {
    const filterAgg = aggregations[getAggNameForField(field)];
    if (!filterAgg) return [];
    const sampler = filterAgg.diverse_sampler;
    const uniqueValues = sampler == null ? undefined : sampler.unique_values;
    const buckets = uniqueValues == null ? undefined : uniqueValues.buckets;
    if (buckets == null) return [];
    return buckets.map(bucket => {
      const firstHit = bucket.anchor_doc.hits.hits[0];
      const source = firstHit == null ? undefined : firstHit._source;
      const timestamp = source == null ? undefined : source['@timestamp'];
      // Kept separately from the 'unknown' placeholder so that a missing id is never
      // mistaken for a real, shared document id during de-duplication.
      const docId = firstHit == null ? undefined : firstHit._id;
      return {
        docId,
        anchor: {
          '@timestamp': timestamp == null ? '' : timestamp,
          correlation: {
            field,
            value: String(bucket.key),
            anchorLogId: docId == null ? 'unknown' : docId
          }
        }
      };
    });
  });

  // Dedupe by anchor document ID first (keeps first occurrence = highest priority field).
  // When the same document has multiple correlation fields (e.g., trace.id AND request.id),
  // we keep only the one from the highest priority field since correlationFields is ordered.
  // Then dedupe by field+value to ensure each correlation identity appears only once.
  const seenDocIds = new Set();
  const seenCorrelations = new Set();
  const anchors = [];
  for (const {
    docId,
    anchor
  } of candidates) {
    if (docId != null) {
      if (seenDocIds.has(docId)) continue;
      seenDocIds.add(docId);
    }
    // A JSON tuple keeps field and value apart unambiguously; joining them with '_' would
    // make ('a_b', 'c') and ('a', 'b_c') look like the same correlation.
    const correlationKey = JSON.stringify([anchor.correlation.field, anchor.correlation.value]);
    if (seenCorrelations.has(correlationKey)) continue;
    seenCorrelations.add(correlationKey);
    anchors.push(anchor);
  }
  return anchors;
}