"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.extractGrokPatternDangerouslySlow = extractGrokPatternDangerouslySlow;
var _normalize_columns = require("./normalize_columns");
var _tokenize_lines = require("./tokenize_lines");
var _mask_first_pass_patterns = require("./mask_first_pass_patterns");
var _find_delimiter = require("./find_delimiter");
var _find_consistent_split_chars = require("./find_consistent_split_chars");
var _get_useful_groups = require("./get_useful_groups");
var _flatten_groups = require("./flatten_groups");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

/**
 * WARNING: DO NOT RUN THIS FUNCTION ON THE MAIN THREAD
 *
 * Extracts structured fields (nodes) from an array of log messages by analyzing
 * patterns, delimiters, and column structures.
 *
 * This function performs multiple passes to identify consistent tokenization patterns
 * and normalize the data into a structured format. It is computationally intensive
 * and should not be run on the main thread.
 *
 * Steps:
 * 1. Masks specific patterns (e.g. quoted strings, parentheses) in the messages.
 * 2. Detects the most likely delimiter (e.g. whitespace, `|`, `;`).
 * 3. Splits messages into columns using the detected delimiter and tokenizes them.
 * 4. Identifies consistent split characters for further tokenization.
 * 5. Refines tokenization using consistent split characters.
 * 6. Normalizes columns into a unified structure across all messages.
 * 7. Identifies useful columns and collapses others into a single GREEDYDATA column.
 * 8. Flattens the structured columns into a list of tokens with delimiters inlined.
 */
function extractGrokPatternDangerouslySlow(messages) {
  // Guard: with no messages there is nothing to analyse, so no nodes are produced.
  if (!messages.length) {
    return [];
  }

  // Step 1 — run the first-pass masking helper over every message
  // (highly specific patterns, quoted strings, parentheses).
  const maskedLines = messages.map(_mask_first_pass_patterns.maskFirstPassPatterns);

  // Step 2 — pick the most likely column delimiter (e.g. `\s`, `|`, `;`),
  // looking only at the masked text of each line.
  const {
    findDelimiter
  } = _find_delimiter;
  const maskedText = maskedLines.map(line => {
    const {
      masked
    } = line;
    return masked;
  });
  const delimiter = findDelimiter(maskedText);

  // Step 3 — first tokenization pass: split on the delimiter, then tokenize
  // the columns with every candidate split char (no restriction passed).
  const {
    tokenizeLines
  } = _tokenize_lines;
  const exploratoryColumns = tokenizeLines(maskedLines, delimiter);

  // Step 4 — keep only the split chars that yield consistent token counts
  // across lines (e.g. `[/path/to/file]` vs. `[/file]` rules out `/`).
  const {
    findConsistentSplitChars
  } = _find_consistent_split_chars;
  const consistentSplitChars = findConsistentSplitChars(exploratoryColumns);

  // Step 5 — second tokenization pass, restricted to the consistent split chars.
  const refinedColumns = tokenizeLines(maskedLines, delimiter, consistentSplitChars);

  // Step 6 — merge the per-line columns into one common column structure.
  const {
    normalizeColumns
  } = _normalize_columns;
  const commonColumns = normalizeColumns(refinedColumns);

  // Step 7 — keep the columns that carry useful information; the rest is
  // collapsed into a single GREEDYDATA column.
  const {
    getUsefulGroups
  } = _get_useful_groups;
  const usefulGroups = getUsefulGroups(commonColumns);

  // Step 8 — flatten the groups into one token list with whitespace and
  // delimiter characters inlined, and hand that back to the caller.
  const {
    flattenGroups
  } = _flatten_groups;
  return flattenGroups(usefulGroups, delimiter);
}