"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.runEvaluations = void 0;
var _std = require("@kbn/std");
var _langsmith = require("langsmith");
var _evaluation = require("langsmith/evaluation");
var _get_evaluator_llm = require("../helpers/get_evaluator_llm");
var _get_custom_evaluator = require("../helpers/get_custom_evaluator");
var _get_default_prompt_template = require("../helpers/get_custom_evaluator/get_default_prompt_template");
var _get_graph_input_overrides = require("../helpers/get_graph_input_overrides");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

/**
 * Runs an evaluation for each graph so they show up separately (resulting in
 * each dataset run grouped by connector).
 *
 * For every entry in `graphs` this:
 *  1. builds a `predict` function that invokes the graph with the overrides
 *     extracted from the dataset example's input,
 *  2. obtains an evaluator LLM via `getEvaluatorLlm`,
 *  3. builds a custom "correctness" evaluator from the default prompt template,
 *  4. calls LangSmith's `evaluate` against `datasetName`.
 *
 * Errors thrown inside the per-graph `try` block are logged with
 * `logger.error` and are not re-thrown.
 *
 * @param {object} params
 * @param params.actionsClient - forwarded to `getEvaluatorLlm`
 * @param params.connectorTimeout - forwarded to `getEvaluatorLlm`
 * @param params.evaluatorConnectorId - forwarded to `getEvaluatorLlm`; also logged
 * @param params.datasetName - passed to `evaluate` as `data` (empty string when nullish)
 * @param params.graphs - entries of `{ connector, graph, llmType, name, traceOptions }`
 * @param params.langSmithApiKey - used for the LangSmith `Client` and forwarded to `getEvaluatorLlm`
 * @param params.logger - receives `info` / `debug` / `error` messages
 * @returns the promise returned by `asyncForEach`
 */
const runEvaluations = async ({
  actionsClient,
  connectorTimeout,
  evaluatorConnectorId,
  datasetName,
  graphs,
  langSmithApiKey,
  logger
}) => (0, _std.asyncForEach)(graphs, async ({
  connector,
  graph,
  llmType,
  name,
  traceOptions
}) => {
  const subject = `connector "${connector.name}" (${llmType}), running experiment "${name}"`;
  try {
    logger.info(() => `Evaluating ${subject} with dataset "${datasetName}" and evaluator "${evaluatorConnectorId}"`);

    // Tolerate a missing `traceOptions` / `tracers`: fall back to no callbacks
    // instead of failing every prediction with a TypeError.
    const tracers = traceOptions != null && traceOptions.tracers != null ? traceOptions.tracers : [];
    const predict = async input => {
      // Serialize the example input; interpolating the object directly would
      // only ever log "[object Object]".
      logger.debug(() => `Raw example Input for ${subject}:\n ${JSON.stringify(input, null, 2)}`);

      // The example `Input` may have overrides for the initial state of the graph:
      const overrides = (0, _get_graph_input_overrides.getGraphInputOverrides)(input);
      return graph.invoke({
        ...overrides
      }, {
        callbacks: [...tracers],
        runName: name,
        tags: ['evaluation', llmType !== null && llmType !== void 0 ? llmType : '']
      });
    };
    const llm = await (0, _get_evaluator_llm.getEvaluatorLlm)({
      actionsClient,
      connectorTimeout,
      evaluatorConnectorId,
      experimentConnector: connector,
      langSmithApiKey,
      logger
    });
    const customEvaluator = (0, _get_custom_evaluator.getCustomEvaluator)({
      criteria: 'correctness',
      key: 'attack_discovery_correctness',
      llm,
      template: (0, _get_default_prompt_template.getDefaultPromptTemplate)()
    });
    const evalOutput = await (0, _evaluation.evaluate)(predict, {
      client: new _langsmith.Client({
        apiKey: langSmithApiKey
      }),
      data: datasetName !== null && datasetName !== void 0 ? datasetName : '',
      evaluators: [customEvaluator],
      experimentPrefix: name,
      maxConcurrency: 5 // prevents rate limiting
    });
    logger.info(() => `Evaluation complete for ${subject}`);
    logger.debug(() => `Evaluation output for ${subject}:\n ${JSON.stringify(evalOutput, null, 2)}`);
  } catch (e) {
    // Best-effort per graph: log and continue with the remaining graphs.
    logger.error(`Error evaluating ${subject}: ${e}`);
  }
});
exports.runEvaluations = runEvaluations;