interface EvalConfig {
    evaluatorType: keyof EvaluatorType;
    formatEvaluatorInputs: EvaluatorInputFormatter;
    agentTools?: StructuredToolInterface[];
    chainOptions?: Partial<Omit<LLMEvalChainInput<EvalOutputType, BaseLanguageModelInterface>, "llm">>;
    criteria?: "detail" | ConstitutionalPrinciple | {
        [key: string]: string;
    } | "conciseness" | "relevance" | "correctness" | "coherence" | "harmfulness" | "maliciousness" | "helpfulness" | "controversiality" | "misogyny" | "criminality" | "insensitivity" | "depth" | "creativity";
    distanceMetric?: "cosine" | "euclidean" | "manhattan" | "chebyshev";
    embedding?: any;
    feedbackKey?: string;
    llm?: any;
}


Properties

evaluatorType: keyof EvaluatorType

The name of the evaluator to use, e.g. labeled_criteria or criteria.
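
For instance, a config that selects the labeled criteria evaluator could look like the following sketch (a minimal illustration, not canonical usage; the formatter mirrors the formatEvaluatorInputs example below):

// Sketch: select the labeled_criteria evaluator and grade correctness
// against the reference output.
const config: EvalConfig = {
    evaluatorType: "labeled_criteria",
    criteria: "correctness",
    formatEvaluatorInputs: ({ rawInput, rawPrediction, rawReferenceOutput }) => ({
        input: rawInput.input,
        prediction: rawPrediction.output,
        reference: rawReferenceOutput.output,
    }),
};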

formatEvaluatorInputs: EvaluatorInputFormatter

Converts the evaluation data into the format the evaluator expects; most commonly the prepared values are strings. The parameters are the raw input from the run, the raw prediction (output), the raw reference output, and the raw run itself.

Example

// Chain input: { input: "some string" }
// Chain output: { output: "some output" }
// Reference example output format: { output: "some reference output" }
const formatEvaluatorInputs = ({
    rawInput,
    rawPrediction,
    rawReferenceOutput,
}) => {
    return {
        input: rawInput.input,
        prediction: rawPrediction.output,
        reference: rawReferenceOutput.output,
    };
};

Returns

The prepared data.

agentTools?: StructuredToolInterface[]

A list of tools available to the agent, for TrajectoryEvalChain.
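
A trajectory config could pass the agent's tools as in the following sketch, assuming the evaluator name trajectory, DynamicStructuredTool from @langchain/core/tools, and a hypothetical search tool:

import { DynamicStructuredTool } from "@langchain/core/tools";
import { z } from "zod";

// Hypothetical tool that the agent had access to during the run.
const searchTool = new DynamicStructuredTool({
    name: "search",
    description: "Search the web for a query.",
    schema: z.object({ query: z.string() }),
    func: async ({ query }) => `Results for: ${query}`,
});

// Sketch: let the trajectory evaluator judge the agent's tool use.
const config: EvalConfig = {
    evaluatorType: "trajectory",
    agentTools: [searchTool],
    formatEvaluatorInputs: ({ rawInput, rawPrediction }) => ({
        input: rawInput.input,
        prediction: rawPrediction.output,
    }),
};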

chainOptions?: Partial<Omit<LLMEvalChainInput<EvalOutputType, BaseLanguageModelInterface>, "llm">>

Options to pass through to the underlying evaluation chain; any LLMEvalChainInput field except the llm, which is supplied separately via the llm property below.

criteria?: "detail" | ConstitutionalPrinciple | {
    [key: string]: string;
} | "conciseness" | "relevance" | "correctness" | "coherence" | "harmfulness" | "maliciousness" | "helpfulness" | "controversiality" | "misogyny" | "criminality" | "insensitivity" | "depth" | "creativity"

The criteria to use for the evaluator: a named built-in criterion, a ConstitutionalPrinciple, or a custom mapping from criterion name to description.

Type declaration

  • [key: string]: string
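
Beyond the named built-in criteria, a custom criterion can be supplied as a name-to-description map, as in this sketch:

// Sketch: a custom criterion described in plain language.
const config: EvalConfig = {
    evaluatorType: "criteria",
    criteria: {
        cliche: "Does the output avoid cliches and stock phrases?",
    },
    formatEvaluatorInputs: ({ rawInput, rawPrediction }) => ({
        input: rawInput.input,
        prediction: rawPrediction.output,
    }),
};
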
distanceMetric?: "cosine" | "euclidean" | "manhattan" | "chebyshev"

The distance metric to use for comparing the embeddings.

embedding?: any

The embeddings object used to vectorize the outputs before comparison.
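
Combined with distanceMetric above, this supports embedding-distance scoring against the reference output. A sketch, assuming the evaluator name embedding_distance and OpenAIEmbeddings from @langchain/openai as the embeddings object:

import { OpenAIEmbeddings } from "@langchain/openai";

// Sketch: score each output by cosine distance to the reference output.
const config: EvalConfig = {
    evaluatorType: "embedding_distance",
    distanceMetric: "cosine",
    embedding: new OpenAIEmbeddings(),
    formatEvaluatorInputs: ({ rawPrediction, rawReferenceOutput }) => ({
        prediction: rawPrediction.output,
        reference: rawReferenceOutput.output,
    }),
};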

feedbackKey?: string

The feedback (or metric) name to use for the logged evaluation results. If none is provided, it defaults to the evaluationName (see the sketch under llm below).

llm?: any

The language model used as the judge by LLM-based evaluators.
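
For example, this sketch grades conciseness with an explicit judge model (ChatOpenAI from @langchain/openai, an assumed choice) and logs the score under a custom feedback name:

import { ChatOpenAI } from "@langchain/openai";

// Sketch: grade conciseness with a deterministic judge model and log
// the result as "conciseness_score" rather than the default name.
const config: EvalConfig = {
    evaluatorType: "criteria",
    criteria: "conciseness",
    feedbackKey: "conciseness_score",
    llm: new ChatOpenAI({ temperature: 0 }),
    formatEvaluatorInputs: ({ rawInput, rawPrediction }) => ({
        input: rawInput.input,
        prediction: rawPrediction.output,
    }),
};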
