Skip to content

Support templates in our string evaluators #60

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions cmd/eval/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ func (h *evalCommandHandler) runEvaluators(ctx context.Context, testCase map[str
func (h *evalCommandHandler) runSingleEvaluator(ctx context.Context, evaluator prompt.Evaluator, testCase map[string]interface{}, response string) (EvaluationResult, error) {
switch {
case evaluator.String != nil:
return h.runStringEvaluator(evaluator.Name, *evaluator.String, response)
return h.runStringEvaluator(evaluator.Name, *evaluator.String, testCase, response)
case evaluator.LLM != nil:
return h.runLLMEvaluator(ctx, evaluator.Name, *evaluator.LLM, testCase, response)
case evaluator.Uses != "":
Expand All @@ -366,23 +366,39 @@ func (h *evalCommandHandler) runSingleEvaluator(ctx context.Context, evaluator p
}
}

func (h *evalCommandHandler) runStringEvaluator(name string, eval prompt.StringEvaluator, response string) (EvaluationResult, error) {
func (h *evalCommandHandler) runStringEvaluator(name string, eval prompt.StringEvaluator, testCase map[string]interface{}, response string) (EvaluationResult, error) {
var passed bool
var details string

switch {
case eval.Equals != "":
passed = response == eval.Equals
details = fmt.Sprintf("Expected exact match: '%s'", eval.Equals)
equals, err := h.templateString(eval.Equals, testCase)
if err != nil {
return EvaluationResult{}, fmt.Errorf("failed to template message content: %w", err)
}
Comment on lines +376 to +378
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I do agree with Copilot that this looks a little gnarly. But with the way Go wants to handle errors, we kind of need this everywhere.

That is, unless we create a helper method here that, should there be an error, just defaults back to the provided string. But I'm open to suggestions/opinions.

passed = response == equals
details = fmt.Sprintf("Expected exact match: '%s'", equals)
case eval.Contains != "":
passed = strings.Contains(strings.ToLower(response), strings.ToLower(eval.Contains))
details = fmt.Sprintf("Expected to contain: '%s'", eval.Contains)
contains, err := h.templateString(eval.Contains, testCase)
if err != nil {
return EvaluationResult{}, fmt.Errorf("failed to template message content: %w", err)
}
passed = strings.Contains(strings.ToLower(response), strings.ToLower(contains))
details = fmt.Sprintf("Expected to contain: '%s'", contains)
case eval.StartsWith != "":
passed = strings.HasPrefix(strings.ToLower(response), strings.ToLower(eval.StartsWith))
details = fmt.Sprintf("Expected to start with: '%s'", eval.StartsWith)
startsWith, err := h.templateString(eval.StartsWith, testCase)
if err != nil {
return EvaluationResult{}, fmt.Errorf("failed to template message content: %w", err)
}
passed = strings.HasPrefix(strings.ToLower(response), strings.ToLower(startsWith))
details = fmt.Sprintf("Expected to start with: '%s'", startsWith)
case eval.EndsWith != "":
passed = strings.HasSuffix(strings.ToLower(response), strings.ToLower(eval.EndsWith))
details = fmt.Sprintf("Expected to end with: '%s'", eval.EndsWith)
endsWith, err := h.templateString(eval.EndsWith, testCase)
if err != nil {
return EvaluationResult{}, fmt.Errorf("failed to template message content: %w", err)
}
passed = strings.HasSuffix(strings.ToLower(response), strings.ToLower(endsWith))
details = fmt.Sprintf("Expected to end with: '%s'", endsWith)
default:
return EvaluationResult{}, errors.New("no string evaluation criteria specified")
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/eval/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ evaluators:

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := handler.runStringEvaluator("test", tt.evaluator, tt.response)
result, err := handler.runStringEvaluator("test", tt.evaluator, map[string]interface{}{}, tt.response)
require.NoError(t, err)
require.Equal(t, tt.expected, result.Passed)
if tt.expected {
Expand Down
4 changes: 3 additions & 1 deletion examples/sample_prompt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ modelParameters:
maxTokens: 50
testData:
- input: 'hello world'
string: hello
expected: 'greeting response'
- input: 'goodbye world'
string: goodbye
expected: 'farewell response'
messages:
- role: system
Expand All @@ -17,6 +19,6 @@ messages:
evaluators:
- name: string evaluator
string:
contains: world
contains: '{{string}}'
- name: similarity check
uses: github/similarity
Loading