responder/pkg/audiocap/gemini.go
2026-04-09 10:21:20 +02:00

222 lines
4.7 KiB
Go

package audiocap
import (
"context"
"errors"
"fmt"
"log"
"os"
"strings"
"sync"
"time"
"github.com/google/generative-ai-go/genai"
"google.golang.org/api/option"
)
// ANSI escape sequences for styling terminal output. Print one of the styles
// before the text and Reset after it.
const (
	// Text attributes.
	Reset = "\033[0m" // clears every attribute set so far
	Dim   = "\033[2m" // faint intensity

	// Foreground colours.
	Red    = "\033[31m"
	Green  = "\033[32m"
	Yellow = "\033[33m"
	Cyan   = "\033[36m"
)
// Responder buffers incoming question text, asks a Gemini model for an answer
// and, when a TTS client is configured, speaks that answer.
type Responder struct {
	// Collaborators assigned by NewResponder; nothing in this file reassigns
	// them afterwards.
	tts         *Client // optional; GetResponse skips speech when nil
	genaiClient *genai.Client
	model       *genai.GenerativeModel

	// mu guards BufferedResponse and running.
	mu               sync.Mutex
	BufferedResponse string // question text waiting to be answered
	running          bool   // true while a GetResponse goroutine is working
}
// NewResponder builds a Responder backed by the "gemini-2.5-flash" model.
//
// The API key is read from the GEMINI_API_KEY environment variable; an error
// is returned when it is unset or empty, or when the genai client cannot be
// created. The model's system instruction is sysPrompt filled in with the
// package-level userCV, and blocking is switched off for the four harm
// categories listed below. ttsClient is stored as-is and may be nil.
func NewResponder(ctx context.Context, ttsClient *Client) (*Responder, error) {
	key := os.Getenv("GEMINI_API_KEY")
	if key == "" {
		return nil, errors.New("GEMINI_API_KEY not set")
	}

	gc, err := genai.NewClient(ctx, option.WithAPIKey(key))
	if err != nil {
		return nil, fmt.Errorf("genai client: %w", err)
	}

	// Every category gets the same "never block" threshold.
	categories := []genai.HarmCategory{
		genai.HarmCategoryHarassment,
		genai.HarmCategoryHateSpeech,
		genai.HarmCategorySexuallyExplicit,
		genai.HarmCategoryDangerousContent,
	}
	safety := make([]*genai.SafetySetting, 0, len(categories))
	for _, c := range categories {
		safety = append(safety, &genai.SafetySetting{
			Category:  c,
			Threshold: genai.HarmBlockNone,
		})
	}

	instruction := fmt.Sprintf(sysPrompt, userCV)

	gm := gc.GenerativeModel("gemini-2.5-flash")
	gm.SetTemperature(0.43)
	gm.SystemInstruction = &genai.Content{
		Parts: []genai.Part{genai.Text(instruction)},
	}
	gm.SafetySettings = safety

	resp := &Responder{
		tts:         ttsClient,
		genaiClient: gc,
		model:       gm,
	}
	return resp, nil
}
// ResponseDaemon polls the question buffer every two seconds and blocks until
// ctx is cancelled, so callers normally run it in its own goroutine.
//
// Whenever the buffer is non-empty it calls GetResponse with an empty
// question, which answers the buffered text unless a response is already in
// progress.
func (r *Responder) ResponseDaemon(ctx context.Context) {
	ticker := time.NewTicker(2 * time.Second)
	// Release the ticker's resources once the daemon exits.
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
			// Text may have been buffered while an earlier answer was still
			// being spoken; a two-second pickup delay is acceptable.
			if r.Get() != "" {
				r.GetResponse("")
			}
		}
	}
}
// sysPrompt is the system-instruction template handed to the model. Its single
// %s verb is replaced with the candidate's CV text by NewResponder.
const sysPrompt = "You are a user at an interview, give one short suitable and good response that fit into a job interview.\n" +
	"Respond directly with the answer just if you are spoken directly to.\n" +
	"This is YOUR CV:\n" +
	"%s"
// Set appends message to the buffered question text while holding the lock.
// Despite its name it does not overwrite what is already buffered.
func (r *Responder) Set(message string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.BufferedResponse += message
}
// GetAndClear returns the buffered question text and empties the buffer in a
// single locked step.
func (r *Responder) GetAndClear() string {
	r.mu.Lock()
	defer r.mu.Unlock()

	pending := r.BufferedResponse
	r.BufferedResponse = ""
	return pending
}
// Get returns the buffered question text without modifying the buffer.
func (r *Responder) Get() string {
	r.mu.Lock()
	defer r.mu.Unlock()
	return r.BufferedResponse
}
// Clear discards any buffered question text.
func (r *Responder) Clear() {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.BufferedResponse = ""
}
// HistoryMessage is one turn of the conversation.
type HistoryMessage struct {
	Role    string // speaker label; GetResponse records "user" and "model"
	Message string // text of the turn
}

// HistoryT is an append-only conversation log that is safe for concurrent use
// through its methods.
type HistoryT struct {
	History []HistoryMessage
	my      sync.Mutex // guards History
}

// History is the package-wide conversation log used when building prompts.
var History HistoryT

// Set appends a turn with the given role and message to the log. Despite its
// name it never replaces earlier entries.
func (h *HistoryT) Set(role, message string) {
	h.my.Lock()
	defer h.my.Unlock()
	h.History = append(h.History, HistoryMessage{Role: role, Message: message})
}

// GetLastN formats the most recent n turns, oldest first, as "role: message"
// lines, each terminated by a newline. Fewer lines are returned when the log
// holds fewer than n turns, and the result is empty when n is zero or
// negative.
func (h *HistoryT) GetLastN(n int) string {
	// A negative n would otherwise produce a start index beyond the end of
	// the slice and panic.
	if n <= 0 {
		return ""
	}

	h.my.Lock()
	defer h.my.Unlock()

	recent := h.History
	if len(recent) > n {
		recent = recent[len(recent)-n:]
	}

	var sb strings.Builder
	for _, m := range recent {
		fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Message)
	}
	return sb.String()
}
// buildPrompt assembles the user prompt sent to the model: up to the last five
// turns from History followed by the current question.
func buildPrompt(question string) string {
	const layout = "Last 5 messages:\n%s\nCURRENT QUESTION: %s"

	recent := History.GetLastN(5)
	return fmt.Sprintf(layout, recent, question)
}
// GetResponse answers the buffered question in a background goroutine and
// returns immediately.
//
// A non-empty question is first appended to the buffer, followed by a space.
// If another response is already in progress the goroutine stops there and
// leaves the text buffered; ResponseDaemon re-triggers GetResponse while the
// buffer is non-empty. Otherwise it drains the buffer, asks Gemini, prints the
// answer, records both turns in History and, when a TTS client is configured,
// speaks the answer. Errors are logged, not returned.
func (r *Responder) GetResponse(question string) {
	go func() {
		if question != "" {
			r.Set(question + " ")
		}

		// Only one response may be in flight at a time.
		r.mu.Lock()
		if r.running {
			r.mu.Unlock()
			return
		}
		r.running = true
		r.mu.Unlock()

		// Release the flag on every exit path, including a panic in the
		// Gemini or TTS call, so the responder can never get stuck.
		defer func() {
			r.mu.Lock()
			r.running = false
			r.mu.Unlock()
		}()

		// This goroutine now owns the turn, so drain the buffer.
		finalQuestion := r.GetAndClear()
		if strings.TrimSpace(finalQuestion) == "" {
			// The buffer was consumed by someone else in the meantime;
			// there is nothing to ask.
			return
		}

		res, err := r.callGemini(buildPrompt(finalQuestion))
		if err != nil {
			log.Printf("❌ gemini: %v", err)
			return
		}

		fmt.Printf("%s🤖 %s%s\n", Green, res, Reset)
		History.Set("user", finalQuestion)
		History.Set("model", res)

		if r.tts == nil {
			return
		}

		// Speech gets its own deadline, independent of the Gemini call.
		ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
		defer cancel()

		fmt.Printf("%s🤖 %s%s\n", Cyan, "Speak Start!", Reset)
		if err := r.tts.Speak(ctx, res); err != nil {
			log.Printf("%s❌ tts: %v%s", Red, err, Reset)
			return
		}
		fmt.Printf("%s🤖 %s%s\n", Cyan, "Speak End!", Reset)
	}()
}
// callGemini sends prompt to the configured model with a 60-second timeout and
// returns the first part of the first candidate as text.
//
// It returns an error when the request fails, when the response has no
// candidate or the first candidate has no content parts, or when the first
// part is not text.
func (r *Responder) callGemini(prompt string) (string, error) {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	resp, err := r.model.GenerateContent(ctx, genai.Text(prompt))
	if err != nil {
		return "", fmt.Errorf("generate: %w", err)
	}
	if resp == nil || len(resp.Candidates) == 0 {
		return "", errors.New("no content")
	}

	// Candidate and Content are pointers, so guard against nil before
	// reaching into Parts.
	// NOTE(review): presumably Content is nil when a candidate was stopped
	// before producing output — confirm against the genai docs.
	cand := resp.Candidates[0]
	if cand == nil || cand.Content == nil || len(cand.Content.Parts) == 0 {
		return "", errors.New("no content")
	}

	text, ok := cand.Content.Parts[0].(genai.Text)
	if !ok {
		return "", errors.New("unexpected content type")
	}
	return string(text), nil
}