package audiocap import ( "context" "errors" "fmt" "log" "os" "strings" "sync" "time" "github.com/google/generative-ai-go/genai" "google.golang.org/api/option" ) const ( Reset = "\033[0m" Dim = "\033[2m" Red = "\033[31m" Green = "\033[32m" Yellow = "\033[33m" Cyan = "\033[36m" ) type Responder struct { tts *Client BufferedResponse string mu sync.Mutex running bool genaiClient *genai.Client model *genai.GenerativeModel } func NewResponder(ctx context.Context, ttsClient *Client) (*Responder, error) { apiKey := os.Getenv("GEMINI_API_KEY") if apiKey == "" { return nil, errors.New("GEMINI_API_KEY not set") } client, err := genai.NewClient(ctx, option.WithAPIKey(apiKey)) if err != nil { return nil, fmt.Errorf("genai client: %w", err) } model := client.GenerativeModel("gemini-2.5-flash") model.SetTemperature(0.43) model.SystemInstruction = &genai.Content{ Parts: []genai.Part{genai.Text(fmt.Sprintf(sysPrompt, userCV))}, } model.SafetySettings = []*genai.SafetySetting{ {Category: genai.HarmCategoryHarassment, Threshold: genai.HarmBlockNone}, {Category: genai.HarmCategoryHateSpeech, Threshold: genai.HarmBlockNone}, {Category: genai.HarmCategorySexuallyExplicit, Threshold: genai.HarmBlockNone}, {Category: genai.HarmCategoryDangerousContent, Threshold: genai.HarmBlockNone}, } return &Responder{ tts: ttsClient, genaiClient: client, model: model, }, nil } func (r *Responder) ResponseDaemon(ctx context.Context) { ticker := time.NewTicker(2 * time.Second) for { select { case <-ctx.Done(): return case <-ticker.C: puffMsg := r.Get() // In case a final text was transmitted during speak. 2s is fine. if puffMsg != "" { r.GetResponse("") } } } } const sysPrompt = `You are a user at an interview, give one short suitable and good response that fit into a job interview. Respond directly with the answer just if you are spoken directly to. This is YOUR CV: %s` func (r *Responder) Set(message string) { r.mu.Lock() r.BufferedResponse = r.BufferedResponse + message r.mu.Unlock() } func (r *Responder) GetAndClear() string { r.mu.Lock() res := r.BufferedResponse r.BufferedResponse = "" r.mu.Unlock() return res } func (r *Responder) Get() string { r.mu.Lock() res := r.BufferedResponse r.mu.Unlock() return res } func (r *Responder) Clear() { r.mu.Lock() r.BufferedResponse = "" r.mu.Unlock() } type HistoryMessage struct { Role string Message string } type HistoryT struct { History []HistoryMessage my sync.Mutex } var History HistoryT func (h *HistoryT) Set(role, message string) { h.my.Lock() h.History = append(h.History, HistoryMessage{ Role: role, Message: message, }) h.my.Unlock() } func (h *HistoryT) GetLastN(n int) string { h.my.Lock() defer h.my.Unlock() start := 0 if len(h.History) > n { start = len(h.History) - n } var sb strings.Builder for _, m := range h.History[start:] { fmt.Fprintf(&sb, "%s: %s\n", m.Role, m.Message) } return sb.String() } func buildPrompt(question string) string { return fmt.Sprintf(`Last 5 messages: %s CURRENT QUESTION: %s`, History.GetLastN(5), question) } func (r *Responder) GetResponse(question string) { go func() { if question != "" { r.Set(question + " ") } r.mu.Lock() if r.running { r.mu.Unlock() return } r.running = true r.mu.Unlock() finalQuestion := r.GetAndClear() prompt := buildPrompt(finalQuestion) // Wenn sie laufen soll, den Buffer leeren. res, err := r.callGemini(prompt) if err != nil { log.Printf("āŒ gemini: %v", err) r.mu.Lock() r.running = false r.mu.Unlock() return } fmt.Printf("%sšŸ¤– %s%s\n", Green, res, Reset) History.Set("user", finalQuestion) History.Set("model", res) if r.tts != nil { ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) defer cancel() fmt.Printf("%sšŸ¤– %s%s\n", Cyan, "Speak Start!", Reset) if err := r.tts.Speak(ctx, res); err != nil { log.Printf("%sāŒ tts: %v%s", Red, err, Reset) } else { fmt.Printf("%sšŸ¤– %s%s\n", Cyan, "Speak End!", Reset) } } r.mu.Lock() r.running = false r.mu.Unlock() }() } func (r *Responder) callGemini(prompt string) (string, error) { ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) defer cancel() resp, err := r.model.GenerateContent(ctx, genai.Text(prompt)) if err != nil { return "", fmt.Errorf("generate: %w", err) } if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 { return "", errors.New("no content") } text, ok := resp.Candidates[0].Content.Parts[0].(genai.Text) if !ok { return "", errors.New("unexpected content type") } return string(text), nil }