68 lines
1.6 KiB
Go
68 lines
1.6 KiB
Go
package audiocap
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os/exec"
|
|
|
|
texttospeech "cloud.google.com/go/texttospeech/apiv1"
|
|
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
|
|
)
|
|
|
|
type Client struct {
|
|
client *texttospeech.Client
|
|
sink string // pulse sink to play into
|
|
}
|
|
|
|
func New(ctx context.Context, sink string) (*Client, error) {
|
|
c, err := texttospeech.NewClient(ctx)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("tts client: %w", err)
|
|
}
|
|
return &Client{client: c, sink: sink}, nil
|
|
}
|
|
|
|
func (c *Client) Close() error {
|
|
return c.client.Close()
|
|
}
|
|
|
|
// Speak synthesizes text and plays it into the configured sink.
|
|
func (c *Client) Speak(ctx context.Context, text string) error {
|
|
req := &texttospeechpb.SynthesizeSpeechRequest{
|
|
Input: &texttospeechpb.SynthesisInput{
|
|
InputSource: &texttospeechpb.SynthesisInput_Text{Text: text},
|
|
},
|
|
Voice: &texttospeechpb.VoiceSelectionParams{
|
|
LanguageCode: "en-US",
|
|
Name: "en-US-Neural2-D",
|
|
},
|
|
AudioConfig: &texttospeechpb.AudioConfig{
|
|
AudioEncoding: texttospeechpb.AudioEncoding_LINEAR16,
|
|
SampleRateHertz: 24000,
|
|
},
|
|
}
|
|
|
|
resp, err := c.client.SynthesizeSpeech(ctx, req)
|
|
if err != nil {
|
|
return fmt.Errorf("synthesize: %w", err)
|
|
}
|
|
|
|
// LINEAR16 from Google TTS is raw PCM wrapped in a WAV header.
|
|
// paplay handles WAV directly.
|
|
cmd := exec.CommandContext(ctx, "paplay",
|
|
"--device="+c.sink,
|
|
)
|
|
stdin, err := cmd.StdinPipe()
|
|
if err != nil {
|
|
return fmt.Errorf("stdin: %w", err)
|
|
}
|
|
if err := cmd.Start(); err != nil {
|
|
return fmt.Errorf("paplay: %w", err)
|
|
}
|
|
if _, err := stdin.Write(resp.AudioContent); err != nil {
|
|
return fmt.Errorf("write: %w", err)
|
|
}
|
|
stdin.Close()
|
|
return cmd.Wait()
|
|
}
|