feat: implement unified media preview and transcription extraction

This commit is contained in:
AI Engineer 2026-05-13 23:29:43 +08:00
parent 762370e839
commit 6573dd6f24
3 changed files with 177 additions and 7 deletions

View File

@ -81,14 +81,17 @@ frame.Blur(2.0)
vision.Save(frame, "preview.jpg")
```
### 5. 验证码生成
### 5. 多媒体预览与转写转码
针对 Web 端、列表缩略图或语音转写场景的一站式优化预览。支持自动缩放并裁剪以适应指定尺寸。
```go
captcha := vision.GenerateCaptcha(&vision.CaptchaOption{
Length: 6,
Width: 200,
Height: 60,
})
vision.Save(captcha, "captcha.png")
// 生成图片缩略图 (WebP, 自动填充/裁剪)
vision.GenerateImagePreview("photo.jpg", "thumb.webp", 200, 200)
// 生成 4 帧动画 WebP (自动填充/裁剪, 效果动态)
vision.GenerateVideoPreview("movie.mp4", "preview.webp", 320, 180)
// 提取音频预览片段 (16kHz Ogg Opus, 最长 3 分钟)
vision.GenerateAudioPreview("input.mp4", "preview.ogg")
```
## 🛠 API 概览

79
preview.go Normal file
View File

@ -0,0 +1,79 @@
package vision
import (
"fmt"
"os"
"os/exec"
"path/filepath"
)
// GenerateImagePreview generates an image preview (intended to be WebP) from
// the image at srcPath and writes it to outPath.
//
// The image is loaded with Load, resized with Fill(width, height) — scale and
// crop so that the result fills the requested size — and written with Save.
// NOTE(review): the output format is presumably chosen by Save from outPath's
// extension; confirm against Save.
//
// Errors from Load and Save are wrapped with the step and path that failed;
// the underlying error stays reachable through errors.Is / errors.As.
func GenerateImagePreview(srcPath, outPath string, width, height int) error {
	c, err := Load(srcPath)
	if err != nil {
		return fmt.Errorf("load image %q: %w", srcPath, err)
	}

	c.Fill(width, height)

	if err := Save(c, outPath); err != nil {
		return fmt.Errorf("save image preview %q: %w", outPath, err)
	}
	return nil
}
// GenerateVideoPreview generates a 4-frame animated WebP preview of the video
// at videoPath and writes it to outPath.
//
// Frames are sampled at 0%, 33%, 67% and 90% of the duration reported by
// getVideoDuration. Each frame is scaled to cover width x height and then
// cropped to exactly that size (fill mode). The frames are encoded as a lossy
// WebP animation at one frame per second.
//
// width and height must be positive. An existing file at outPath is
// overwritten. When ffmpeg fails, the returned error names the failing step
// and includes ffmpeg's diagnostic output.
func GenerateVideoPreview(videoPath, outPath string, width, height int) error {
	// Reject sizes the crop filter could never satisfy before spawning anything.
	if width <= 0 || height <= 0 {
		return fmt.Errorf("invalid preview size %dx%d: width and height must be positive", width, height)
	}

	v, err := NewVideo()
	if err != nil {
		return err
	}
	duration, err := getVideoDuration(videoPath)
	if err != nil {
		return err
	}

	// A failed MkdirTemp must abort: with an empty tmpDir the frames would be
	// written relative to the working directory and never cleaned up.
	tmpDir, err := os.MkdirTemp("", "frames")
	if err != nil {
		return fmt.Errorf("create temp dir for preview frames: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	// run invokes ffmpeg and folds its output into the error, because the bare
	// "exit status N" from exec carries no diagnostics.
	//   -loglevel error: keep the captured output limited to real errors
	//   -y: overwrite outputs; otherwise ffmpeg asks on stdin and, with no
	//       interactive stdin, refuses to replace an existing file
	run := func(step string, args ...string) error {
		args = append([]string{"-hide_banner", "-loglevel", "error", "-y"}, args...)
		out, err := exec.Command(v.FFmpegPath, args...).CombinedOutput()
		if err != nil {
			return fmt.Errorf("%s: %w: %q", step, err, out)
		}
		return nil
	}

	// Fill effect via ffmpeg's scale and crop filters:
	// force_original_aspect_ratio=increase makes the scaled frame at least as
	// large as the target; crop=w:h then cuts out the centre region.
	vf := fmt.Sprintf("scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d", width, height, width, height)

	times := []float64{0, duration * 0.33, duration * 0.67, duration * 0.90}
	for i, t := range times {
		framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%d.png", i))
		err := run(fmt.Sprintf("extract frame %d at %.3fs from %q", i, t, videoPath),
			"-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, framePath)
		if err != nil {
			return err
		}
	}

	// Assemble the numbered frames into the animation; -loop 0 loops forever.
	return run(fmt.Sprintf("encode animated webp %q", outPath),
		"-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%d.png"),
		"-c:v", "libwebp", "-lossless", "0", "-quality", "70", "-loop", "0", outPath)
}
// GenerateAudioPreview extracts at most the first 3 minutes of audio from
// mediaPath for transcription or preview and writes it to outPath.
//
// Output format: Ogg Opus, 16 kHz, mono. An existing file at outPath is
// overwritten. When ffmpeg fails, the returned error includes its diagnostic
// output instead of only the exit status.
func GenerateAudioPreview(mediaPath, outPath string) error {
	v, err := NewVideo()
	if err != nil {
		return err
	}

	cmd := exec.Command(v.FFmpegPath,
		// Keep the captured output limited to real errors.
		"-hide_banner", "-loglevel", "error",
		// Overwrite outPath; otherwise ffmpeg asks on stdin and, with no
		// interactive stdin, refuses to replace an existing file.
		"-y",
		"-i", mediaPath,
		"-vn",             // disable video
		"-c:a", "libopus", // efficient audio compression
		"-ar", "16000", // 16 kHz sample rate
		"-ac", "1", // mono
		"-t", "180", // at most 180 seconds
		outPath,
	)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("extract audio preview from %q: %w: %q", mediaPath, err, out)
	}
	return nil
}
// getVideoDuration returns the container duration of videoPath in seconds, as
// printed by ffprobe for the "format=duration" entry.
//
// It returns 0 and a wrapped error when ffprobe cannot be run, exits with a
// failure, or prints something that does not parse as a float.
//
// NOTE(review): ffprobe is looked up on PATH here, whereas ffmpeg is invoked
// through Video.FFmpegPath elsewhere in this file — confirm that is intended.
func getVideoDuration(videoPath string) (float64, error) {
	out, err := exec.Command("ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", videoPath).Output()
	if err != nil {
		return 0, fmt.Errorf("probe duration of %q: %w", videoPath, err)
	}

	var duration float64
	if _, err := fmt.Sscanf(string(out), "%f", &duration); err != nil {
		return 0, fmt.Errorf("parse ffprobe duration %q for %q: %w", out, videoPath, err)
	}
	return duration, nil
}

88
preview_test.go Normal file
View File

@ -0,0 +1,88 @@
package vision
import (
"fmt"
"image/color"
"os"
"os/exec"
"path/filepath"
"testing"
"github.com/fogleman/gg"
)
// TestPreviewer is an integration test for the three preview generators.
//
// It synthesises five solid-colour PNG frames, turns them into a short video
// with Video.CreateVideoFromImages, tries to mux in a silent audio track, and
// then runs GenerateImagePreview, GenerateVideoPreview and
// GenerateAudioPreview against those fixtures. The whole test is skipped when
// the fixture video cannot be produced; the audio subtest is skipped when no
// audio track could be added.
func TestPreviewer(t *testing.T) {
	// 1. Set up the test environment. t.TempDir is removed automatically when
	// the test and its subtests finish.
	tmpDir := t.TempDir()

	videoPath := filepath.Join(tmpDir, "test.mp4")
	webPPath := filepath.Join(tmpDir, "preview.webp")
	oggPath := filepath.Join(tmpDir, "preview.ogg")

	// 2. Generate fixtures: 5 solid-colour frames used to assemble a video.
	for i := 0; i < 5; i++ {
		dc := gg.NewContext(320, 240)
		dc.SetColor(color.RGBA{uint8(i * 50), 100, 200, 255})
		dc.Clear()
		// Format only the file name; the directory must never be treated as a
		// format string, since it may contain '%'.
		framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%d.png", i))
		if err := dc.SavePNG(framePath); err != nil {
			t.Fatalf("write fixture frame %s: %v", framePath, err)
		}
	}

	// Build the video with the existing vision.Video logic.
	v, err := NewVideo()
	if err != nil {
		t.Fatal(err)
	}
	if err := v.CreateVideoFromImages(filepath.Join(tmpDir, "frame_%d.png"), 1, videoPath); err != nil {
		t.Skipf("FFmpeg video generation failed, skipping integration test: %v", err)
	}

	// Add a silent audio track to the test video; without one, audio
	// extraction fails. Only switch to the muxed file if muxing succeeded.
	hasAudio := false
	audioPath := filepath.Join(tmpDir, "silent.aac")
	finalVideo := filepath.Join(tmpDir, "final.mp4")
	if err := exec.Command(v.FFmpegPath, "-f", "lavfi", "-i", "anullsrc=r=44100:cl=mono", "-t", "5", "-c:a", "aac", audioPath).Run(); err != nil {
		t.Logf("could not generate silent audio: %v", err)
	} else if err := exec.Command(v.FFmpegPath, "-i", videoPath, "-i", audioPath, "-c", "copy", finalVideo).Run(); err != nil {
		t.Logf("could not mux silent audio into the test video: %v", err)
	} else {
		videoPath = finalVideo
		hasAudio = true
	}

	// 3. Exercise the preview functions.
	t.Run("GenerateImagePreview", func(t *testing.T) {
		imgPath := filepath.Join(tmpDir, "frame_0.png")
		previewImgPath := filepath.Join(tmpDir, "img_preview.webp")

		if err := GenerateImagePreview(imgPath, previewImgPath, 100, 100); err != nil {
			t.Errorf("GenerateImagePreview failed: %v", err)
		}
		if _, err := os.Stat(previewImgPath); err != nil {
			t.Errorf("Image preview output not created: %v", err)
		}
	})

	t.Run("GenerateVideoPreview", func(t *testing.T) {
		if err := GenerateVideoPreview(videoPath, webPPath, 160, 120); err != nil {
			t.Errorf("GenerateVideoPreview failed: %v", err)
		}
		if _, err := os.Stat(webPPath); err != nil {
			t.Errorf("WebP output not created: %v", err)
		}
	})

	t.Run("GenerateAudioPreview", func(t *testing.T) {
		if !hasAudio {
			t.Skip("test video has no audio track, skipping audio preview test")
		}
		if err := GenerateAudioPreview(videoPath, oggPath); err != nil {
			t.Errorf("GenerateAudioPreview failed: %v", err)
		}
		if _, err := os.Stat(oggPath); err != nil {
			t.Errorf("Ogg output not created: %v", err)
		}
	})
}