feat(vision): enhance GenerateVideoPreview with format detection and frame interval

This commit is contained in:
AI Engineer 2026-05-17 12:46:45 +08:00
parent 1f60b4cb22
commit 5a06c07cb6
5 changed files with 91 additions and 27 deletions

View File

@ -87,9 +87,15 @@ vision.Save(frame, "preview.jpg")
// 生成图片缩略图 (WebP, 自动填充/裁剪) // 生成图片缩略图 (WebP, 自动填充/裁剪)
vision.GenerateImagePreview("photo.jpg", "thumb.webp", 200, 200) vision.GenerateImagePreview("photo.jpg", "thumb.webp", 200, 200)
// 生成 4 帧动画 WebP (自动填充/裁剪, 效果动态) // 生成动画预览 (WebP/GIF, 默认 30s 采样一帧,自动填充/裁剪)
vision.GenerateVideoPreview("movie.mp4", "preview.webp", 320, 180) vision.GenerateVideoPreview("movie.mp4", "preview.webp", 320, 180)
// 生成单张预览图 (JPG/PNG, 取视频中间帧)
vision.GenerateVideoPreview("movie.mp4", "preview.jpg", 320, 180)
// 提取多张预览帧到文件夹 (输出 1.webp, 2.webp...)
vision.GenerateVideoPreview("movie.mp4", "frames_dir", 320, 180)
// 提取音频预览片段 (16kHz Ogg Opus, 最长 3 分钟) // 提取音频预览片段 (16kHz Ogg Opus, 最长 3 分钟)
vision.GenerateAudioPreview("input.mp4", "preview.ogg") vision.GenerateAudioPreview("input.mp4", "preview.ogg")
``` ```
@ -119,7 +125,7 @@ vision --data "https://apigo.cc" -o qr.png --size 512
vision in.png -o out.png --resize 800x600 --blur 1.5 --grayscale vision in.png -o out.png --resize 800x600 --blur 1.5 --grayscale
# 5. 生成视频动态预览 (WebP) # 5. 生成视频动态预览 (WebP)
vision video.mp4 --type video -o preview.webp --width 320 --height 180 vision video.mp4 --type video -o preview.webp --width 320 --height 180 --step 30
# 6. 生成验证码 # 6. 生成验证码
vision --captcha -o captcha.png --len 6 vision --captcha -o captcha.png --len 6

View File

@ -39,6 +39,7 @@ var (
// 视频 // 视频
vtime = flag.Float64("time", 0, "提取视频帧的时间点 (秒)") vtime = flag.Float64("time", 0, "提取视频帧的时间点 (秒)")
vstep = flag.Int("step", 30, "视频预览采样间隔 (秒,默认 30)")
) )
const visionVersion = "1.0.0" const visionVersion = "1.0.0"
@ -160,7 +161,7 @@ func runPreview(src string) {
case "image": case "image":
err = vision.GenerateImagePreview(src, *outFile, w, h) err = vision.GenerateImagePreview(src, *outFile, w, h)
case "video": case "video":
err = vision.GenerateVideoPreview(src, *outFile, w, h) err = vision.GenerateVideoPreview(src, *outFile, w, h, *vstep)
case "audio": case "audio":
err = vision.GenerateAudioPreview(src, *outFile) err = vision.GenerateAudioPreview(src, *outFile)
default: default:

BIN
main

Binary file not shown.

View File

@ -38,9 +38,13 @@ func GenerateImagePreview(srcPath, outPath string, width, height int) error {
return Save(c, outPath) return Save(c, outPath)
} }
// GenerateVideoPreview 生成视频预览 (动态 WebP) // GenerateVideoPreview 生成视频预览
// 支持缩放并裁剪以填充指定尺寸 (Fill 模式) // 根据 outPath 后缀判断输出格式:
func GenerateVideoPreview(videoPath, outPath string, width, height int) error { // - .webp | .gif: 生成动态动画 (默认每 30 秒采样一帧,可通过 frameInterval 调整)
// - .jpg | .jpeg | .png: 生成单张预览图 (取视频中间帧)
// - 其他: 将 outPath 视为文件夹,在其中生成多张静态 .webp 图像
// frameInterval: 每隔多少秒采样一帧,默认 30。
func GenerateVideoPreview(videoPath, outPath string, width, height int, frameInterval ...int) error {
v, err := NewVideo() v, err := NewVideo()
if err != nil { if err != nil {
return err return err
@ -51,44 +55,71 @@ func GenerateVideoPreview(videoPath, outPath string, width, height int) error {
return err return err
} }
// 动态计算帧数: 适合交给大模型 (VLM) 处理 ext := strings.ToLower(filepath.Ext(outPath))
// 避免过多的帧浪费 Token,每 30 秒 1 帧,最少 3 帧,最多 8 帧 vf := fmt.Sprintf("scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d", width, height, width, height)
frameCount := int(duration / 30.0)
// 1. 单张图片模式
if ext == ".jpg" || ext == ".jpeg" || ext == ".png" {
t := duration * 0.5
cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, "-y", outPath)
return cmd.Run()
}
// 2. 动画或多图模式需要计算多帧
interval := 30
if len(frameInterval) > 0 && frameInterval[0] > 0 {
interval = frameInterval[0]
}
// 动态计算帧数: 避免过多的帧浪费 Token,每 interval 秒 1 帧,最少 3 帧,最多 8 帧
frameCount := int(duration / float64(interval))
if frameCount < 3 { if frameCount < 3 {
frameCount = 3 frameCount = 3
} else if frameCount > 8 { } else if frameCount > 8 {
frameCount = 8 frameCount = 8
} }
// 在 10% 到 90% 之间均匀采样,跳过片头片尾的可能黑屏
times := make([]float64, frameCount) times := make([]float64, frameCount)
if frameCount == 1 { for i := 0; i < frameCount; i++ {
times[0] = duration * 0.5 times[i] = duration * (0.10 + 0.80*(float64(i)/float64(frameCount-1)))
} else {
for i := 0; i < frameCount; i++ {
times[i] = duration * (0.10 + 0.80*(float64(i)/float64(frameCount-1)))
}
} }
tmpDir, _ := os.MkdirTemp("", "frames") // 2a. 动画模式 (.webp, .gif)
defer os.RemoveAll(tmpDir) if ext == ".webp" || ext == ".gif" {
tmpDir, _ := os.MkdirTemp("", "frames")
defer os.RemoveAll(tmpDir)
// 使用 ffmpeg 的 scale 和 crop 滤镜实现 Fill 效果 for i, t := range times {
vf := fmt.Sprintf("scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d", width, height, width, height) framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%02d.png", i))
cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, "-y", framePath)
if err := cmd.Run(); err != nil {
return err
}
}
var cmd *exec.Cmd
if ext == ".webp" {
cmd = exec.Command(v.FFmpegPath, "-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%02d.png"),
"-c:v", "libwebp", "-lossless", "0", "-quality", "70", "-loop", "0", "-y", outPath)
} else {
cmd = exec.Command(v.FFmpegPath, "-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%02d.png"), "-y", outPath)
}
return cmd.Run()
}
// 2b. 文件夹多图模式
if err := os.MkdirAll(outPath, 0755); err != nil {
return err
}
for i, t := range times { for i, t := range times {
framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%02d.png", i)) framePath := filepath.Join(outPath, fmt.Sprintf("%d.webp", i+1))
// 使用 -ss 快速 seek,性能极高 cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, "-c:v", "libwebp", "-quality", "80", "-y", framePath)
cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, "-y", framePath)
if err := cmd.Run(); err != nil { if err := cmd.Run(); err != nil {
return err return err
} }
} }
// 生成 WebP 动画,每秒 1 帧 (1000ms 间隔,让大模型能看清每一帧) return nil
cmd := exec.Command(v.FFmpegPath, "-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%02d.png"),
"-c:v", "libwebp", "-lossless", "0", "-quality", "70", "-loop", "0", "-y", outPath)
return cmd.Run()
} }
// GenerateAudioPreview 提取 3 分钟内的音频用于预览或语音转写 // GenerateAudioPreview 提取 3 分钟内的音频用于预览或语音转写

View File

@ -76,6 +76,32 @@ func TestPreviewer(t *testing.T) {
} }
}) })
t.Run("GenerateVideoPreview_SingleImage", func(t *testing.T) {
jpgPath := filepath.Join(tmpDir, "preview.jpg")
err := GenerateVideoPreview(videoPath, jpgPath, 160, 120)
if err != nil {
t.Errorf("GenerateVideoPreview (jpg) failed: %v", err)
}
if _, err := os.Stat(jpgPath); os.IsNotExist(err) {
t.Error("JPG output not created")
}
})
t.Run("GenerateVideoPreview_Directory", func(t *testing.T) {
dirPath := filepath.Join(tmpDir, "frames_dir")
err := GenerateVideoPreview(videoPath, dirPath, 160, 120)
if err != nil {
t.Errorf("GenerateVideoPreview (dir) failed: %v", err)
}
files, err := os.ReadDir(dirPath)
if err != nil {
t.Fatalf("ReadDir failed: %v", err)
}
if len(files) == 0 {
t.Error("No frames generated in directory")
}
})
t.Run("GenerateAudioPreview", func(t *testing.T) { t.Run("GenerateAudioPreview", func(t *testing.T) {
err := GenerateAudioPreview(videoPath, oggPath) err := GenerateAudioPreview(videoPath, oggPath)
if err != nil { if err != nil {