vision/preview.go

120 lines
3.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package vision
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
)
// GenerateImagePreview writes a preview of the image at srcPath to outPath.
//
// The loaded image is passed through Fill(width, height) (scale and crop to
// fill the requested size, per the original note) and then saved. When outPath
// ends in ".webp" (case-insensitive) the image is first saved as a temporary
// PNG and converted with FFmpeg's libwebp encoder; if FFmpeg is unavailable or
// the conversion fails, the function falls back to Save(c, outPath).
//
// It returns an error if the source cannot be loaded, the temporary directory
// or PNG cannot be written, or the final Save fails.
func GenerateImagePreview(srcPath, outPath string, width, height int) error {
	// Load is the package's unified loader; per the original note it is
	// expected to handle HEIC/sips/FFmpeg format compatibility internally.
	c, err := Load(srcPath)
	if err != nil {
		return err
	}
	c.Fill(width, height)

	if strings.HasSuffix(strings.ToLower(outPath), ".webp") {
		// Use a private temp directory per call. A name derived only from the
		// PID is shared by every goroutine in the process, so concurrent calls
		// would overwrite or delete each other's intermediate file.
		tmpDir, mkErr := os.MkdirTemp("", "preview")
		if mkErr != nil {
			return fmt.Errorf("creating preview temp dir: %w", mkErr)
		}
		defer os.RemoveAll(tmpDir)

		tmpFile := filepath.Join(tmpDir, "preview.png")
		if err := Save(c, tmpFile); err != nil {
			return err
		}

		// Best effort: let FFmpeg turn the rendered canvas into a WebP.
		// Any failure here falls through to the plain Save below.
		if v, err := NewVideo(); err == nil {
			cmd := exec.Command(v.FFmpegPath, "-i", tmpFile, "-c:v", "libwebp", "-quality", "80", "-y", outPath)
			if err := cmd.Run(); err == nil {
				return nil
			}
		}
	}
	return Save(c, outPath)
}
// GenerateVideoPreview builds an animated WebP preview of videoPath at outPath.
//
// Between 3 and 8 still frames are sampled evenly from 10% to 90% of the
// video's duration, each scaled and cropped by FFmpeg to exactly width x height
// (Fill mode), and then assembled into a looping WebP at one frame per second.
//
// It returns an error if FFmpeg cannot be located, the duration cannot be
// probed, the temporary directory cannot be created, or any FFmpeg invocation
// fails.
func GenerateVideoPreview(videoPath, outPath string, width, height int) error {
	v, err := NewVideo()
	if err != nil {
		return err
	}
	duration, err := getVideoDuration(videoPath)
	if err != nil {
		return err
	}

	// Frame budget sized for vision-language-model consumption: roughly one
	// frame per 30 seconds, clamped to [3, 8] so tokens are not wasted.
	frameCount := int(duration / 30.0)
	if frameCount < 3 {
		frameCount = 3
	} else if frameCount > 8 {
		frameCount = 8
	}

	// Sample evenly between 10% and 90% to skip possible black frames at the
	// very start and end. frameCount is at least 3, so the divisor is >= 2.
	times := make([]float64, frameCount)
	for i := range times {
		times[i] = duration * (0.10 + 0.80*(float64(i)/float64(frameCount-1)))
	}

	// Without this check a failed MkdirTemp would leave tmpDir empty, and the
	// frames would be written into (and left behind in) the working directory.
	tmpDir, err := os.MkdirTemp("", "frames")
	if err != nil {
		return fmt.Errorf("creating frame temp dir: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	// scale (covering the target box) followed by crop yields the Fill effect.
	vf := fmt.Sprintf("scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d", width, height, width, height)
	for i, t := range times {
		framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%02d.png", i))
		// -ss placed before -i seeks on the input, which is fast.
		cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, "-y", framePath)
		if err := cmd.Run(); err != nil {
			return fmt.Errorf("extracting frame %d at %.3fs from %q: %w", i, t, videoPath, err)
		}
	}

	// Assemble the animation at 1 fps (1000 ms per frame) so each frame stays
	// visible long enough for a model to inspect it.
	cmd := exec.Command(v.FFmpegPath, "-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%02d.png"),
		"-c:v", "libwebp", "-lossless", "0", "-quality", "70", "-loop", "0", "-y", outPath)
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("encoding animated webp %q: %w", outPath, err)
	}
	return nil
}
// GenerateAudioPreview extracts up to the first 3 minutes of audio from
// mediaPath into outPath, for previewing or speech transcription.
//
// Output is encoded with libopus at 16 kHz, mono, 12 kbps: aggressive
// compression intended to keep speech intelligible.
//
// It returns an error if FFmpeg cannot be located or the FFmpeg run fails.
func GenerateAudioPreview(mediaPath, outPath string) error {
	v, err := NewVideo()
	if err != nil {
		return err
	}

	args := []string{
		"-i", mediaPath,
		"-vn",             // drop the video stream
		"-c:a", "libopus", // efficient audio codec
		"-ar", "16000", // 16 kHz sample rate (common for transcription)
		"-ac", "1", // mono
		"-b:a", "12k", // very low bitrate
		"-t", "180", // cap at 180 seconds, enough for a content summary
		"-y", outPath,
	}
	return exec.Command(v.FFmpegPath, args...).Run()
}
// getVideoDuration runs ffprobe (resolved via PATH) on videoPath and parses
// the container-level "duration" entry it prints as a float64.
//
// It returns 0 and the error if ffprobe fails; if the output cannot be parsed
// as a float the parse error is returned.
func getVideoDuration(videoPath string) (float64, error) {
	probe := exec.Command(
		"ffprobe",
		"-v", "error",
		"-show_entries", "format=duration",
		"-of", "default=noprint_wrappers=1:nokey=1",
		videoPath,
	)
	raw, err := probe.Output()
	if err != nil {
		return 0, err
	}

	// The selected output format prints just the bare value; trim the
	// surrounding whitespace before scanning.
	var seconds float64
	text := strings.TrimSpace(string(raw))
	_, err = fmt.Sscanf(text, "%f", &seconds)
	return seconds, err
}