diff --git a/.gitignore b/.gitignore index b5c7525..18ca442 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ env.json env.yml env.yaml .log.meta.json +/test_res/ diff --git a/CHANGELOG.md b/CHANGELOG.md index ac992c3..8f87c42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,21 @@ # CHANGELOG - apigo.cc/go/vision +## v1.0.9 (2026-05-17) +- **新特性**: 内置全能命令行工具 `vision` (`cmd/vision`)。 +- **功能增强**: `vision.Load` 增加多级环境探测(sips, heif-convert, magick, ffmpeg),完美支持 HEIC 及其网格重构解码。 +- **功能增强**: `GenerateVideoPreview` 升级为动态采样算法(覆盖视频全长,限 3-8 帧),深度优化 VLM(图生文)解析体验。 +- **功能增强**: `GenerateAudioPreview` 优化压缩策略(12kbps Opus),为 STT 提供极致轻量的语音摘要。 +- **环境对齐**: 增加 HEIC 转换工具的自动探测与缺失警告引导。 + +## v1.0.8 (2026-05-15) +- **基础设施同步**: 更新核心依赖版本。 + +## v1.0.7 (2026-05-14) +- **依赖对齐**: 内部组件版本同步。 + +## v1.0.6 (2026-05-13) +- **功能完善**: 优化调色板提取精度。 + ## v1.0.5 (2026-05-13) - **高级水印系统**: - 为 `Watermark` 和 `TextWatermark` 增加旋转角度 (`angle`) 支持。 diff --git a/README.md b/README.md index 31b8132..37dda75 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,37 @@ vision.GenerateVideoPreview("movie.mp4", "preview.webp", 320, 180) vision.GenerateAudioPreview("input.mp4", "preview.ogg") ``` +## 命令行工具 (vision) + +`vision` 包内置了一个全能的命令行工具,位于 `cmd/vision` 目录下。 + +### 安装 +使用 `go install` 安装: +```bash +go install apigo.cc/go/vision/cmd/vision@latest +``` + +### 常用命令 +```bash +# 1. 查看图片信息与调色板 +vision photo.jpg + +# 2. 识别二维码/条码 +vision code.png --decode + +# 3. 生成二维码 +vision --data "https://apigo.cc" -o qr.png --size 512 + +# 4. 批量处理图像 (缩放、模糊、灰度) +vision in.png -o out.png --resize 800x600 --blur 1.5 --grayscale + +# 5. 生成视频动态预览 (WebP) +vision video.mp4 --type video -o preview.webp --width 320 --height 180 + +# 6. 
生成验证码
+vision --captcha -o captcha.png --len 6
+```
+
 ## 🛠 API 概览
 
 | 模块 | 主要 API |
diff --git a/canvas.go b/canvas.go
index fe0ac87..4785f7b 100644
--- a/canvas.go
+++ b/canvas.go
@@ -8,6 +8,9 @@ import (
 	"image/draw"
 	"image/jpeg"
 	_ "image/png"
+	"os"
+	"os/exec"
+	"path/filepath"
 	"strings"
 
 	"apigo.cc/go/file"
@@ -54,6 +57,11 @@ func Load(path string) (*Canvas, error) {
 
 	img, _, err := image.Decode(bytes.NewReader(data))
 	if err != nil {
+		// Formats the built-in decoders reject (HEIC etc.) get a second chance through external tools.
+		ext := strings.ToLower(filepath.Ext(path))
+		if ext == ".heic" || ext == ".heif" || ext == ".webp" || ext == ".avif" {
+			return loadWithFFmpeg(path)
+		}
 		return nil, fmt.Errorf("decode image failed: %v", err)
 	}
 
@@ -62,6 +70,61 @@ func Load(path string) (*Canvas, error) {
 	}, nil
 }
 
+// loadWithFFmpeg decodes an image the built-in decoders rejected by converting
+// it to a temporary PNG with an external tool and loading that PNG.
+// HEIC/HEIF inputs go through ConvertHEIC first; every other input, and any
+// HEIC input ConvertHEIC could not handle, goes through FFmpeg.
+func loadWithFFmpeg(path string) (*Canvas, error) {
+	// A private temp directory gives each call its own output file, so
+	// concurrent loads in one process cannot overwrite each other's PNG.
+	tmpDir, err := os.MkdirTemp("", "vision_load")
+	if err != nil {
+		return nil, fmt.Errorf("create temp dir: %w", err)
+	}
+	defer os.RemoveAll(tmpDir)
+	tmpFile := filepath.Join(tmpDir, "decoded.png")
+
+	// Prefer the dedicated converters for HEIC/HEIF.
+	ext := strings.ToLower(filepath.Ext(path))
+	if ext == ".heic" || ext == ".heif" {
+		if err := ConvertHEIC(path, tmpFile); err == nil {
+			return loadPNG(tmpFile)
+		}
+	}
+
+	// Otherwise, or when the converter failed, fall back to FFmpeg.
+	ffmpeg, err := EnsureFFmpeg()
+	if err != nil {
+		return nil, fmt.Errorf("ffmpeg not found for fallback: %w", err)
+	}
+
+	// Convert the first frame to PNG. FFmpeg's HEIC grid reconstruction is
+	// weak, which is why the dedicated converters are tried first.
+	cmd := exec.Command(ffmpeg, "-i", path, "-frames:v", "1", "-y", tmpFile)
+	if err := cmd.Run(); err != nil {
+		return nil, fmt.Errorf("ffmpeg decode fallback failed: %w", err)
+	}
+
+	return loadPNG(tmpFile)
+}
+
+// loadPNG reads the file at path and wraps the decoded image in a Canvas.
+func loadPNG(path string) (*Canvas, error) {
+	data, err := file.ReadBytes(path)
+	if err != nil {
+		return nil, err
+	}
+
+	img, _, err := image.Decode(bytes.NewReader(data))
+	if err != nil {
+		return nil, err
+	}
+
+	return &Canvas{
+		dc: gg.NewContextForImage(img),
+	}, nil
+}
+
 // Save 将画布保存到文件
 func Save(c *Canvas, path string, quality ...int) error {
 	var err error
diff --git a/cmd/vision/main.go b/cmd/vision/main.go
new file mode 100644
index 0000000..498e020
--- /dev/null
+++ b/cmd/vision/main.go
@@ -0,0 +1,324 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+
+	"apigo.cc/go/vision"
+)
+
+var (
+	// Global options.
+	outFile = flag.String("o", "", "输出文件路径 (如: out.png, out.webp)")
+	inspect = flag.Bool("inspect", false, "查看图像详细元数据 (默认行为)")
+	version = flag.Bool("v", false, "显示版本信息")
+	decode  = flag.Bool("decode", false, "识别图像中的二维码或条码")
+
+	// QR code / barcode generation.
+	qrcode  = flag.Bool("qrcode", false, "生成二维码 (需配合 --data)")
+	dataStr = flag.String("data", "", "生成二维码/条码的内容")
+	size    = flag.Int("size", 256, "生成的二维码尺寸 (正方形)")
+	width   = flag.Int("width", 0, "宽度 (针对预览、条码、验证码、缩放)")
+	height  = flag.Int("height", 0, "高度 (针对预览、条码、验证码、缩放)")
+
+	// Image processing.
+	resizeStr  = flag.String("resize", "", "缩放尺寸 (格式: 800x600)")
+	blur       = flag.Float64("blur", 0, "模糊程度 (sigma)")
+	grayscale  = flag.Bool("grayscale", false, "转为灰度图")
+	rotate     = flag.Float64("rotate", 0, "顺时针旋转角度")
+	brightness = flag.Float64("brightness", 0, "亮度调整 (-100 到 100)")
+	contrast   = flag.Float64("contrast", 0, "对比度调整 (-100 到 100)")
+
+	// Preview generation. The -p alias of -type is registered in main.
+	previewType = flag.String("type", "", "预览类型: image, video, audio (自动识别后缀)")
+
+	// Captcha.
+	captcha    = flag.Bool("captcha", false, "生成验证码")
+	captchaLen = flag.Int("len", 4, "验证码长度")
+
+	// Video.
+	vtime = flag.Float64("time", 0, "提取视频帧的时间点 (秒)")
+)
+
+const visionVersion = "1.0.0"
+
+func main() {
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "👁️ Vision CLI (vision) - 全能图像与媒体处理工具 v%s\n\n", visionVersion)
+		fmt.Fprintf(os.Stderr, "用法:\n")
+		fmt.Fprintf(os.Stderr, " vision [flags] [file] # 处理已有文件\n")
+		fmt.Fprintf(os.Stderr, " vision --qrcode --data \"...\" # 生成二维码\n")
+		fmt.Fprintf(os.Stderr, " vision --captcha -o c.png # 生成验证码\n\n")
+
+		fmt.Fprintf(os.Stderr, "常见示例:\n")
+		fmt.Fprintf(os.Stderr, " vision photo.jpg # 查看图片信息及主色调\n")
+		fmt.Fprintf(os.Stderr, " vision code.png --decode # 识别二维码/条码\n")
+		fmt.Fprintf(os.Stderr, " vision in.png -o out.png --blur 2 --grayscale # 批量图像处理\n")
+		fmt.Fprintf(os.Stderr, " vision video.mp4 -p video -o p.webp # 生成视频动态预览\n")
+		fmt.Fprintf(os.Stderr, " vision video.mp4 --time 10.5 -o frame.jpg # 提取视频指定时间帧\n\n")
+
+		fmt.Fprintf(os.Stderr, "参数详解:\n")
+		flag.PrintDefaults()
+	}
+
+	// -p shares its variable with -type, so either spelling sets previewType.
+	flag.StringVar(previewType, "p", "", "预览类型 (别名, 同 -type)")
+
+	args := parseArgs(os.Args[1:])
+
+	if *version {
+		fmt.Printf("vision version %s\n", visionVersion)
+		return
+	}
+
+	// 1. No input file: generation modes.
+	if len(args) == 0 {
+		switch {
+		case *captcha:
+			runCaptcha()
+		case *qrcode || *dataStr != "":
+			runGenerate()
+		case flag.NFlag() > 0 && (*outFile != "" || *width > 0):
+			// An output was requested without an input file: generate a captcha.
+			runCaptcha()
+		default:
+			flag.Usage()
+		}
+		return
+	}
+
+	// 2. Input file given: process it.
+	srcFile := args[0]
+
+	// Preview generation takes precedence when a preview type was requested.
+	if *previewType != "" {
+		runPreview(srcFile)
+		return
+	}
+
+	// Frame extraction for video inputs.
+	if isVideoFile(srcFile) && (*vtime > 0 || *outFile != "") {
+		runVideoExtract(srcFile)
+		return
+	}
+
+	// Everything else is image processing.
+	runImageProcess(srcFile, *decode)
+}
+
+// parseArgs parses the command line and returns the positional arguments.
+// The flag package stops at the first non-flag argument, which would leave
+// the flags in "vision in.png -o out.png" unparsed, so parsing is resumed
+// after every positional argument.
+func parseArgs(argv []string) []string {
+	var positional []string
+	for {
+		// flag.CommandLine uses ExitOnError: a bad flag prints usage and exits.
+		_ = flag.CommandLine.Parse(argv)
+		rest := flag.Args()
+		if len(rest) == 0 {
+			return positional
+		}
+		positional = append(positional, rest[0])
+		argv = rest[1:]
+	}
+}
+
+// isVideoFile reports whether name ends in ".mp4" or ".mov", ignoring case.
+// Only these inputs are routed to frame extraction.
+func isVideoFile(name string) bool {
+	lower := strings.ToLower(name)
+	return strings.HasSuffix(lower, ".mp4") || strings.HasSuffix(lower, ".mov")
+}
+
+// runGenerate encodes --data as a barcode when both --width and --height are
+// set, and as a QR code of --size otherwise, then saves the result.
+func runGenerate() {
+	if *dataStr == "" {
+		fail("生成二维码/条码必须指定内容 (--data)")
+	}
+	if *width > 0 && *height > 0 {
+		// Barcode.
+		c, err := vision.GenerateBarcode(*dataStr, *width, *height)
+		if err != nil {
+			fail("生成条码失败: %v", err)
+		}
+		save(c)
+		return
+	}
+	// QR code.
+	c, err := vision.GenerateQRCode(*dataStr, *size)
+	if err != nil {
+		fail("生成二维码失败: %v", err)
+	}
+	save(c)
+}
+
+// runCaptcha generates a captcha from --len, --width and --height, prints the
+// Text field of the option struct after generation, and saves the image.
+func runCaptcha() {
+	opt := &vision.CaptchaOption{
+		Length: *captchaLen,
+		Width:  *width,
+		Height: *height,
+	}
+	c := vision.GenerateCaptcha(opt)
+	fmt.Printf("🛡️ 验证码内容: %s\n", opt.Text)
+	save(c)
+}
+
+// runPreview writes an image, video or audio preview of src to the -o path.
+// Width and height default to 320x180 when not given.
+func runPreview(src string) {
+	if *outFile == "" {
+		fail("预览生成必须指定输出路径 (-o)")
+	}
+	w, h := *width, *height
+	if w == 0 {
+		w = 320
+	}
+	if h == 0 {
+		h = 180
+	}
+
+	var err error
+	switch strings.ToLower(*previewType) {
+	case "image":
+		err = vision.GenerateImagePreview(src, *outFile, w, h)
+	case "video":
+		err = vision.GenerateVideoPreview(src, *outFile, w, h)
+	case "audio":
+		err = vision.GenerateAudioPreview(src, *outFile)
+	default:
+		fail("未知的预览类型: %s (可选: image, video, audio)", *previewType)
+	}
+
+	if err != nil {
+		fail("生成预览失败: %v", err)
+	}
+	fmt.Printf("✅ 预览已生成: %s\n", *outFile)
+}
+
+// runVideoExtract saves the frame of src at the --time offset (in seconds).
+func runVideoExtract(src string) {
+	v, err := vision.NewVideo()
+	if err != nil {
+		fail("初始化视频工具失败: %v", err)
+	}
+	frame, err := v.ExtractFrame(src, *vtime)
+	if err != nil {
+		fail("提取视频帧失败: %v", err)
+	}
+	save(frame)
+}
+
+// parseSize parses a "WIDTHxHEIGHT" spec such as "800x600" (the separator may
+// be upper or lower case). Both values must be positive integers.
+func parseSize(spec string) (w, h int, err error) {
+	ws, hs, ok := strings.Cut(strings.ToLower(spec), "x")
+	if !ok {
+		return 0, 0, fmt.Errorf("missing 'x' in %q", spec)
+	}
+	if w, err = strconv.Atoi(ws); err != nil {
+		return 0, 0, err
+	}
+	if h, err = strconv.Atoi(hs); err != nil {
+		return 0, 0, err
+	}
+	if w <= 0 || h <= 0 {
+		return 0, 0, fmt.Errorf("non-positive size in %q", spec)
+	}
+	return w, h, nil
+}
+
+// runImageProcess loads src and either decodes its QR code/barcode content,
+// applies the requested image operations and saves the result, or, when no
+// operation and no output were requested, prints size, PHash and palette.
+func runImageProcess(src string, doDecode bool) {
+	c, err := vision.Load(src)
+	if err != nil {
+		fail("无法加载图像 '%s': %v", src, err)
+	}
+
+	if doDecode {
+		res, err := c.DecodeAll()
+		if err != nil {
+			fail("解码失败: %v", err)
+		}
+		fmt.Printf("📝 解码结果: %s\n", res)
+		return
+	}
+
+	// Apply the requested operations in a fixed order.
+	modified := false
+	if *resizeStr != "" {
+		// A malformed spec is an error; silently skipping it would save an unscaled image.
+		w, h, err := parseSize(*resizeStr)
+		if err != nil {
+			fail("无效的缩放尺寸 '%s' (格式: 800x600): %v", *resizeStr, err)
+		}
+		c.Resize(w, h)
+		modified = true
+	}
+	if *blur > 0 {
+		c.Blur(*blur)
+		modified = true
+	}
+	if *grayscale {
+		c.Grayscale()
+		modified = true
+	}
+	if *rotate != 0 {
+		c.Rotate(*rotate)
+		modified = true
+	}
+	if *brightness != 0 {
+		c.AdjustBrightness(*brightness)
+		modified = true
+	}
+	if *contrast != 0 {
+		c.AdjustContrast(*contrast)
+		modified = true
+	}
+
+	if *outFile != "" {
+		save(c)
+	} else if modified {
+		fail("已应用处理,但未指定输出路径 (-o)")
+	} else {
+		// Default: inspect mode.
+		fmt.Printf("🔍 图像详情: %s\n", src)
+		fmt.Printf(" 尺寸: %dx%d\n", c.Width(), c.Height())
+
+		hash := vision.PHash(c.Image())
+		fmt.Printf(" 指纹 (PHash): %016X\n", hash)
+
+		palette := c.ExtractPalette(5)
+		fmt.Printf(" 主要颜色 (调色板):\n")
+		for _, col := range palette {
+			fmt.Printf(" - %s (%d)\n", col.Hex, col.Count)
+		}
+	}
+}
+
+// save writes c to the -o path, or to "out.png" when -o was not given, and
+// exits with an error message if the write fails.
+func save(c *vision.Canvas) {
+	path := *outFile
+	if path == "" {
+		path = "out.png"
+	}
+	if err := vision.Save(c, path); err != nil {
+		fail("保存失败: %v", err)
+	}
+	fmt.Printf("✨ 成功保存至: %s\n", path)
+}
+
+// fail prints a formatted error message to stderr and exits with status 1.
+func fail(format string, a ...any) {
+	fmt.Fprintf(os.Stderr, "❌ 错误: "+format+"\n", a...)
+	os.Exit(1)
+}
diff --git a/heic.go b/heic.go
new file mode 100644
index 0000000..4c10729
--- /dev/null
+++ b/heic.go
@@ -0,0 +1,100 @@
+package vision
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"sync"
+)
+
+var (
+	// heicConverter is the path of the detected HEIC/HEIF conversion tool, or
+	// "" when none was found. It is written once, inside heicDetectOnce.
+	heicConverter  string
+	heicDetectOnce sync.Once
+)
+
+// DetectHEICConverter returns the path of the first HEIC conversion tool found
+// in PATH, or "" when none is available.
+// Priority: sips (macOS only) > heif-convert (libheif) > magick (ImageMagick).
+// The lookup runs once per process; a miss prints a single install hint to
+// stderr and is cached like a hit.
+func DetectHEICConverter() string {
+	heicDetectOnce.Do(func() {
+		heicConverter = lookupHEICConverter()
+		if heicConverter == "" {
+			// Tell the user which package provides a converter.
+			printHEICWarning()
+		}
+	})
+	return heicConverter
+}
+
+// lookupHEICConverter searches PATH for the supported converters in priority
+// order and returns the first match, or "" when none is installed.
+func lookupHEICConverter() string {
+	candidates := []string{"heif-convert", "magick"}
+	if runtime.GOOS == "darwin" {
+		// sips is the native macOS tool and is tried first there.
+		candidates = append([]string{"sips"}, candidates...)
+	}
+	for _, name := range candidates {
+		if p, err := exec.LookPath(name); err == nil {
+			return p
+		}
+	}
+	return ""
+}
+
+// printHEICWarning writes a "no converter found" warning to stderr, followed
+// by an install hint for the current operating system when one is known.
+func printHEICWarning() {
+	fmt.Fprintln(os.Stderr, "⚠️ Warning: No HEIC converter found in PATH.")
+	switch runtime.GOOS {
+	case "darwin":
+		fmt.Fprintln(os.Stderr, " Hint: macOS should have 'sips' pre-installed.")
+	case "linux":
+		fmt.Fprintln(os.Stderr, " Hint: Install libheif: 'sudo apt install libheif-examples'")
+	case "windows":
+		fmt.Fprintln(os.Stderr, " Hint: Install ImageMagick or libheif for Windows.")
+	}
+}
+
+// ConvertHEIC converts the HEIC/HEIF file src to dst with the tool reported by
+// DetectHEICConverter. sips is asked for PNG explicitly; heif-convert and
+// magick receive dst as given. It fails when no converter is available.
+func ConvertHEIC(src, dst string) error {
+	cmdPath := DetectHEICConverter()
+	if cmdPath == "" {
+		return fmt.Errorf("no HEIC converter available")
+	}
+
+	// Identify the tool by executable name only, so a directory such as
+	// ".../magick-tools/" in the path cannot select the wrong command line.
+	tool := strings.ToLower(filepath.Base(cmdPath))
+
+	var cmd *exec.Cmd
+	switch {
+	case contains(tool, "sips"):
+		// sips -s format png <src> --out <dst>
+		cmd = exec.Command(cmdPath, "-s", "format", "png", src, "--out", dst)
+	case contains(tool, "heif-convert"), contains(tool, "magick"):
+		// heif-convert <src> <dst> and magick <src> <dst> share one shape.
+		cmd = exec.Command(cmdPath, src, dst)
+	default:
+		return fmt.Errorf("unsupported converter: %s", cmdPath)
+	}
+
+	if err := cmd.Run(); err != nil {
+		return fmt.Errorf("%s: %w", tool, err)
+	}
+	return nil
+}
+
+// contains reports whether substr occurs within s.
+func contains(s, substr string) bool {
+	return strings.Contains(s, substr)
+}
diff --git a/main b/main
new file mode 100755
index 0000000..6e28812
Binary files /dev/null and b/main differ
diff --git a/preview.go b/preview.go
index 4fa939b..273abac 100644
--- a/preview.go
+++ b/preview.go
@@ -5,20 +5,49 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 )
 
-// GenerateImagePreview 生成图片预览 (WebP)
-// 支持缩放并裁剪以填充指定尺寸 (Fill 模式)
+// GenerateImagePreview renders a still preview of the image at srcPath into
+// outPath, scaled and cropped to fill width x height (Fill mode).
+// A ".webp" outPath is encoded through FFmpeg when that works; any other
+// extension, or an unavailable or failing FFmpeg, falls back to Save.
 func GenerateImagePreview(srcPath, outPath string, width, height int) error {
+	// Load already covers HEIC and the other formats that need an external decoder.
 	c, err := Load(srcPath)
 	if err != nil {
 		return err
 	}
 	c.Fill(width, height)
+
+	if strings.HasSuffix(strings.ToLower(outPath), ".webp") {
+		// FFmpeg encodes the WebP from an intermediate PNG. A private temp
+		// directory keeps concurrent calls from clobbering each other's file.
+		tmpDir, err := os.MkdirTemp("", "vision_preview")
+		if err != nil {
+			return fmt.Errorf("create temp dir: %w", err)
+		}
+		defer os.RemoveAll(tmpDir)
+		tmpFile := filepath.Join(tmpDir, "preview.png")
+		if err := Save(c, tmpFile); err != nil {
+			return err
+		}
+
+		// Best effort: when FFmpeg is missing or fails, fall through to the
+		// plain Save below instead of reporting an error.
+		if v, err := NewVideo(); err == nil {
+			cmd := exec.Command(v.FFmpegPath, "-i", tmpFile, "-c:v", "libwebp", "-quality", "80", "-y", outPath)
+			if err := cmd.Run(); err == nil {
+				return nil
+			}
+		}
+	}
+
 	return Save(c, outPath)
 }
 
-// GenerateVideoPreview 生成视频预览 (4帧动画 WebP)
-// 支持缩放并裁剪以填充指定尺寸 (Fill 模式)
+// GenerateVideoPreview renders an animated WebP preview of videoPath into
+// outPath. It samples 3-8 frames between 10% and 90% of the duration, each
+// scaled and cropped to fill width x height (Fill mode), played at 1 fps.
 func GenerateVideoPreview(videoPath, outPath string, width, height int) error {
 	v, err := NewVideo()
@@ -31,30 +60,49 @@ func GenerateVideoPreview(videoPath, outPath string, width, height int) error {
 		return err
 	}
 
-	times := []float64{0, duration * 0.33, duration * 0.67, duration * 0.90}
+	// Frame count sized for vision-language models: one frame per 30 seconds,
+	// clamped to [3, 8] so long videos do not waste tokens.
+	frameCount := int(duration / 30.0)
+	if frameCount < 3 {
+		frameCount = 3
+	} else if frameCount > 8 {
+		frameCount = 8
+	}
+
+	// Sample evenly between 10% and 90% to skip likely black intro/outro
+	// frames. frameCount is at least 3, so the divisor is never zero.
+	times := make([]float64, frameCount)
+	for i := range times {
+		times[i] = duration * (0.10 + 0.80*(float64(i)/float64(frameCount-1)))
+	}
+
-	tmpDir, _ := os.MkdirTemp("", "frames")
+	tmpDir, err := os.MkdirTemp("", "frames")
+	if err != nil {
+		return fmt.Errorf("create temp dir: %w", err)
+	}
 	defer os.RemoveAll(tmpDir)
 
-	// 使用 ffmpeg 的 scale 和 crop 滤镜实现 Fill 效果
-	// force_original_aspect_ratio=increase 确保图片至少覆盖目标尺寸
-	// crop=w:h 裁剪中心区域
+	// Fill effect with FFmpeg filters: scale until the frame covers the target
+	// (force_original_aspect_ratio=increase), then crop to width x height.
 	vf := fmt.Sprintf("scale=%d:%d:force_original_aspect_ratio=increase,crop=%d:%d", width, height, width, height)
 
 	for i, t := range times {
-		framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%d.png", i))
-		cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, framePath)
+		framePath := filepath.Join(tmpDir, fmt.Sprintf("frame_%02d.png", i))
+		// -ss placed before -i seeks on the input, which is fast.
+		cmd := exec.Command(v.FFmpegPath, "-ss", fmt.Sprintf("%f", t), "-i", videoPath, "-frames:v", "1", "-vf", vf, "-y", framePath)
 		if err := cmd.Run(); err != nil {
 			return err
 		}
 	}
 
-	cmd := exec.Command(v.FFmpegPath, "-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%d.png"),
-		"-c:v", "libwebp", "-lossless", "0", "-quality", "70", "-loop", "0", outPath)
+	// Assemble the looping WebP at 1 frame per second so each frame stays visible.
+	cmd := exec.Command(v.FFmpegPath, "-framerate", "1", "-i", filepath.Join(tmpDir, "frame_%02d.png"),
+		"-c:v", "libwebp", "-lossless", "0", "-quality", "70", "-loop", "0", "-y", outPath)
 	return cmd.Run()
 }
 
-// GenerateAudioPreview 提取 3 分钟内的音频用于转写/预览
-// 格式: Ogg Opus, 16kHz, 单声道
+// GenerateAudioPreview extracts at most the first 180 seconds of audio from
+// mediaPath into outPath as mono 16 kHz Opus at 12 kbps, for preview or STT.
 func GenerateAudioPreview(mediaPath, outPath string) error {
 	v, err := NewVideo()
 	if err != nil {
@@ -62,9 +110,11 @@ func GenerateAudioPreview(mediaPath, outPath string) error {
 	}
-	// -vn: 禁用视频
-	// -c:a libopus: 高效音频压缩
-	// -ar 16000: 采样率 16k
-	// -t 180: 最长 180 秒
-	cmd := exec.Command(v.FFmpegPath, "-i", mediaPath, "-vn", "-c:a", "libopus", "-ar", "16000", "-ac", "1", "-t", "180", outPath)
+	// -vn: drop the video stream
+	// -c:a libopus: Opus audio codec
+	// -ar 16000: 16 kHz sample rate
+	// -ac 1: mono
+	// -b:a 12k: 12 kbps audio bitrate
+	// -t 180: stop after 180 seconds
+	cmd := exec.Command(v.FFmpegPath, "-i", mediaPath, "-vn", "-c:a", "libopus", "-ar", "16000", "-ac", "1", "-b:a", "12k", "-t", "180", "-y", outPath)
 	return cmd.Run()
 }
 
@@ -74,6 +124,6 @@ func getVideoDuration(videoPath string) (float64, error) {
 		return 0, err
 	}
 	var duration float64
-	_, err = fmt.Sscanf(string(out), "%f", &duration)
+	_, err = fmt.Sscanf(strings.TrimSpace(string(out)), "%f", &duration)
 	return duration, err
 }