vision/transform.go

132 lines
3.4 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package vision
import (
"image"
"image/color"
"math"
"github.com/fogleman/gg"
)
// WarpPerspective applies a four-point perspective transform to the canvas.
//
// srcPoints holds four points in the current canvas image, ordered
// [TL, TR, BR, BL]. dstWidth and dstHeight give the size of the output image.
//
// Every output pixel is mapped back into the current image through the 3x3
// matrix returned by getPerspectiveTransform and sampled with
// bilinearInterpolation (inverse mapping). Afterwards c.dc is replaced with
// gg.NewContextForImage of the freshly built image.
func (c *Canvas) WarpPerspective(srcPoints [4]image.Point, dstWidth, dstHeight int) {
	source := c.dc.Image()
	warped := image.NewRGBA(image.Rect(0, 0, dstWidth, dstHeight))

	// h is a row-major 3x3 matrix taking output coordinates to source
	// coordinates.
	h := getPerspectiveTransform(srcPoints, dstWidth, dstHeight)

	for row := 0; row < dstHeight; row++ {
		fy := float64(row)
		for col := 0; col < dstWidth; col++ {
			fx := float64(col)

			// Homogeneous source coordinates; dividing by w projects them
			// back onto the image plane.
			w := h[6]*fx + h[7]*fy + h[8]
			srcX := (h[0]*fx + h[1]*fy + h[2]) / w
			srcY := (h[3]*fx + h[4]*fy + h[5]) / w

			warped.Set(col, row, bilinearInterpolation(source, srcX, srcY))
		}
	}

	c.dc = gg.NewContextForImage(warped)
}
// getPerspectiveTransform builds the row-major 3x3 matrix (last element fixed
// to 1) that maps destination-image coordinates back to source-image
// coordinates.
//
// The destination corners (0,0), (dw,0), (dw,dh), (0,dh) are paired, in that
// order, with the four src points. Each pair contributes two linear equations
// in the eight unknown matrix entries; the resulting 8x8 system is handed to
// solveLinearSystem.
func getPerspectiveTransform(src [4]image.Point, dw, dh int) [9]float64 {
	corners := [4]image.Point{
		image.Pt(0, 0),
		image.Pt(dw, 0),
		image.Pt(dw, dh),
		image.Pt(0, dh),
	}

	var coeffs [8][8]float64
	var rhs [8]float64
	for i, corner := range corners {
		dx, dy := float64(corner.X), float64(corner.Y)
		sx, sy := float64(src[i].X), float64(src[i].Y)

		// Rows 0-3: equation for the source X coordinate of pair i.
		coeffs[i] = [8]float64{dx, dy, 1, 0, 0, 0, -dx * sx, -dy * sx}
		rhs[i] = sx

		// Rows 4-7: equation for the source Y coordinate of pair i.
		coeffs[i+4] = [8]float64{0, 0, 0, dx, dy, 1, -dx * sy, -dy * sy}
		rhs[i+4] = sy
	}

	h := solveLinearSystem(coeffs, rhs)
	return [9]float64{h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], 1.0}
}
// solveLinearSystem solves the 8x8 linear system a·x = b using Gaussian
// elimination with partial (row) pivoting followed by back substitution.
//
// a and b are arrays and are therefore copied on call; the caller's values
// are never modified.
//
// Row pivoting is required for correctness, not only for accuracy: a system
// can be perfectly solvable while having a zero on the diagonal (for example
// a first row of [0 0 1 0 0 0 0 0]), and dividing by that entry would turn
// every result into NaN.
//
// If some column has no usable pivot (the matrix is singular, or the pivot
// candidate is NaN), every element of the returned array is NaN.
func solveLinearSystem(a [8][8]float64, b [8]float64) [8]float64 {
	const n = 8

	// Forward elimination.
	for col := 0; col < n; col++ {
		// Pick the row at or below the diagonal whose entry in this column
		// has the largest magnitude.
		pivotRow := col
		pivotMag := math.Abs(a[col][col])
		for r := col + 1; r < n; r++ {
			if mag := math.Abs(a[r][col]); mag > pivotMag {
				pivotRow, pivotMag = r, mag
			}
		}

		// Written as !(m > 0) so that a NaN magnitude is rejected as well.
		if !(pivotMag > 0) {
			var failed [8]float64
			for i := range failed {
				failed[i] = math.NaN()
			}
			return failed
		}

		if pivotRow != col {
			a[col], a[pivotRow] = a[pivotRow], a[col]
			b[col], b[pivotRow] = b[pivotRow], b[col]
		}

		pivot := a[col][col]
		for r := col + 1; r < n; r++ {
			factor := a[r][col] / pivot
			for k := col; k < n; k++ {
				a[r][k] -= a[col][k] * factor
			}
			b[r] -= b[col] * factor
		}
	}

	// Back substitution on the upper-triangular system.
	var x [8]float64
	for i := n - 1; i >= 0; i-- {
		sum := 0.0
		for j := i + 1; j < n; j++ {
			sum += a[i][j] * x[j]
		}
		x[i] = (b[i] - sum) / a[i][i]
	}
	return x
}
// bilinearInterpolation samples img at the fractional position (x, y).
//
// Let (x0, y0) = (floor(x), floor(y)). When the four pixels (x0, y0),
// (x0+1, y0), (x0, y0+1) and (x0+1, y0+1) all lie inside img.Bounds(), the
// result is their bilinear blend. Blending is done per channel on the 16-bit
// alpha-premultiplied values returned by Color.RGBA, and the result is
// reduced to 8 bits per channel and returned as a color.RGBA.
//
// Otherwise no blending takes place and the result is
// img.At(int(x), int(y)), i.e. whatever img reports for that (possibly
// out-of-bounds) pixel.
//
// Coordinates that are NaN, infinite, or of magnitude 2^30 or more are never
// converted to int, because the result of such a conversion is
// implementation-dependent in Go. They are treated as lying outside the image
// and yield img.At for a point just outside img.Bounds().
func bilinearInterpolation(img image.Image, x, y float64) color.Color {
	bounds := img.Bounds()

	// Written as !(a && b) so that NaN, for which every comparison is false,
	// takes this branch too.
	const coordLimit = 1 << 30
	if !(math.Abs(x) < coordLimit && math.Abs(y) < coordLimit) {
		return img.At(bounds.Min.X-1, bounds.Min.Y-1)
	}

	x0, y0 := int(math.Floor(x)), int(math.Floor(y))
	x1, y1 := x0+1, y0+1
	if x0 < bounds.Min.X || x1 >= bounds.Max.X || y0 < bounds.Min.Y || y1 >= bounds.Max.Y {
		// Not all four neighbours exist: fall back to a single lookup.
		return img.At(int(x), int(y))
	}

	// Fractional offsets inside the 2x2 neighbourhood.
	dx := x - float64(x0)
	dy := y - float64(y0)

	r00, g00, b00, a00 := img.At(x0, y0).RGBA()
	r01, g01, b01, a01 := img.At(x0, y1).RGBA()
	r10, g10, b10, a10 := img.At(x1, y0).RGBA()
	r11, g11, b11, a11 := img.At(x1, y1).RGBA()

	// lerp blends one channel: first along x on rows y0 and y1, then along y.
	// The 16-bit result is shifted down to 8 bits.
	lerp := func(v00, v01, v10, v11 uint32) uint8 {
		top := float64(v00)*(1-dx) + float64(v10)*dx
		bottom := float64(v01)*(1-dx) + float64(v11)*dx
		return uint8(uint32(top*(1-dy)+bottom*dy) >> 8)
	}

	return color.RGBA{
		R: lerp(r00, r01, r10, r11),
		G: lerp(g00, g01, g10, g11),
		B: lerp(b00, b01, b10, b11),
		A: lerp(a00, a01, a10, a11),
	}
}