vision/transform.go

132 lines
3.4 KiB
Go
Raw Normal View History

package vision
import (
"image"
"image/color"
"math"
"github.com/fogleman/gg"
)
// WarpPerspective applies a four-point perspective transform to the canvas.
//
// srcPoints holds four points in the current image, ordered
// [TL, TR, BR, BL]. dstWidth and dstHeight give the size of the output image.
// When the warp is done, the canvas drawing context is replaced by a new
// context created from the warped image.
func (c *Canvas) WarpPerspective(srcPoints [4]image.Point, dstWidth, dstHeight int) {
	source := c.dc.Image()
	warped := image.NewRGBA(image.Rect(0, 0, dstWidth, dstHeight))

	// h is a row-major 3x3 matrix that takes destination pixel coordinates
	// to source coordinates, so the loop below is a reverse mapping: every
	// output pixel looks up where it comes from in the source.
	h := getPerspectiveTransform(srcPoints, dstWidth, dstHeight)

	for row := 0; row < dstHeight; row++ {
		fy := float64(row)
		for col := 0; col < dstWidth; col++ {
			fx := float64(col)

			// Homogeneous coordinates of the source position.
			hx := h[0]*fx + h[1]*fy + h[2]
			hy := h[3]*fx + h[4]*fy + h[5]
			hw := h[6]*fx + h[7]*fy + h[8]

			// Perspective divide, then sample with bilinear interpolation.
			srcX := hx / hw
			srcY := hy / hw
			warped.Set(col, row, bilinearInterpolation(source, srcX, srcY))
		}
	}

	c.dc = gg.NewContextForImage(warped)
}
// getPerspectiveTransform computes the row-major 3x3 matrix that maps points
// of the destination rectangle back to the source quadrilateral (i.e. the
// destination-to-source direction used for reverse mapping).
//
// The destination corners are (0,0), (dw,0), (dw,dh), (0,dh) and are paired
// index-by-index with src. The last matrix element is fixed to 1, leaving
// eight unknowns that are obtained from an 8x8 linear system: for each point
// pair (u,v) -> (sx,sy)
//
//	h0*u + h1*v + h2 - h6*u*sx - h7*v*sx = sx
//	h3*u + h4*v + h5 - h6*u*sy - h7*v*sy = sy
//
// The system is assumed to be non-singular.
func getPerspectiveTransform(src [4]image.Point, dw, dh int) [9]float64 {
	corners := [4]image.Point{{0, 0}, {dw, 0}, {dw, dh}, {0, dh}}

	var coeffs [8][8]float64
	var rhs [8]float64
	for i, d := range corners {
		u, v := float64(d.X), float64(d.Y)
		sx, sy := float64(src[i].X), float64(src[i].Y)

		// Row i holds the equation for the source X coordinate,
		// row i+4 the equation for the source Y coordinate.
		coeffs[i] = [8]float64{u, v, 1, 0, 0, 0, -u * sx, -v * sx}
		rhs[i] = sx
		coeffs[i+4] = [8]float64{0, 0, 0, u, v, 1, -u * sy, -v * sy}
		rhs[i+4] = sy
	}

	solution := solveLinearSystem(coeffs, rhs)

	var m [9]float64
	copy(m[:], solution[:])
	m[8] = 1.0
	return m
}
// solveLinearSystem solves the 8x8 linear system a*x = b and returns x.
//
// It uses Gaussian elimination with partial pivoting followed by back
// substitution. Pivoting is required for correctness, not just accuracy: a
// matrix with a zero on its diagonal (for example one whose first row starts
// with 0) would otherwise cause a division by zero and a result full of
// NaN/Inf even though the system is perfectly solvable.
//
// Both arguments are arrays passed by value, so the caller's data is never
// modified. Singular systems are not detected; for them the division by a
// zero pivot typically produces non-finite (NaN/Inf) entries in the result.
func solveLinearSystem(a [8][8]float64, b [8]float64) [8]float64 {
	const n = 8

	// Forward elimination.
	for col := 0; col < n; col++ {
		// Partial pivoting: pick the row at or below the diagonal with the
		// largest absolute value in this column and move it into place.
		best := col
		for r := col + 1; r < n; r++ {
			if math.Abs(a[r][col]) > math.Abs(a[best][col]) {
				best = r
			}
		}
		if best != col {
			a[col], a[best] = a[best], a[col]
			b[col], b[best] = b[best], b[col]
		}

		pivot := a[col][col]
		for r := col + 1; r < n; r++ {
			factor := a[r][col] / pivot
			for k := col; k < n; k++ {
				a[r][k] -= a[col][k] * factor
			}
			b[r] -= b[col] * factor
		}
	}

	// Back substitution on the resulting upper-triangular system.
	var x [8]float64
	for i := n - 1; i >= 0; i-- {
		sum := 0.0
		for j := i + 1; j < n; j++ {
			sum += a[i][j] * x[j]
		}
		x[i] = (b[i] - sum) / a[i][i]
	}
	return x
}
// bilinearInterpolation samples img at the fractional position (x, y) by
// blending the four pixels surrounding it.
//
// If the 2x2 neighbourhood is not entirely inside the image bounds, no
// blending is done and the pixel at the truncated coordinates
// (int(x), int(y)) is returned as-is. Otherwise the result is a color.RGBA
// built from the 16-bit channel values reported by Color.RGBA, interpolated
// and reduced to 8 bits per channel.
func bilinearInterpolation(img image.Image, x, y float64) color.Color {
	left := int(math.Floor(x))
	top := int(math.Floor(y))
	right, bottom := left+1, top+1

	r := img.Bounds()
	inside := left >= r.Min.X && right < r.Max.X && top >= r.Min.Y && bottom < r.Max.Y
	if !inside {
		// Out of range: return the pixel directly.
		return img.At(int(x), int(y))
	}

	// Fractional offsets inside the pixel cell.
	fx := x - float64(left)
	fy := y - float64(top)

	// Corner order: top-left, bottom-left, top-right, bottom-right.
	corners := [4]color.Color{
		img.At(left, top),
		img.At(left, bottom),
		img.At(right, top),
		img.At(right, bottom),
	}

	// ch[channel][corner], channels in R, G, B, A order.
	var ch [4][4]uint32
	for i, c := range corners {
		ch[0][i], ch[1][i], ch[2][i], ch[3][i] = c.RGBA()
	}

	var out [4]uint8
	for k := range ch {
		upper := float64(ch[k][0])*(1-fx) + float64(ch[k][2])*fx
		lower := float64(ch[k][1])*(1-fx) + float64(ch[k][3])*fx
		// Drop the low byte to go from 16-bit to 8-bit channel depth.
		out[k] = uint8(uint32(upper*(1-fy)+lower*fy) >> 8)
	}

	return color.RGBA{R: out[0], G: out[1], B: out[2], A: out[3]}
}