AntiScamAI/go/website.go

160 lines
3.6 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"encoding/json"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/ssgo/log"
"github.com/ssgo/u"
"net/http"
"strings"
"time"
)
// checkUrl scores the page at url via analyzeHandler and sends the result to
// the peer on conn as a JSON message of the form
// {"type": "urlResult", "result": <score>}.
//
// A negative score (-1 or -2, the failure codes of analyzeHandler) is logged
// as an error but is still sent. The returned error is the one reported by
// conn.WriteJSON, or nil when the message was written.
func checkUrl(url string) error {
	log.DefaultLogger.Info("UrlTest", "url", url)

	score := analyzeHandler(url)
	switch score {
	case -1, -2:
		log.DefaultLogger.Error("ckwebsiteErr")
	}
	log.DefaultLogger.Info("ckwebsite2", "score", score)

	payload := map[string]any{
		"type":   "urlResult",
		"result": score,
	}
	if sendErr := conn.WriteJSON(payload); sendErr != nil {
		log.DefaultLogger.Error("sendURLScore Error", "err", sendErr)
		return sendErr
	}

	log.DefaultLogger.Info("sendURLScore")
	return nil
}
// Limits applied by safeCrawl when fetching a page for analysis.
const (
// MaxContentLength caps the cleaned page text passed on for analysis.
// safeCrawl compares it against len() of the text, so the unit is bytes.
MaxContentLength = 8000 // bounded by the model's context window
// RequestTimeout is the overall timeout set on the HTTP client in safeCrawl.
RequestTimeout = 15 * time.Second
)
// DeepseekConfig groups the settings for a model call.
// NOTE(review): the code that reads these fields is not visible in this part
// of the file — confirm the exact meaning of each field against its consumer.
type DeepseekConfig struct {
Model string // model identifier string
Temperature float32 // presumably the sampling temperature — TODO confirm
MaxTokens int // presumably the cap on generated tokens — TODO confirm
}
var (
// aiConfig is the package-level DeepseekConfig value.
// NOTE(review): no reader of aiConfig is visible here — presumably used by
// the model-calling code; verify.
aiConfig = DeepseekConfig{
Model: "deepseek/deepseek-r1/community", // pick according to what the API key is permitted to use
Temperature: 0.7,
MaxTokens: 500,
}
)
// analyzeHandler crawls targetURL and has the page text rated for risk.
//
// Despite its name it is a plain function, not an net/http handler.
//
// Returns:
//   - the score from analyzeContent (0–100, as enforced by parseRiskScore)
//   - -2 when safeCrawl could not fetch or parse the page
//   - -1 when the analysis step failed
func analyzeHandler(targetURL string) int {
	// Fetch and clean the page text.
	content, err := safeCrawl(targetURL)
	if err != nil {
		// safeCrawl already logs the cause of its own failures.
		return -2
	}

	// Ask the model to rate the content.
	score, err := analyzeContent(content)
	if err != nil {
		// The error is collapsed into a numeric code below, so this is the
		// only place where its cause can be recorded.
		log.DefaultLogger.Error("analyzeContentErr", "url", targetURL, "err", err)
		return -1
	}

	log.DefaultLogger.Info("ckwebsite1", "score", score)
	return score
}
// safeCrawl downloads targetURL and returns the text of its <body> element
// with whitespace runs collapsed to single spaces, capped at MaxContentLength
// bytes.
//
// The request is bounded by RequestTimeout and aborts after 5 redirects. Any
// final status outside 200–399 is treated as a failure. script, style and
// noscript elements are removed before the text is extracted.
//
// When the text has to be truncated, the cut is repaired so the result never
// ends in a partial multi-byte character; invalid UTF-8 sequences inside the
// truncated text are dropped as well.
//
// Returns the cleaned text, or an error wrapping the cause when the request,
// the status check or the HTML parse fails.
//
// NOTE(review): the response body is parsed without a size cap and the target
// host is not restricted — worth revisiting if targetURL is untrusted input.
func safeCrawl(targetURL string) (string, error) {
	client := &http.Client{
		Timeout: RequestTimeout,
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			if len(via) >= 5 {
				return fmt.Errorf("too many redirects (max 5)")
			}
			return nil
		},
	}

	fmt.Println(u.Red(111)) // progress marker: about to send the request
	resp, err := client.Get(targetURL)
	if err != nil {
		log.DefaultLogger.Error("222Err", "err", err)
		return "", fmt.Errorf("HTTP request failed: %w", err)
	}
	defer resp.Body.Close()
	fmt.Println(u.Red(222)) // progress marker: response received

	if resp.StatusCode < 200 || resp.StatusCode >= 400 {
		// err is nil on this path, so log the status instead of the error.
		log.DefaultLogger.Error("223Err", "status", resp.StatusCode)
		return "", fmt.Errorf("invalid status code: %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		log.DefaultLogger.Error("224Err", "err", err)
		return "", fmt.Errorf("HTML parsing failed: %w", err)
	}

	// Drop elements whose content is not visible page text.
	doc.Find("script, style, noscript").Remove()
	text := doc.Find("body").Text()

	// Collapse every run of whitespace into a single space.
	cleanText := strings.Join(strings.Fields(text), " ")
	if len(cleanText) > MaxContentLength {
		// The limit is in bytes, so the slice may end in the middle of a
		// multi-byte rune; ToValidUTF8 removes that dangling fragment.
		cleanText = strings.ToValidUTF8(cleanText[:MaxContentLength], "")
	}

	log.DefaultLogger.Info("getwebsite", "cleanText", cleanText)
	return cleanText, nil
}
// AI分析处理
func analyzeContent(content string) (int, error) {
return parseRiskScore(ask(buildDetectionPrompt(content)))
}
// buildDetectionPrompt fills the fixed risk-assessment prompt template with
// the page text. The template lists five evaluation criteria, asks for a JSON
// reply carrying "risk_score" and "risk_reason", and ends with content
// appended verbatim.
func buildDetectionPrompt(content string) string {
	// The template is sent to the model as-is; its text must not be altered.
	const promptTemplate = `作为网络安全分析系统,请对以下网页内容进行风险评估。评估标准包括:
1. 紧急金融操作请求(转账、验证账户等)
2. 仿冒品牌或误导性信息
3. 可疑链接/下载请求
4. 语法及排版异常
5. 非法个人信息收集
请返回JSON格式
{
"risk_score": 0-100
"risk_reason": "主要风险点"
}
网页内容:%s`

	return fmt.Sprintf(promptTemplate, content)
}
// parseRiskScore extracts the integer "risk_score" field from a model reply.
//
// The reply may surround the JSON with other text (explanations, markdown
// code fences, reasoning that itself contains braces). Every '{' in response
// is tried in order as the start of a JSON object; the first object that
// decodes and carries a non-null "risk_score" decides the result.
//
// Returns the score when it lies in 0–100. Returns 0 and an error when:
//   - response contains no '{' at all,
//   - no candidate decodes as a JSON object with an integer risk_score,
//   - an object decodes but none contains "risk_score" (a missing field is
//     not treated as score 0),
//   - the score is outside 0–100.
func parseRiskScore(response string) (int, error) {
	var (
		sawBrace   bool  // at least one '{' was found
		sawObject  bool  // some candidate decoded but had no risk_score
		lastDecErr error // most recent decode failure, reported if nothing works
	)

	// '{' is ASCII, so scanning bytes is safe for UTF-8 input.
	for pos := 0; pos < len(response); pos++ {
		if response[pos] != '{' {
			continue
		}
		sawBrace = true

		// A pointer distinguishes "field absent/null" from an explicit 0.
		var result struct {
			RiskScore *int `json:"risk_score"`
		}
		// Decoder reads exactly one JSON value and ignores trailing text,
		// unlike Unmarshal, which rejects anything after the value.
		dec := json.NewDecoder(strings.NewReader(response[pos:]))
		if err := dec.Decode(&result); err != nil {
			lastDecErr = err
			continue
		}
		if result.RiskScore == nil {
			sawObject = true
			continue
		}

		score := *result.RiskScore
		if score < 0 || score > 100 {
			return 0, fmt.Errorf("invalid risk score range")
		}
		return score, nil
	}

	switch {
	case !sawBrace:
		return 0, fmt.Errorf("invalid response format")
	case sawObject:
		return 0, fmt.Errorf("missing risk_score in response")
	default:
		return 0, fmt.Errorf("failed to parse JSON: %w", lastDecErr)
	}
}