160 lines
3.6 KiB
Go
160 lines
3.6 KiB
Go
package main
|
||
|
||
import (
|
||
"encoding/json"
|
||
"fmt"
|
||
"github.com/PuerkitoBio/goquery"
|
||
"github.com/ssgo/log"
|
||
"github.com/ssgo/u"
|
||
"net/http"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
func checkUrl(url string) error {
|
||
log.DefaultLogger.Info("UrlTest", "url", url)
|
||
score := analyzeHandler(url)
|
||
if score == -1 || score == -2 {
|
||
log.DefaultLogger.Error("ckwebsiteErr")
|
||
}
|
||
log.DefaultLogger.Info("ckwebsite2", "score", score)
|
||
err := conn.WriteJSON(map[string]any{
|
||
"type": "urlResult",
|
||
"result": score,
|
||
})
|
||
if err != nil {
|
||
log.DefaultLogger.Error("sendURLScore Error", "err", err)
|
||
return err
|
||
}
|
||
log.DefaultLogger.Info("sendURLScore")
|
||
return nil
|
||
}
|
||
|
||
const (
	// MaxContentLength caps how much extracted page text is kept for
	// analysis; it is sized to fit the model's context window. safeCrawl
	// compares it against len(), so the unit is bytes.
	MaxContentLength = 8000

	// RequestTimeout is the overall timeout of the HTTP client that fetches
	// the page to analyze.
	RequestTimeout = 15 * time.Second
)
|
||
|
||
// DeepseekConfig groups the model settings used for content analysis.
//
// NOTE(review): the code that consumes these fields is not in this file; the
// per-field descriptions below follow the usual meaning of the names and
// should be confirmed against the request builder.
type DeepseekConfig struct {
	// Model is the identifier of the model to query.
	Model string
	// Temperature is presumably the sampling temperature.
	Temperature float32
	// MaxTokens is presumably the upper bound on generated tokens.
	MaxTokens int
}
|
||
|
||
var (
|
||
aiConfig = DeepseekConfig{
|
||
Model: "deepseek/deepseek-r1/community", // 根据API权限选择
|
||
Temperature: 0.7,
|
||
MaxTokens: 500,
|
||
}
|
||
)
|
||
|
||
// HTTP处理函数
|
||
func analyzeHandler(targetURL string) int {
|
||
|
||
// 安全爬取网页内容
|
||
content, err := safeCrawl(targetURL)
|
||
if err != nil {
|
||
return -2
|
||
}
|
||
|
||
// 调用OpenAI分析
|
||
score, err := analyzeContent(content)
|
||
if err != nil {
|
||
return -1
|
||
}
|
||
log.DefaultLogger.Info("ckwebsite1", "score", score)
|
||
return score
|
||
}
|
||
|
||
func safeCrawl(targetURL string) (string, error) {
|
||
client := &http.Client{
|
||
Timeout: RequestTimeout,
|
||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||
if len(via) >= 5 {
|
||
return fmt.Errorf("too many redirects (max 5)")
|
||
}
|
||
return nil
|
||
},
|
||
}
|
||
fmt.Println(u.Red(111))
|
||
resp, err := client.Get(targetURL)
|
||
if err != nil {
|
||
log.DefaultLogger.Error("222Err")
|
||
fmt.Println(err)
|
||
return "", fmt.Errorf("HTTP request failed: %v", err)
|
||
}
|
||
defer resp.Body.Close()
|
||
fmt.Println(u.Red(222))
|
||
|
||
if resp.StatusCode < 200 || resp.StatusCode >= 400 {
|
||
log.DefaultLogger.Error("223Err")
|
||
fmt.Println(err)
|
||
return "", fmt.Errorf("invalid status code: %d", resp.StatusCode)
|
||
}
|
||
|
||
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
||
if err != nil {
|
||
log.DefaultLogger.Error("224Err")
|
||
fmt.Println(err)
|
||
return "", fmt.Errorf("HTML parsing failed: %v", err)
|
||
}
|
||
|
||
doc.Find("script, style, noscript").Remove()
|
||
text := doc.Find("body").Text()
|
||
|
||
// 预处理内容
|
||
cleanText := strings.Join(strings.Fields(text), " ")
|
||
if len(cleanText) > MaxContentLength {
|
||
cleanText = cleanText[:MaxContentLength]
|
||
}
|
||
log.DefaultLogger.Info("getwebsite", "cleanText", cleanText)
|
||
return cleanText, nil
|
||
}
|
||
|
||
// AI分析处理
|
||
func analyzeContent(content string) (int, error) {
|
||
return parseRiskScore(ask(buildDetectionPrompt(content)))
|
||
}
|
||
|
||
// buildDetectionPrompt returns the prompt sent to the model for content.
//
// The (Chinese) prompt asks the model to act as a network-security analysis
// system and rate the page against five criteria: urgent financial requests,
// brand impersonation or misleading information, suspicious links or
// downloads, grammar and layout anomalies, and unlawful collection of personal
// data. It requests a JSON reply with "risk_score" (0-100) and "risk_reason".
// content is appended verbatim after the instructions.
//
// NOTE(review): content is page text inserted as-is, so a page can embed
// instructions aimed at the model; consider delimiting or sanitizing it.
func buildDetectionPrompt(content string) string {
	const promptTemplate = `作为网络安全分析系统,请对以下网页内容进行风险评估。评估标准包括:
1. 紧急金融操作请求(转账、验证账户等)
2. 仿冒品牌或误导性信息
3. 可疑链接/下载请求
4. 语法及排版异常
5. 非法个人信息收集

请返回JSON格式:
{
"risk_score": 0-100,
"risk_reason": "主要风险点"
}

网页内容:%s`

	return fmt.Sprintf(promptTemplate, content)
}
|
||
|
||
// parseRiskScore extracts the integer "risk_score" from a model reply.
//
// The reply may carry extra text around the JSON object, so everything from
// the first '{' to the last '}' is decoded. An error is returned when no such
// span exists, when it is not valid JSON, when "risk_score" is absent or null,
// or when the score lies outside 0-100. On error the returned score is 0 and
// must not be used.
func parseRiskScore(response string) (int, error) {
	// A pointer distinguishes "field missing or null" from an explicit 0.
	// With a plain int, a reply without a score would decode to 0, which is
	// the lowest possible risk.
	var result struct {
		RiskScore *int `json:"risk_score"`
	}

	start := strings.Index(response, "{")
	end := strings.LastIndex(response, "}")
	if start == -1 || end <= start {
		return 0, fmt.Errorf("invalid response format")
	}

	if err := json.Unmarshal([]byte(response[start:end+1]), &result); err != nil {
		return 0, fmt.Errorf("failed to parse JSON: %w", err)
	}
	if result.RiskScore == nil {
		return 0, fmt.Errorf("missing risk_score in response")
	}

	score := *result.RiskScore
	if score < 0 || score > 100 {
		return 0, fmt.Errorf("invalid risk score range")
	}
	return score, nil
}
|