160 lines
3.6 KiB
Go
160 lines
3.6 KiB
Go
|
package main
|
|||
|
|
|||
|
import (
|
|||
|
"encoding/json"
|
|||
|
"fmt"
|
|||
|
"github.com/PuerkitoBio/goquery"
|
|||
|
"github.com/ssgo/log"
|
|||
|
"github.com/ssgo/u"
|
|||
|
"net/http"
|
|||
|
"strings"
|
|||
|
"time"
|
|||
|
)
|
|||
|
|
|||
|
func checkUrl(url string) error {
|
|||
|
log.DefaultLogger.Info("UrlTest", "url", url)
|
|||
|
score := analyzeHandler(url)
|
|||
|
if score == -1 || score == -2 {
|
|||
|
log.DefaultLogger.Error("ckwebsiteErr")
|
|||
|
}
|
|||
|
log.DefaultLogger.Info("ckwebsite2", "score", score)
|
|||
|
err := conn.WriteJSON(map[string]any{
|
|||
|
"type": "urlResult",
|
|||
|
"result": score,
|
|||
|
})
|
|||
|
if err != nil {
|
|||
|
log.DefaultLogger.Error("sendURLScore Error", "err", err)
|
|||
|
return err
|
|||
|
}
|
|||
|
log.DefaultLogger.Info("sendURLScore")
|
|||
|
return nil
|
|||
|
}
|
|||
|
|
|||
|
// Limits applied while fetching a page and preparing its text for analysis.
const (
	// MaxContentLength is the maximum length, as measured by len (bytes), of
	// the page text handed to the model. It is sized to fit the model's
	// context window.
	MaxContentLength = 8000

	// RequestTimeout is the overall timeout of the HTTP client used to fetch
	// a page.
	RequestTimeout = 15 * time.Second
)
|
|||
|
|
|||
|
// DeepseekConfig groups the settings used when querying the language model.
type DeepseekConfig struct {
	// Model is the identifier of the model to query.
	Model string
	// Temperature is presumably the sampling temperature sent with each
	// request — confirm against the code that consumes this config.
	Temperature float32
	// MaxTokens is presumably the cap on generated tokens per reply —
	// confirm against the code that consumes this config.
	MaxTokens int
}
|
|||
|
|
|||
|
var (
|
|||
|
aiConfig = DeepseekConfig{
|
|||
|
Model: "deepseek/deepseek-r1/community", // 根据API权限选择
|
|||
|
Temperature: 0.7,
|
|||
|
MaxTokens: 500,
|
|||
|
}
|
|||
|
)
|
|||
|
|
|||
|
// HTTP处理函数
|
|||
|
func analyzeHandler(targetURL string) int {
|
|||
|
|
|||
|
// 安全爬取网页内容
|
|||
|
content, err := safeCrawl(targetURL)
|
|||
|
if err != nil {
|
|||
|
return -2
|
|||
|
}
|
|||
|
|
|||
|
// 调用OpenAI分析
|
|||
|
score, err := analyzeContent(content)
|
|||
|
if err != nil {
|
|||
|
return -1
|
|||
|
}
|
|||
|
log.DefaultLogger.Info("ckwebsite1", "score", score)
|
|||
|
return score
|
|||
|
}
|
|||
|
|
|||
|
func safeCrawl(targetURL string) (string, error) {
|
|||
|
client := &http.Client{
|
|||
|
Timeout: RequestTimeout,
|
|||
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|||
|
if len(via) >= 5 {
|
|||
|
return fmt.Errorf("too many redirects (max 5)")
|
|||
|
}
|
|||
|
return nil
|
|||
|
},
|
|||
|
}
|
|||
|
fmt.Println(u.Red(111))
|
|||
|
resp, err := client.Get(targetURL)
|
|||
|
if err != nil {
|
|||
|
log.DefaultLogger.Error("222Err")
|
|||
|
fmt.Println(err)
|
|||
|
return "", fmt.Errorf("HTTP request failed: %v", err)
|
|||
|
}
|
|||
|
defer resp.Body.Close()
|
|||
|
fmt.Println(u.Red(222))
|
|||
|
|
|||
|
if resp.StatusCode < 200 || resp.StatusCode >= 400 {
|
|||
|
log.DefaultLogger.Error("223Err")
|
|||
|
fmt.Println(err)
|
|||
|
return "", fmt.Errorf("invalid status code: %d", resp.StatusCode)
|
|||
|
}
|
|||
|
|
|||
|
doc, err := goquery.NewDocumentFromReader(resp.Body)
|
|||
|
if err != nil {
|
|||
|
log.DefaultLogger.Error("224Err")
|
|||
|
fmt.Println(err)
|
|||
|
return "", fmt.Errorf("HTML parsing failed: %v", err)
|
|||
|
}
|
|||
|
|
|||
|
doc.Find("script, style, noscript").Remove()
|
|||
|
text := doc.Find("body").Text()
|
|||
|
|
|||
|
// 预处理内容
|
|||
|
cleanText := strings.Join(strings.Fields(text), " ")
|
|||
|
if len(cleanText) > MaxContentLength {
|
|||
|
cleanText = cleanText[:MaxContentLength]
|
|||
|
}
|
|||
|
log.DefaultLogger.Info("getwebsite", "cleanText", cleanText)
|
|||
|
return cleanText, nil
|
|||
|
}
|
|||
|
|
|||
|
// AI分析处理
|
|||
|
func analyzeContent(content string) (int, error) {
|
|||
|
return parseRiskScore(ask(buildDetectionPrompt(content)))
|
|||
|
}
|
|||
|
|
|||
|
// buildDetectionPrompt returns the prompt sent to the model: a fixed set of
// instructions (the risk criteria and the JSON reply format with risk_score
// and risk_reason) followed by the page content, which is appended verbatim.
func buildDetectionPrompt(content string) string {
	const instructions = `作为网络安全分析系统,请对以下网页内容进行风险评估。评估标准包括:
1. 紧急金融操作请求(转账、验证账户等)
2. 仿冒品牌或误导性信息
3. 可疑链接/下载请求
4. 语法及排版异常
5. 非法个人信息收集

请返回JSON格式:
{
"risk_score": 0-100,
"risk_reason": "主要风险点"
}

网页内容:`

	return instructions + content
}
|
|||
|
|
|||
|
// parseRiskScore extracts the risk score from a model reply.
//
// The reply may contain text around the JSON object; everything from the
// first "{" to the last "}" is decoded. An error is returned when no such
// span exists, when it is not valid JSON, when the "risk_score" field is
// absent or null, or when the score lies outside 0-100. On success the score
// is returned with a nil error.
func parseRiskScore(response string) (int, error) {
	// A pointer distinguishes a missing or null risk_score from a real 0, so
	// an unrelated JSON object is not mistaken for a "no risk" verdict.
	var result struct {
		RiskScore *int `json:"risk_score"`
	}

	start := strings.Index(response, "{")
	end := strings.LastIndex(response, "}")
	if start == -1 || end <= start {
		return 0, fmt.Errorf("invalid response format")
	}

	if err := json.Unmarshal([]byte(response[start:end+1]), &result); err != nil {
		return 0, fmt.Errorf("failed to parse JSON: %w", err)
	}

	if result.RiskScore == nil {
		return 0, fmt.Errorf("missing risk_score in response")
	}

	score := *result.RiskScore
	if score < 0 || score > 100 {
		return 0, fmt.Errorf("invalid risk score range")
	}

	return score, nil
}
|