package spider

import (
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/metacubex/mihomo/common/convert"
	"github.com/metacubex/mihomo/config"
	"github.com/metacubex/mihomo/log"
)
func init() {
	Register(CollectFuzzy, NewFuzzyCollect)
}
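// Fuzzy collects proxy nodes from an arbitrary page by fuzzily
// scanning its body for subscription-looking URLs and decoding
// whatever they return.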
type Fuzzy struct {
	Url string
}
func (c *Fuzzy) Get() []map[string]any {
	content := GetBytes(c.Url)
	return ComputeFuzzy(content)
}
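// Get2ChanWG runs Get, logs the node count, and pushes non-empty
// results onto pc; the WaitGroup is released in all cases.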
func (c *Fuzzy) Get2ChanWG(pc chan []map[string]any, wg *sync.WaitGroup) {
	defer wg.Done()
	nodes := c.Get()
	log.Infoln("STATISTIC: Fuzzy count=%d url=%s", len(nodes), c.Url)
	if len(nodes) > 0 {
		pc <- nodes
	}
}
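// NewFuzzyCollect is the factory registered for CollectFuzzy in init.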
func NewFuzzyCollect(getter Getter) Collect {
	return &Fuzzy{Url: getter.Url}
}
// void is a zero-size placeholder so map[string]void acts as a set.
type void struct{}

var nullValue void

// re matches URLs that look like subscriptions; not filters out media
// and other static assets that happen to contain those keywords.
var re = regexp.MustCompile(`proxies|api|token|raw|subscribe|txt|yaml|yml|sub|uuid`)
var not = regexp.MustCompile(`svg|png|mp4|mp3|jpg|jpeg|m3u8|flv|gif|icon|ktv|mov|webcam`)

// urlRe matches http(s) URLs, allowing CJK characters (\u4e00-\u9fa5).
var urlRe = regexp.MustCompile("(https|http)://[-A-Za-z0-9\u4e00-\u9fa5+&@#/%?=~_!:,.;]+[-A-Za-z0-9\u4e00-\u9fa5+&@#/%=~_]")
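// grepFuzzy returns the set of candidate subscription URLs found in
// all: every URL matched by urlRe that contains a subscription keyword
// (re) and is not an obvious static asset (not).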
func grepFuzzy(all []byte) map[string]void {
	set := make(map[string]void) // new empty set
	subUrls := urlRe.FindAllString(string(all), -1)
	for _, url := range subUrls {
		if !re.MatchString(url) || not.MatchString(url) {
			continue
		}
		set[url] = nullValue
	}
	return set
}
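// ComputeFuzzy greps content for subscription URLs and fetches each
// one concurrently with a one-minute timeout. Every body is decoded by
// the first strategy that succeeds: Clash YAML config, base64 V2Ray
// subscription, or bare share links. Share links embedded in the
// original page are converted as well.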
func ComputeFuzzy(content []byte) []map[string]any {
	proxies := make([]map[string]any, 0)
	if content == nil {
		return proxies
	}
	// Fetch every subscription URL found in the page.
	fuzzy := grepFuzzy(content)
	pool := NewTimeoutPoolWithDefaults()
	pool.WaitCount(len(fuzzy))
	var cFlag = regexp.MustCompile(`proxies|clash|yaml|yml`)
	lock := sync.Mutex{}
	for temp := range fuzzy {
		url := temp // per-iteration copy for the closure
		pool.SubmitWithTimeout(func(done chan struct{}) {
			defer func() {
				// Recover inside each goroutine so a panic in one fetch
				// cannot take down the whole pool.
				if e := recover(); e != nil {
					log.Errorln("==== fetch error ==== %s", e)
				}
				done <- struct{}{}
			}()
			getter := Getter{Url: url}
			var ok []map[string]any
			if cFlag.MatchString(url) {
				// The URL looks like a Clash config: parse it as YAML.
				collect, _ := NewCollect(CollectClash, getter)
				ok = collect.Get()
			} else if strings.Contains(url, "README.md") {
				collect, _ := NewCollect(CollectSharelink, getter)
				ok = collect.Get()
			} else {
				all := GetBytes(url)
				if len(all) < 16 {
					return
				}
				// Try decoders in turn: raw Clash config, then base64
				// V2Ray subscription, then bare share links.
				isGo := true
				rawCfg, err := config.UnmarshalRawConfig(all)
				if err == nil && rawCfg.Proxy != nil {
					ok = rawCfg.Proxy
					isGo = false
				}
				if isGo {
					v2ray, err := convert.ConvertsV2Ray(all)
					if err == nil && v2ray != nil {
						ok = v2ray
						isGo = false
					}
				}
				if isGo {
					builder := strings.Builder{}
					for _, link := range grepShareLink(all) {
						builder.WriteString(link + "\n")
					}
					if builder.Len() > 0 {
						all = []byte(builder.String())
						v2ray, err := convert.ConvertsV2Ray(all)
						if err == nil && v2ray != nil {
							ok = v2ray
						}
					}
				}
			}
			if len(ok) > 0 {
				lock.Lock()
				proxies = append(proxies, ok...)
				lock.Unlock()
			}
		}, time.Minute)
	}
	pool.StartAndWait()
	// Also convert share links found directly in the page body.
	builder := strings.Builder{}
	for _, link := range grepShareLink(content) {
		builder.WriteString(link + "\n")
	}
	if builder.Len() > 0 {
		content = []byte(builder.String())
		v2ray, err := convert.ConvertsV2Ray(content)
		if err == nil && v2ray != nil {
			// All workers have finished, so no lock is needed here.
			proxies = append(proxies, v2ray...)
		}
	}
	return proxies
}
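
// Usage sketch (illustrative only; the URL is a placeholder, and the
// call shape follows the constructors above):
//
//	collect, _ := NewCollect(CollectFuzzy, Getter{Url: "https://example.com/page"})
//	nodes := collect.Get() // []map[string]any of decoded proxies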