|
|
|
|
package spider
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"github.com/metacubex/mihomo/common/convert"
|
|
|
|
|
"github.com/metacubex/mihomo/config"
|
|
|
|
|
"log"
|
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
|
|
|
|
"sync"
|
|
|
|
|
"time"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
Register(CollectFuzzy, NewFuzzyCollect)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type Fuzzy struct {
|
|
|
|
|
Url string
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (c *Fuzzy) Get() []map[string]any {
|
|
|
|
|
content := GetBytes(c.Url)
|
|
|
|
|
return ComputeFuzzy(content)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (c *Fuzzy) Get2ChanWG(pc chan []map[string]any, wg *sync.WaitGroup) {
|
|
|
|
|
defer wg.Done()
|
|
|
|
|
nodes := c.Get()
|
|
|
|
|
log.Printf("STATISTIC: Fuzzy count=%d url=%s\n", len(nodes), c.Url)
|
|
|
|
|
if len(nodes) > 0 {
|
|
|
|
|
pc <- nodes
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func NewFuzzyCollect(getter Getter) Collect {
|
|
|
|
|
return &Fuzzy{Url: getter.Url}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
type void struct{}
|
|
|
|
|
|
|
|
|
|
var nullValue void
|
|
|
|
|
|
|
|
|
|
var re = regexp.MustCompile(`proxies|api|token|raw|subscribe|txt|yaml|yml|sub|uuid`)
|
|
|
|
|
var not = regexp.MustCompile(`svg|png|mp4|mp3|jpg|jpeg|m3u8|flv|gif|icon|ktv|mov|webcam`)
|
|
|
|
|
var urlRe = regexp.MustCompile("(https|http)://[-A-Za-z0-9\u4e00-\u9ea5+&@#/%?=~_!:,.;]+[-A-Za-z0-9\u4e00-\u9ea5+&@#/%=~_]")
|
|
|
|
|
|
|
|
|
|
func grepFuzzy(all []byte) map[string]void {
|
|
|
|
|
set := make(map[string]void) // New empty set
|
|
|
|
|
|
|
|
|
|
subUrls := urlRe.FindAllString(string(all), -1)
|
|
|
|
|
for _, url := range subUrls {
|
|
|
|
|
if !re.MatchString(url) || not.MatchString(url) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
set[url] = nullValue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return set
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func ComputeFuzzy(content []byte) []map[string]any {
|
|
|
|
|
|
|
|
|
|
proxies := make([]map[string]any, 0)
|
|
|
|
|
if content == nil {
|
|
|
|
|
return proxies
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 进行订阅抓取
|
|
|
|
|
fuzzy := grepFuzzy(content)
|
|
|
|
|
pool := NewTimeoutPoolWithDefaults()
|
|
|
|
|
pool.WaitCount(len(fuzzy))
|
|
|
|
|
|
|
|
|
|
var cFlag = regexp.MustCompile(`proxies|clash|yaml|yml`)
|
|
|
|
|
lock := sync.Mutex{}
|
|
|
|
|
for temp := range fuzzy {
|
|
|
|
|
url := temp
|
|
|
|
|
pool.SubmitWithTimeout(func(done chan struct{}) {
|
|
|
|
|
defer func() {
|
|
|
|
|
// 每个协程内部使用recover捕获可能在调用逻辑中发生的panic
|
|
|
|
|
if e := recover(); e != nil {
|
|
|
|
|
log.Printf("====抓取错误====%s\n", e)
|
|
|
|
|
}
|
|
|
|
|
done <- struct{}{}
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
getter := Getter{Url: url}
|
|
|
|
|
var ok []map[string]any
|
|
|
|
|
if cFlag.MatchString(url) {
|
|
|
|
|
collect, _ := NewCollect(CollectClash, getter)
|
|
|
|
|
ok = collect.Get()
|
|
|
|
|
} else if strings.Contains(url, "README.md") {
|
|
|
|
|
collect, _ := NewCollect(CollectSharelink, getter)
|
|
|
|
|
ok = collect.Get()
|
|
|
|
|
} else {
|
|
|
|
|
all := GetBytes(url)
|
|
|
|
|
if all == nil || len(all) < 16 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
isGo := true
|
|
|
|
|
rawCfg, err := config.UnmarshalRawConfig(all)
|
|
|
|
|
if err == nil && rawCfg.Proxy != nil {
|
|
|
|
|
ok = rawCfg.Proxy
|
|
|
|
|
isGo = false
|
|
|
|
|
}
|
|
|
|
|
if isGo {
|
|
|
|
|
v2ray, err := convert.ConvertsV2Ray(all)
|
|
|
|
|
if err == nil && v2ray != nil {
|
|
|
|
|
ok = v2ray
|
|
|
|
|
isGo = false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if isGo {
|
|
|
|
|
builder := strings.Builder{}
|
|
|
|
|
for _, link := range grepShareLink(all) {
|
|
|
|
|
builder.WriteString(link + "\n")
|
|
|
|
|
}
|
|
|
|
|
if builder.Len() > 0 {
|
|
|
|
|
all = []byte(builder.String())
|
|
|
|
|
v2ray, err := convert.ConvertsV2Ray(all)
|
|
|
|
|
if err == nil && v2ray != nil {
|
|
|
|
|
ok = v2ray
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ok != nil && len(ok) > 0 {
|
|
|
|
|
lock.Lock()
|
|
|
|
|
proxies = append(proxies, ok...)
|
|
|
|
|
lock.Unlock()
|
|
|
|
|
}
|
|
|
|
|
}, time.Minute)
|
|
|
|
|
}
|
|
|
|
|
pool.StartAndWait()
|
|
|
|
|
|
|
|
|
|
// 进行分享链接抓取
|
|
|
|
|
builder := strings.Builder{}
|
|
|
|
|
for _, link := range grepShareLink(content) {
|
|
|
|
|
builder.WriteString(link + "\n")
|
|
|
|
|
}
|
|
|
|
|
if builder.Len() > 0 {
|
|
|
|
|
content = []byte(builder.String())
|
|
|
|
|
v2ray, err := convert.ConvertsV2Ray(content)
|
|
|
|
|
if err != nil && v2ray != nil {
|
|
|
|
|
lock.Lock()
|
|
|
|
|
proxies = append(proxies, v2ray...)
|
|
|
|
|
lock.Unlock()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return proxies
|
|
|
|
|
}
|