From 9d96acd8dbc60850129a3b1c4f64520d51095a73 Mon Sep 17 00:00:00 2001
From: Toby <tobyxdd@gmail.com>
Date: Wed, 20 Mar 2024 18:07:26 -0700
Subject: [PATCH 1/2] feat: new heuristics for trojan analyzer

---
 analyzer/tcp/trojan.go | 83 +++++++++++++++++++++++-------------------
 docs/Analyzers.md      |  3 +-
 2 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/analyzer/tcp/trojan.go b/analyzer/tcp/trojan.go
index 37a430b..71694d7 100644
--- a/analyzer/tcp/trojan.go
+++ b/analyzer/tcp/trojan.go
@@ -9,22 +9,14 @@ import (
 var _ analyzer.TCPAnalyzer = (*TrojanAnalyzer)(nil)
 
 // CCS stands for "Change Cipher Spec"
-var trojanCCS = []byte{20, 3, 3, 0, 1, 1}
+var ccsPattern = []byte{20, 3, 3, 0, 1, 1}
 
-const (
-	trojanUpLB    = 650
-	trojanUpUB    = 1000
-	trojanDownLB1 = 170
-	trojanDownUB1 = 180
-	trojanDownLB2 = 3000
-	trojanDownUB2 = 7500
-)
-
-// TrojanAnalyzer uses a very simple packet length based check to determine
-// if a TLS connection is actually the Trojan proxy protocol.
-// The algorithm is from the following project, with small modifications:
-// https://github.com/XTLS/Trojan-killer
-// Warning: Experimental only. This method is known to have significant false positives and false negatives.
+// TrojanAnalyzer uses length-based heuristics to detect Trojan traffic based on
+// its "TLS-in-TLS" nature. The heuristics are trained using a decision tree with
+// about 2000 samples. This is highly experimental and is known to have significant
+// error rates (about 8% false positives & 2% false negatives).
+// We do NOT recommend directly blocking all positive connections, as this is likely
+// to break many normal TLS connections.
 type TrojanAnalyzer struct{}
 
 func (a *TrojanAnalyzer) Name() string {
@@ -32,7 +24,7 @@ func (a *TrojanAnalyzer) Name() string {
 }
 
 func (a *TrojanAnalyzer) Limit() int {
-	return 16384
+	return 512000
 }
 
 func (a *TrojanAnalyzer) NewTCP(info analyzer.TCPInfo, logger analyzer.Logger) analyzer.TCPStream {
@@ -40,10 +32,12 @@ func (a *TrojanAnalyzer) NewTCP(info analyzer.TCPInfo, logger analyzer.Logger) a
 }
 
 type trojanStream struct {
-	logger    analyzer.Logger
-	active    bool
-	upCount   int
-	downCount int
+	logger   analyzer.Logger
+	first    bool
+	count    bool
+	rev      bool
+	seq      [4]int
+	seqIndex int
 }
 
 func newTrojanStream(logger analyzer.Logger) *trojanStream {
@@ -57,33 +51,48 @@ func (s *trojanStream) Feed(rev, start, end bool, skip int, data []byte) (u *ana
 	if len(data) == 0 {
 		return nil, false
 	}
-	if !rev && !s.active && len(data) >= 6 && bytes.Equal(data[:6], trojanCCS) {
-		// Client CCS encountered, start counting
-		s.active = true
+
+	if s.first {
+		s.first = false
+		// Stop if it's not a valid TLS connection
+		if !(!rev && len(data) >= 3 && data[0] >= 0x16 && data[0] <= 0x17 &&
+			data[1] == 0x03 && data[2] <= 0x09) {
+			return nil, true
+		}
 	}
-	if s.active {
-		if rev {
-			// Down direction
-			s.downCount += len(data)
+
+	if !rev && !s.count && len(data) >= 6 && bytes.Equal(data[:6], ccsPattern) {
+		// Client Change Cipher Spec encountered, start counting
+		s.count = true
+	}
+
+	if s.count {
+		if rev == s.rev {
+			// Same direction as last time, just update the number
+			s.seq[s.seqIndex] = len(data)
 		} else {
-			// Up direction
-			if s.upCount >= trojanUpLB && s.upCount <= trojanUpUB &&
-				((s.downCount >= trojanDownLB1 && s.downCount <= trojanDownUB1) ||
-					(s.downCount >= trojanDownLB2 && s.downCount <= trojanDownUB2)) {
+			// Different direction, bump the index
+			s.seqIndex += 1
+			if s.seqIndex == 4 {
+				// Time to evaluate
+				yes := s.seq[0] >= 100 &&
+					s.seq[1] >= 88 &&
+					s.seq[2] >= 40 &&
+					s.seq[3] >= 51
 				return &analyzer.PropUpdate{
 					Type: analyzer.PropUpdateReplace,
 					M: analyzer.PropMap{
-						"up":   s.upCount,
-						"down": s.downCount,
-						"yes":  true,
+						"seq": s.seq,
+						"yes": yes,
 					},
 				}, true
 			}
-			s.upCount += len(data)
+			s.seq[s.seqIndex] = len(data)
+			s.rev = rev
 		}
 	}
-	// Give up when either direction is over the limit
-	return nil, s.upCount > trojanUpUB || s.downCount > trojanDownUB2
+
+	return nil, false
 }
 
 func (s *trojanStream) Close(limited bool) *analyzer.PropUpdate {
diff --git a/docs/Analyzers.md b/docs/Analyzers.md
index fea2efa..17a4c0d 100644
--- a/docs/Analyzers.md
+++ b/docs/Analyzers.md
@@ -251,8 +251,7 @@ Check https://github.com/XTLS/Trojan-killer for more information.
 ```json
 {
   "trojan": {
-    "down": 4712,
-    "up": 671,
+    "seq": [170, 282, 167, 470],
     "yes": true
   }
 }

From 0732dfa7a574147a3051ff96796cbddb1a72bab3 Mon Sep 17 00:00:00 2001
From: Toby <tobyxdd@gmail.com>
Date: Wed, 20 Mar 2024 18:10:22 -0700
Subject: [PATCH 2/2] docs: no longer use trojan-killer

---
 README.ja.md | 2 +-
 README.md    | 2 +-
 README.zh.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.ja.md b/README.ja.md
index 2247118..f3307b5 100644
--- a/README.ja.md
+++ b/README.ja.md
@@ -20,7 +20,7 @@ Telegram グループ: https://t.me/OpGFW
 - フル IP/TCP 再アセンブル、各種プロトコルアナライザー
   - HTTP、TLS、QUIC、DNS、SSH、SOCKS4/5、WireGuard、その他多数
   - Shadowsocks の「完全に暗号化されたトラフィック」の検出など (https://gfw.report/publications/usenixsecurity23/en/)
-  - トロイの木馬キラー (https://github.com/XTLS/Trojan-killer) に基づくトロイの木馬 (プロキシプロトコル) 検出
+  - Trojan プロキシプロトコルの検出
   - [WIP] 機械学習に基づくトラフィック分類
 - IPv4 と IPv6 をフルサポート
 - フローベースのマルチコア負荷分散
diff --git a/README.md b/README.md
index 36e5817..c0f35a4 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Telegram group: https://t.me/OpGFW
   - HTTP, TLS, QUIC, DNS, SSH, SOCKS4/5, WireGuard, and many more to come
   - "Fully encrypted traffic" detection for Shadowsocks,
     etc. (https://gfw.report/publications/usenixsecurity23/en/)
-  - Trojan (proxy protocol) detection based on Trojan-killer (https://github.com/XTLS/Trojan-killer)
+  - Trojan (proxy protocol) detection
   - [WIP] Machine learning based traffic classification
 - Full IPv4 and IPv6 support
 - Flow-based multicore load balancing
diff --git a/README.zh.md b/README.zh.md
index 4639e71..b89e47e 100644
--- a/README.zh.md
+++ b/README.zh.md
@@ -20,7 +20,7 @@ Telegram 群组： https://t.me/OpGFW
 - 完整的 IP/TCP 重组，各种协议解析器
   - HTTP, TLS, QUIC, DNS, SSH, SOCKS4/5, WireGuard, 更多协议正在开发中
   - Shadowsocks 等 "全加密流量" 检测 (https://gfw.report/publications/usenixsecurity23/zh/)
-  - 基于 Trojan-killer 的 Trojan 检测 (https://github.com/XTLS/Trojan-killer)
+  - Trojan 协议检测
   - [开发中] 基于机器学习的流量分类
 - 同等支持 IPv4 和 IPv6
 - 基于流的多核负载均衡