From 7d7818dc01a6bf9711826ed0c89eef3810e1ae60 Mon Sep 17 00:00:00 2001 From: BROBIRD <7692707+BROBIRD@users.noreply.github.com> Date: Tue, 2 Jun 2026 01:18:12 +0800 Subject: [PATCH] fix some bugs --- scripts/gfwlist_parser.py | 175 ++++++++++++++++++++++++++++++-------- 1 file changed, 139 insertions(+), 36 deletions(-) diff --git a/scripts/gfwlist_parser.py b/scripts/gfwlist_parser.py index 49ce8e1..3f343d0 100644 --- a/scripts/gfwlist_parser.py +++ b/scripts/gfwlist_parser.py @@ -4,6 +4,66 @@ import re import os import sys +def is_ip(s): + if not s: + return False + if s.endswith('^'): + s = s[:-1] + if '.' in s and ':' not in s: + ipv4_pattern = r'^(\d{1,3}\.){3}\d{1,3}$' + if re.match(ipv4_pattern, s): + parts = s.split('.') + if all(0 <= int(p) <= 255 for p in parts): + return True + if ':' in s: + valid_chars = set('0123456789abcdefABCDEF:') + if all(c in valid_chars for c in s): + return True + return False + return False + +def is_cidr(s): + if '/' not in s: + return False + ip_part = s.split('/')[0] + suffix = s.split('/')[1] + if not suffix.isdigit(): + return False + suffix_val = int(suffix) + if '.' in ip_part and ':' not in ip_part: + ipv4_pattern = r'^(\d{1,3}\.){3}\d{1,3}$' + if re.match(ipv4_pattern, ip_part): + parts = ip_part.split('.') + if all(0 <= int(p) <= 255 for p in parts): + if 0 <= suffix_val <= 32: + return True + return False + if ':' in ip_part: + if all(c in set('0123456789abcdefABCDEF:') for c in ip_part): + if 0 <= suffix_val <= 128: + return True + return False + return False + +def normalize_ip_rule(ip_rule): + ip_rule = ip_rule.strip() + if not ip_rule: + return None + if ip_rule.endswith('^'): + ip_rule = ip_rule[:-1] + if is_ip(ip_rule) or is_cidr(ip_rule): + return ip_rule + return None + +def normalize_ip_for_clash(ip_rule): + if is_cidr(ip_rule): + return ip_rule + if is_ip(ip_rule): + if ':' in ip_rule: + return ip_rule + '/128' + return ip_rule + '/32' + return None + def fetch_gfwlist(url="https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt"): try: import urllib.request @@ -21,44 +81,58 @@ def extract_domain_from_url(url): domain_match = re.search(r'(?:https?://)?(?:www\.)?([^/:]+)', url) if domain_match: - return domain_match.group(1) - return url + domain = domain_match.group(1) + if domain and not is_ip(domain) and not is_cidr(domain): + return domain + return None def parse_gfwlist(content): - blacklist = [] - whitelist = [] - + domain_blacklist = [] + domain_whitelist = [] + ip_blacklist = [] + ip_whitelist = [] + try: decoded = base64.b64decode(content).decode('utf-8') except: decoded = content - + for line in decoded.split('\n'): line = line.strip() - + if not line or line.startswith('!') or line.startswith('['): continue - + if line.startswith('@@||'): - domain = line[4:] - if domain.endswith('^'): - domain = domain[:-1] - whitelist.append(domain) + rule = line[4:] + if rule.endswith('^'): + rule = rule[:-1] + if is_ip(rule) or is_cidr(rule): + normalized = normalize_ip_rule(rule) + if normalized: + ip_whitelist.append(normalized) + else: + domain_whitelist.append(rule) elif line.startswith('||'): - domain = line[2:] - if domain.endswith('^'): - domain = domain[:-1] - blacklist.append(domain) + rule = line[2:] + if rule.endswith('^'): + rule = rule[:-1] + if is_ip(rule) or is_cidr(rule): + normalized = normalize_ip_rule(rule) + if normalized: + ip_blacklist.append(normalized) + else: + domain_blacklist.append(rule) elif line.startswith('@@|') and len(line) > 3: domain = extract_domain_from_url(line[3:]) if domain: - whitelist.append(domain) + domain_whitelist.append(domain) elif line.startswith('|') and len(line) > 1: domain = extract_domain_from_url(line[1:]) if domain: - blacklist.append(domain) - - return blacklist, whitelist + domain_blacklist.append(domain) + + return domain_blacklist, domain_whitelist, ip_blacklist, ip_whitelist def format_domain_suffix_rules(domains): rules = [] @@ -79,7 +153,25 @@ def write_file(filepath, content): with open(filepath, 'w', encoding='utf-8') as f: f.write(content) -def generate_acl_file(domains, filename, title="GFWList Rules"): +def format_ip_cidr_rules(ip_rules): + rules = [] + for ip in sorted(set(ip_rules)): + normalized = normalize_ip_for_clash(ip) + if normalized: + if ':' in normalized: + rules.append(f"IP-CIDR6,{normalized},no-resolve") + else: + rules.append(f"IP-CIDR,{normalized},no-resolve") + return rules + +def format_ip_cidr_acl_rules(ip_rules): + rules = [] + for ip in sorted(set(ip_rules)): + if ip: + rules.append(ip) + return rules + +def generate_acl_file(domain_list, ip_list, filename, title="GFWList Rules"): header = f"""#********************************************************************** # {title} # Generated from GFWList @@ -99,22 +191,31 @@ def generate_acl_file(domains, filename, title="GFWList Rules"): # GFWList """ - rules = format_acl_rules(domains) - content = header + '\n'.join(rules) + '\n' + domain_rules = format_acl_rules(domain_list) + ip_rules = format_ip_cidr_acl_rules(ip_list) + all_rules = domain_rules + ip_rules + content = header + '\n'.join(all_rules) + '\n' write_file(filename, content) -def generate_clash_provider_yaml(domains, filename, title="payload"): - unique_domains = sorted(set(domains)) +def generate_clash_provider_yaml(domain_list, ip_list, filename, title="payload"): + unique_domains = sorted(set(domain_list)) content = f"{title}:\n" for domain in unique_domains: content += f" - DOMAIN-SUFFIX,{domain}\n" + ip_rules = format_ip_cidr_rules(ip_list) + for rule in ip_rules: + content += f" - {rule}\n" write_file(filename, content) -def generate_clash_ruleset_list(domains, filename, title="GFWList"): - unique_domains = sorted(set(domains)) - content = f"# 内容:{title}\n# 数量:{len(unique_domains)}条\n" +def generate_clash_ruleset_list(domain_list, ip_list, filename, title="GFWList"): + unique_domains = sorted(set(domain_list)) + ip_rules = format_ip_cidr_rules(ip_list) + total = len(unique_domains) + len(ip_rules) + content = f"# 内容:{title}\n# 数量:{total}条\n" for domain in unique_domains: content += f"DOMAIN-SUFFIX,{domain}\n" + for rule in ip_rules: + content += f"{rule}\n" write_file(filename, content) def main(): @@ -122,24 +223,26 @@ def main(): content = fetch_gfwlist() print("Parsing GFWList...") - blacklist, whitelist = parse_gfwlist(content) + domain_blacklist, domain_whitelist, ip_blacklist, ip_whitelist = parse_gfwlist(content) - print(f"Blacklist entries: {len(blacklist)}") - print(f"Whitelist entries: {len(whitelist)}") + print(f"Domain blacklist entries: {len(domain_blacklist)}") + print(f"Domain whitelist entries: {len(domain_whitelist)}") + print(f"IP blacklist entries: {len(ip_blacklist)}") + print(f"IP whitelist entries: {len(ip_whitelist)}") - generate_acl_file(blacklist, 'Acl/fullgfwlist.acl', "GFWList Blacklist") + generate_acl_file(domain_blacklist, ip_blacklist, 'Acl/fullgfwlist.acl', "GFWList Blacklist") print("Generated: Acl/fullgfwlist.acl") - generate_clash_provider_yaml(blacklist, 'Clash/Providers/ProxyGFWlist.yaml', 'payload') + generate_clash_provider_yaml(domain_blacklist, ip_blacklist, 'Clash/Providers/ProxyGFWlist.yaml', 'payload') print("Generated: Clash/Providers/ProxyGFWlist.yaml") - generate_clash_ruleset_list(blacklist, 'Clash/Ruleset/ProxyGFWlist.list', 'GFWList 黑名单') + generate_clash_ruleset_list(domain_blacklist, ip_blacklist, 'Clash/Ruleset/ProxyGFWlist.list', 'GFWList 黑名单') print("Generated: Clash/Ruleset/ProxyGFWlist.list") - generate_clash_provider_yaml(whitelist, 'Clash/Providers/UnBan.yaml', 'payload') + generate_clash_provider_yaml(domain_whitelist, ip_whitelist, 'Clash/Providers/UnBan.yaml', 'payload') print("Generated: Clash/Providers/UnBan.yaml") - generate_clash_ruleset_list(whitelist, 'Clash/Ruleset/UnBan.list', 'GFWList 白名单') + generate_clash_ruleset_list(domain_whitelist, ip_whitelist, 'Clash/Ruleset/UnBan.list', 'GFWList 白名单') print("Generated: Clash/Ruleset/UnBan.list") if __name__ == "__main__":