Pisahin Domain per TLD Pakai Python Biar Nggak Ngandelin Notepad++

import os
from collections import defaultdict

# Source list: one domain per line. It is rewritten at the end of the run so
# that it only contains the domains that matched no known suffix.
input_file = 'alldomain.txt'

# Generic TLDs; every entry gets its own output file (domain_<tld>.txt).
tlds = [
    '.com', '.net', '.org', '.dev', '.ai', '.io',
    '.stream', '.cloud', '.co',
]

# Indonesian suffixes; all of them are collected into the single '.id' group.
id_variations = ['.id', '.co.id', '.net.id', '.my.id', '.or.id', '.ac.id', '.sch.id', '.go.id', '.mil.id']

# str.endswith accepts a tuple, so one call tests every Indonesian suffix.
ID_SUFFIXES = tuple(id_variations)


def classify_domain(domain):
    """Return the group key for *domain*, or None when no suffix matches.

    Matching is case-insensitive. Indonesian suffixes are checked first and
    all map to the '.id' group; otherwise the first entry of ``tlds`` that
    the domain ends with is returned.
    """
    lowered = domain.lower()
    if lowered.endswith(ID_SUFFIXES):
        return '.id'
    for tld in tlds:
        if lowered.endswith(tld):
            return tld
    return None


def split_domains(domains):
    """Partition *domains* into suffix groups and a list of leftovers.

    Returns a ``(groups, remaining)`` pair: ``groups`` maps a group key
    (see ``classify_domain``) to the domains in input order, with keys in
    order of first appearance; ``remaining`` holds the unmatched domains.
    Original spelling/casing of every domain is kept.
    """
    groups = defaultdict(list)
    remaining = []
    for domain in domains:
        key = classify_domain(domain)
        if key is None:
            remaining.append(domain)
        else:
            groups[key].append(domain)
    return groups, remaining


def read_domains(path):
    """Read *path* and return its non-blank lines, stripped of whitespace.

    The file is decoded as 'utf-8-sig' so a leading byte-order mark (written
    by editors that save as "UTF-8 with BOM") is dropped instead of being
    glued to the first domain; files without a BOM are read the same as with
    plain UTF-8. Undecodable bytes are ignored.
    """
    with open(path, 'r', encoding='utf-8-sig', errors='ignore') as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


def write_domains(path, domains):
    """Write *domains* to *path* in sorted order, one per line (UTF-8)."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(dom + '\n' for dom in sorted(domains))


def main():
    """Split ``input_file`` into one file per suffix group.

    Each group is written to ``domain_<suffix>.txt`` in the current
    directory (overwriting an existing file of that name), and
    ``input_file`` is then overwritten with the domains that matched no
    suffix.
    """
    domain_groups, remaining_domains = split_domains(read_domains(input_file))

    for tld, dom_list in domain_groups.items():
        # '.id' -> 'id'; inner dots would become underscores.
        clean_tld = tld.lstrip('.').replace('.', '_')
        output_file = f'domain_{clean_tld}.txt'
        write_domains(output_file, dom_list)
        print(f'✅ {len(dom_list):,} domain disimpan ke → {output_file}')

    # Intentionally rewrites the source list with only the leftovers.
    write_domains(input_file, remaining_domains)

    print(f'\nSisa domain yang tidak masuk kategori: {len(remaining_domains):,}')
    print('Proses selesai! 🎉')


if __name__ == '__main__':
    main()