|
| 1 | +#!/usr/bin/env node |
| 2 | +import { createWriteStream } from 'node:fs' |
| 3 | +import { URL } from 'node:url' |
| 4 | +import axios from 'axios' |
| 5 | +import { extract } from 'tar-stream' |
| 6 | +import { pipeline, Readable } from 'node:stream' |
| 7 | +import gunzip from "gunzip-maybe" |
| 8 | +import { promisify } from 'node:util' |
| 9 | +import randomUserAgent from 'random-useragent' |
| 10 | + |
| 11 | +const pipe = promisify(pipeline) |
| 12 | + |
// Last-resort handlers: print the failure and terminate with an exit code that
// tells the two failure kinds apart (1 = uncaught exception, 2 = unhandled
// promise rejection).
const fatalExitCodes = {
  uncaughtException: 1,
  unhandledRejection: 2
}

for (const [eventName, exitCode] of Object.entries(fatalExitCodes)) {
  process.on(eventName, (err) => {
    console.error(err)
    process.exit(exitCode)
  })
}
| 22 | + |
/**
 * Report whether `s` is usable as the bare host of an `https://` URL.
 *
 * The candidate is parsed as `https://<s>`, but parsing alone is not enough:
 * the URL parser silently drops tabs/newlines and happily accepts a path,
 * query, fragment, credentials or port after the host. Any of those means the
 * string is not just a host, so it is rejected before parsing.
 *
 * @param {string} s - Candidate host name (no scheme).
 * @returns {boolean} `true` when `s` is a non-empty string that parses as a host only.
 */
function isValidUrl (s) {
  if (typeof s !== 'string' || s === '') {
    return false
  }

  // Whitespace, path/query/fragment/credential delimiters, or a trailing
  // ":port" cannot be part of a plain host name.
  if (/[\s/\\?#@]|:\d*$/.test(s)) {
    return false
  }

  try {
    return new URL(`https://${s}`).hostname !== ''
  } catch {
    return false
  }
}
| 31 | + |
// Block-list sources to download. Plain-text lists come first; the
// dsi.ut-capitole.fr entries are gzipped tarballs handled separately.
// Every URL is listed exactly once so no list is fetched twice.
const urls = [
  'https://big.oisd.nl/dnsmasq',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/pihole-youtube.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-facebook.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-tiktok.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-reddit.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-twitch.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-snapchat.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-whatsapp.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-instagram.txt',
  'https://raw.githubusercontent.com/gieljnssns/Social-media-Blocklists/master/adguard-twitter.txt',
  'https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts',
  'https://adaway.org/hosts.txt',
  'https://v.firebog.net/hosts/AdguardDNS.txt',
  'https://v.firebog.net/hosts/Easyprivacy.txt',
  'https://bitbucket.org/ethanr/dns-blacklists/raw/8575c9f96e5b4a1308f2f12394abd86d0927a4a0/bad_lists/Mandiant_APT1_Report_Appendix_D.txt',
  'https://raw.githubusercontent.com/DandelionSprout/adfilt/master/Alternate%20versions%20Anti-Malware%20List/AntiMalwareHosts.txt',
  'https://blocklistproject.github.io/Lists/alt-version/ads-nl.txt',
  'https://blocklistproject.github.io/Lists/alt-version/abuse-nl.txt',
  'https://blocklistproject.github.io/Lists/alt-version/malware-nl.txt',
  'https://blocklistproject.github.io/Lists/alt-version/tracking-nl.txt',
  'https://blocklistproject.github.io/Lists/alt-version/phishing-nl.txt',
  'https://blocklistproject.github.io/Lists/alt-version/scam-nl.txt',
  'https://blocklistproject.github.io/Lists/alt-version/ransomware-nl.txt',
  'https://raw.githubusercontent.com/stamparm/aux/master/maltrail-malware-domains.txt',
  'https://threatfox.abuse.ch/downloads/hostfile/',
  'https://raw.githubusercontent.com/StevenBlack/hosts/master/data/StevenBlack/hosts',
  'https://raw.githubusercontent.com/Sekhan/TheGreatWall/master/TheGreatWall.txt',
  'https://raw.githubusercontent.com/dibdot/DoH-IP-blocklists/master/doh-domains.txt',
  'https://dsi.ut-capitole.fr/blacklists/download/ads.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/aggressive.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/audio-video.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/bitcoin.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/chat.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/astrology.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/blog.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/cryptojacking.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/dangerous_material.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/dating.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/ddos.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/dialer.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/doh.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/download.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/drogue.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/filehosting.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/forums.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/gambling.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/games.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/hacking.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/lingerie.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/malware.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/manga.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/mixed_adult.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/phishing.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/proxy.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/publicite.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/radio.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/redirector.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/remote-control.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/shortener.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/sexual_education.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/social_networks.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/stalkerware.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/strict_redirector.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/tricheur.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/vpn.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/violence.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/warez.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/webmail.tar.gz',
  'https://dsi.ut-capitole.fr/blacklists/download/adult.tar.gz'
]
| 104 | + |
| 105 | + |
/**
 * Pull the plain-text `domains` list out of a gzipped tarball response.
 *
 * `resp.data` is gunzipped and untarred in memory. If an entry whose name ends
 * with `domains` is found, `resp.data` is replaced with that entry's UTF-8
 * text (the last matching entry wins). If no such entry is found, or any of
 * the streams fails, the problem is logged and `resp.data` becomes an empty
 * string so callers can always treat it as text.
 *
 * @param {object} resp - Response-like object. `resp.data` holds the archive
 *   (string, Buffer, ArrayBuffer, typed array, or anything `Readable.from`
 *   accepts) and is overwritten. `resp.config.url`, when present, is used in
 *   the warning message.
 * @returns {Promise<object>} Resolves with the same `resp` object once the
 *   archive has been fully processed or has failed.
 */
function extractUT1DomainFileFromGzipStream (resp) {
  return new Promise((resolve) => {
    const source = resp.config?.url ?? 'response'

    // Readable.from() would iterate a typed array byte by byte and rejects a
    // raw ArrayBuffer, so normalise every binary payload to a single Buffer.
    let archive = resp.data
    if (typeof archive === 'string' || archive instanceof ArrayBuffer) {
      archive = Buffer.from(archive)
    } else if (ArrayBuffer.isView(archive) && !Buffer.isBuffer(archive)) {
      archive = Buffer.from(archive.buffer, archive.byteOffset, archive.byteLength)
    }

    const tarGzStream = Readable.from(archive)
    const gunzipStream = gunzip()
    const extractStream = extract()

    let domains = null
    let settled = false

    // Settle exactly once, whether the archive finished or a stream failed.
    const finish = () => {
      if (settled) {
        return
      }
      settled = true
      if (domains === null) {
        console.warn(`Failed to extract domain list from ${source}`)
      }
      resp.data = domains ?? ''
      resolve(resp)
    }

    const fail = (err) => {
      if (!settled) {
        console.error(err)
      }
      finish()
    }

    extractStream.on('entry', (header, stream, next) => {
      if (header.name.endsWith('domains')) {
        const chunks = []
        stream.on('data', (chunk) => chunks.push(chunk))
        stream.on('end', () => {
          domains = Buffer.concat(chunks).toString('utf8')
          next()
        })
      } else {
        // Drain the unwanted entry and only then move on to the next one.
        stream.on('end', () => next())
        stream.resume()
      }
    })

    // .pipe() does not forward errors, so each stage needs its own handler;
    // otherwise a corrupt archive would crash the process or hang the promise.
    for (const stage of [tarGzStream, gunzipStream, extractStream]) {
      stage.on('error', fail)
    }
    extractStream.on('finish', finish)

    tarGzStream.pipe(gunzipStream).pipe(extractStream)
  })
}
| 146 | + |
// Start one download per source URL. `.tar.gz` sources are requested as raw
// bytes with axios' `decompress` option turned off; everything else is
// requested as text. A response that is gzip-typed or not a string is handed
// to the tarball extractor. Any failure is logged and ends the process with
// exit code 3.
const requests = urls.map(async (url) => {
  const isTarball = url.endsWith('tar.gz')
  try {
    const resp = await axios.get(url, {
      headers: {
        'User-Agent': randomUserAgent.getRandom()
      },
      decompress: !isTarball,
      responseType: isTarball ? 'arraybuffer' : 'text'
    })

    const isArchive = resp.headers['content-type'] === 'application/x-gzip' || typeof resp.data !== 'string'
    if (!isArchive) {
      return resp
    }
    // Awaited here so that an extraction failure is caught below as well.
    return await extractUT1DomainFileFromGzipStream(resp)
  } catch (err) {
    console.error(err)
    process.exit(3)
  }
})

// Wait for every download; an aggregate failure is logged and exits with code 1.
const responses = await Promise.all(requests).catch((err) => {
  console.error(err)
  process.exit(1)
})
| 168 | + |
// Flatten every downloaded list into individual lines. Splitting on /\r?\n/
// keeps CRLF-terminated lists from leaving a trailing '\r' on each line.
const lines = responses.flatMap((resp) => resp.data.split(/\r?\n/))

// Hosts-file boilerplate that must never end up in the block list. Lines are
// compared after trimming and collapsing whitespace runs to a single space,
// so tab-separated or CRLF variants of these entries are excluded too.
const excludedLines = [
  '0.0.0.0 0.0.0.0',
  '127.0.0.1 localhost',
  '127.0.0.1 localhost.localdomain',
  '127.0.0.1 local',
  '255.255.255.255 broadcasthost',
  '::1 localhost',
  '::1 ip6-localhost',
  '::1 ip6-loopback',
  'fe80::1%lo0 localhost',
  'ff00::0 ip6-localnet',
  'ff00::0 ip6-mcastprefix',
  'ff02::1 ip6-allnodes',
  'ff02::2 ip6-allrouters',
  'ff02::3 ip6-allhosts',
  '=',
  '::1'
]

// Comment markers and a known placeholder entry; lines starting with any of
// these are skipped.
const cantStartWith = [
  '#',
  '!#',
  '!',
  'if-a-large-hosts-file-contains-this-entry-then-it'
]

// List-format decorations removed from each line. String#replace with a
// string pattern removes only the first occurrence, applied in this order.
const stripPatterns = ['server=/', '/', '||', '^', '0.0.0.0', '127.0.0.1']

const excludedLineSet = new Set(excludedLines)

// Hosts are de-duplicated AFTER normalisation, so the same domain coming from
// differently formatted lists ends up in the output only once.
const blocked = new Set()
// Remember rejected tokens so each one is reported a single time.
const rejected = new Set()

for (const rawLine of lines) {
  const line = rawLine.trim().replace(/\s+/g, ' ')
  if (line === '' || excludedLineSet.has(line) || cantStartWith.some((blockedPrefix) => line.startsWith(blockedPrefix))) {
    continue
  }

  let key = line
  for (const removePattern of stripPatterns) {
    key = key.replace(removePattern, '')
  }
  // Keep only the first token; anything after it is an alias or inline comment.
  key = key.trim().split(' ')[0]

  if (isValidUrl(key)) {
    blocked.add(key)
  } else if (!rejected.has(key)) {
    rejected.add(key)
    console.warn(`${key} is not a valid url! Dropping from list!`)
  }
}

const blockedNormalizedHosts = [...blocked].sort()
// Split the sorted host list into four contiguous parts and write each part
// to dns0.txt … dns3.txt, one host per line.
const listCount = 4

// Integer boundaries: part k covers [floor(len*k/4), floor(len*(k+1)/4)), and
// the last part runs to the end of the array so no host is dropped.
const boundaryAt = (part) => Math.floor((blockedNormalizedHosts.length * part) / listCount)

const lists = Array.from({ length: listCount }, (_, part) =>
  part === listCount - 1
    ? blockedNormalizedHosts.slice(boundaryAt(part))
    : blockedNormalizedHosts.slice(boundaryAt(part), boundaryAt(part + 1))
)

// for...of with entries() yields numeric indices and only the array's own
// elements, unlike for...in.
for (const [i, hosts] of lists.entries()) {
  const fileStream = createWriteStream(`dns${i}.txt`)
  const blockList = hosts.map((blockedHost) => `${blockedHost}\n`).join('')
  const blockStream = Readable.from(Buffer.from(blockList))
  // Files are written one after another; each write completes before the next starts.
  await pipe(blockStream, fileStream)
}
0 commit comments