From 413b11c0419d46d0cdb04ddd3659ad30754e7dc1 Mon Sep 17 00:00:00 2001
From: Jonni Liljamo
Date: Fri, 19 Jul 2024 16:07:37 +0300
Subject: [PATCH] wip: initial

---
 robots.txt | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 robots.txt

diff --git a/robots.txt b/robots.txt
new file mode 100644
index 0000000..3772658
--- /dev/null
+++ b/robots.txt
@@ -0,0 +1,75 @@
+# TODO: Hahaa!
+# Maybe disallow everything for... everything (/),
+# but allow just google to index it?
+# Then track the bot user-agents, and disallow manually those that don't respect *!
+
+
+
+# Parts of this are from the very well put together sr.ht/robots.txt,
+# especially the part below regarding Sourcehut paths
+# Others are handpicked
+
+# For src.quest
+User-agent: *
+Disallow: /*?*
+Disallow: /*.tar.gz$
+Disallow: /metrics
+Disallow: /*/*/blame/*
+Disallow: /*/*/log/*
+Disallow: /*/*/tree/*
+Disallow: /*/*/item/*
+Disallow: /*/*/mbox
+Disallow: /*/*/*/raw
+
+# Marketing/SEO
+User-agent: SemrushBot
+Disallow: /
+User-agent: SemrushBot-SA
+Disallow: /
+User-agent: AhrefsBot
+Disallow: /
+User-agent: dotbot
+Disallow: /
+User-agent: rogerbot
+Disallow: /
+User-agent: BLEXBot
+Disallow: /
+User-agent: ZoominfoBot
+Disallow: /
+User-agent: Yandex
+Disallow: /
+User-agent: MJ12bot
+Disallow: /
+User-agent: DataForSeoBot
+Disallow: /
+# FIXME: unnamed group disabled -- an empty User-agent token is invalid (RFC 9309)
+# and substring-matching parsers apply it to every crawler. Fill in the bot name.
+
+# Not my jam
+User-agent: GPTBot
+Disallow: /
+
+# Some Huawei related AI services, no thanks
+User-agent: PetalBot
+Disallow: /
+User-agent: AspiegelBot
+Disallow: /
+
+# Alexa, nope
+User-agent: Amazonbot
+Disallow: /
+
+# Nnnnnope
+User-agent: turnitinbot
+Disallow: /
+User-agent: Turnitin
+Disallow: /
+
+# 80legs crawler
+User-agent: 008
+Disallow: /
+
+# Apparently doesn't respect *
+User-agent: Seekport Crawler
+Disallow: /
+
-- 
2.44.1