1
0
Fork 0
mirror of https://github.com/mastodon/mastodon.git synced 2024-08-20 21:08:15 -07:00
mastodon/public/robots.txt

93 lines
2.2 KiB
Text
Raw Normal View History

# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
User-agent: *
Disallow: /media_proxy/
Disallow: /interact/
2024-07-31 19:28:24 -07:00
2024-07-31 19:29:24 -07:00
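## AI bots - compiled from darkvisitors.com and multiple other sources.
## Every User-agent line below belongs to one group; the single "Disallow: /" at the end of the list applies to all of them.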
2024-07-31 19:28:24 -07:00
2024-08-01 03:09:19 -07:00
# OpenAI Scraper/Crawler/Assistant - https://platform.openai.com/docs/bots
2024-08-01 03:09:02 -07:00
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
2024-08-01 02:54:48 -07:00
# AI Search Crawler - https://developer.amazon.com/amazonbot
2024-07-31 19:28:24 -07:00
User-agent: Amazonbot
2024-08-01 02:46:06 -07:00
# ??
User-agent: anthropic-ai
2024-07-31 19:28:24 -07:00
2024-08-01 02:54:48 -07:00
# AI Search Crawler - https://support.apple.com/en-au/HT204683
2024-07-31 19:28:24 -07:00
User-agent: Applebot
2024-08-01 02:54:48 -07:00
# AI Data Scraper - https://support.apple.com/en-au/119829
2024-07-31 19:28:24 -07:00
User-agent: Applebot-Extended
# AI Data Scraper
User-agent: Bytespider
2024-08-01 02:54:48 -07:00
# AI Data Scraper - https://commoncrawl.org/ccbot
2024-07-31 19:28:24 -07:00
User-agent: CCBot
2024-08-01 02:54:48 -07:00
# AI Data Scraper - https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler
2024-07-31 19:28:24 -07:00
User-agent: ClaudeBot
2024-08-01 02:46:06 -07:00
# ??
User-agent: Claude-Web
# ??
User-agent: cohere-ai
2024-07-31 19:28:24 -07:00
# AI Data Scraper
User-agent: Diffbot
2024-08-01 03:03:27 -07:00
# AI Data Scraper - https://developers.facebook.com/docs/sharing/bot/
2024-07-31 19:28:24 -07:00
User-agent: FacebookBot
2024-08-01 02:46:06 -07:00
2024-07-31 19:28:24 -07:00
# AI Data Scraper
User-agent: Google-Extended
2024-08-01 02:46:06 -07:00
2024-08-01 03:03:27 -07:00
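# Generic Google crawler (used for research/one-off fetches, not Search indexing) - https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers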
2024-08-01 02:46:06 -07:00
User-agent: GoogleOther
2024-08-01 03:03:27 -07:00
# Image-fetching variant of GoogleOther - https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers
2024-08-01 02:46:06 -07:00
User-agent: GoogleOther-Image
2024-08-01 03:03:27 -07:00
# Video-fetching variant of GoogleOther - https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers
2024-08-01 02:46:06 -07:00
User-agent: GoogleOther-Video
2024-08-01 03:03:27 -07:00
# Intelligence Gatherer - https://imagesift.com/about
2024-08-01 02:46:06 -07:00
User-agent: ImagesiftBot
# AI Data Scraper (open-source bulk image dataset downloader) - https://github.com/rom1504/img2dataset
User-agent: img2dataset
2024-07-31 19:28:24 -07:00
2024-08-01 03:03:27 -07:00
# AI Data Scraper - https://developers.facebook.com/docs/sharing/webmasters/crawler
2024-07-31 19:28:24 -07:00
User-agent: Meta-ExternalAgent
2024-08-01 03:03:27 -07:00
# AI Assistant - https://developers.facebook.com/docs/sharing/webmasters/crawler
2024-07-31 19:28:24 -07:00
User-agent: Meta-ExternalFetcher
# AI Data Scraper
User-agent: omgili
2024-08-01 02:46:06 -07:00
# ??
User-agent: omgilibot
2024-07-31 19:28:24 -07:00
2024-08-01 03:03:27 -07:00
# AI Search Crawler - https://docs.perplexity.ai/docs/perplexitybot
2024-07-31 19:28:24 -07:00
User-agent: PerplexityBot
2024-08-01 02:46:06 -07:00
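# Generic scraping framework (default user agent of the open-source Scrapy library) - https://scrapy.org/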
User-agent: Scrapy
2024-07-31 19:28:24 -07:00
# AI Data Scraper
User-agent: Timpibot
2024-08-01 02:46:06 -07:00
2024-08-01 03:03:27 -07:00
# AI Data Scraper - https://velen.io/
2024-08-01 02:46:06 -07:00
User-agent: VelenPublicWebCrawler
2024-07-31 19:28:24 -07:00
2024-08-01 03:03:27 -07:00
# AI Search Crawler - https://about.you.com/es/youbot/
2024-07-31 19:28:24 -07:00
User-agent: YouBot
2024-08-01 02:46:46 -07:00
2024-07-31 19:28:24 -07:00
Disallow: /