1
0
Fork 0
mirror of https://github.com/mastodon/mastodon.git synced 2024-08-20 21:08:15 -07:00
mastodon/public/robots.txt
2024-08-01 19:39:19 +09:30

92 lines
2.2 KiB
Text

# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file
User-agent: *
Disallow: /media_proxy/
Disallow: /interact/
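## AI bots - list compiled from darkvisitors.com and multiple other sources.
## The User-agent lines that follow form a single group: the `Disallow: /` after the last of them applies to every agent listed.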
# OpenAI Scraper/Crawler/Assistant - https://platform.openai.com/docs/bots
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
# AI Search Crawler - https://developer.amazon.com/amazonbot
User-agent: Amazonbot
# Undocumented - presumably an Anthropic agent; no official reference found (unverified)
User-agent: anthropic-ai
# AI Search Crawler - https://support.apple.com/en-au/HT204683
User-agent: Applebot
# AI Data Scraper - https://support.apple.com/en-au/119829
User-agent: Applebot-Extended
# AI Data Scraper
User-agent: Bytespider
# AI Data Scraper - https://commoncrawl.org/ccbot
User-agent: CCBot
# AI Data Scraper - https://support.anthropic.com/en/articles/8896518-does-anthropic-crawl-data-from-the-web-and-how-can-site-owners-block-the-crawler
User-agent: ClaudeBot
# Undocumented - presumably an Anthropic agent; no official reference found (unverified)
User-agent: Claude-Web
# Undocumented - presumably a Cohere agent; no official reference found (unverified)
User-agent: cohere-ai
# AI Data Scraper
User-agent: Diffbot
# AI Data Scraper - https://developers.facebook.com/docs/sharing/bot/
User-agent: FacebookBot
# AI Data Scraper
User-agent: Google-Extended
# Generic Crawler - used by Google for non-Search fetches, not the Search indexer - http://google.com/bot.html
User-agent: GoogleOther
# Generic Crawler - presumably the image-fetching variant of GoogleOther (verify) - https://developers.google.com/search/docs/crawling-indexing/googlebot
User-agent: GoogleOther-Image
# Generic Crawler - presumably the video-fetching variant of GoogleOther (verify) - https://developers.google.com/search/docs/crawling-indexing/googlebot
User-agent: GoogleOther-Video
# Intelligence Gatherer - https://imagesift.com/about
User-agent: ImagesiftBot
# AI Data Scraper - open-source image dataset downloader - https://github.com/rom1504/img2dataset
User-agent: img2dataset
# AI Data Scraper - https://developers.facebook.com/docs/sharing/webmasters/crawler
User-agent: Meta-ExternalAgent
# AI Assistant - https://developers.facebook.com/docs/sharing/webmasters/crawler
User-agent: Meta-ExternalFetcher
# AI Data Scraper
User-agent: omgili
# AI Data Scraper - presumably the same operator as omgili above (unverified)
User-agent: omgilibot
# AI Search Crawler - https://docs.perplexity.ai/docs/perplexitybot
User-agent: PerplexityBot
# Scraping Framework - default user agent of the open-source Scrapy library - https://scrapy.org/
User-agent: Scrapy
# AI Data Scraper
User-agent: Timpibot
# AI Data Scraper - https://velen.io/
User-agent: VelenPublicWebCrawler
# AI Search Crawler - https://about.you.com/es/youbot/
User-agent: YouBot
Disallow: /