1
0
Fork 0
mirror of https://github.com/mastodon/mastodon.git synced 2024-08-20 21:08:15 -07:00

Update robots.txt

This commit is contained in:
Shlee 2024-08-02 09:39:34 +09:30 committed by GitHub
parent 776676cec4
commit 23a48fbabe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -9,7 +9,7 @@ Disallow: /interact/
###############################################################################################################################
# OpenAI Scraper/Crawler/Assistant https://platform.openai.com/docs/bots
# "GPTBot is used to make our generative AI foundation models more useful and safe. It is used to crawl content that may be used in training our generative AI foundation models."
# "GPTBot is used to make our generative AI foundation models more useful and safe. It is used to crawl content that may be used in training our generative AI foundation models"
User-agent: GPTBot
Disallow: /
# "When users ask ChatGPT or a CustomGPT a question, it may visit a web page to help answer"
@@ -25,22 +25,22 @@ User-agent: Amazonbot
Disallow: /
# Apple Siri Crawler https://support.apple.com/en-au/HT204683
# "Applebot is the web crawler for Apple. Products like Siri and Spotlight Suggestions use Applebot."
# "Applebot is the web crawler for Apple. Products like Siri and Spotlight Suggestions use Applebot"
User-agent: Applebot
Disallow: /
# Apple AI models https://support.apple.com/en-au/119829
# "Allowing Applebot-Extended will help improve the capabilities and quality of Apple's generative AI models over time."
# "Allowing Applebot-Extended will help improve the capabilities and quality of Apple's generative AI models over time"
User-agent: Applebot-Extended
Disallow: /
# Common Crawl https://commoncrawl.org/ccbot
# "democratizing access to web information by producing and maintaining an open repository of web crawl data that is universally accessible and analyzable by anyone."
# "democratizing access to web information by producing and maintaining an open repository of web crawl data that is universally accessible and analyzable by anyone"
User-agent: CCBot
Disallow: /
# Facebook AI models https://developers.facebook.com/docs/sharing/bot/
# "FacebookBot crawls public web pages to improve language models for our speech recognition technology. "
# "FacebookBot crawls public web pages to improve language models for our speech recognition technology."
User-agent: FacebookBot
Disallow: /
@@ -50,7 +50,7 @@ User-agent: Meta-ExternalAgent
Disallow: /
# Google's AI models https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers
# "help improve Gemini Apps and Vertex AI generative APIs, including future generations of models that power those products."
# "help improve Gemini Apps and Vertex AI generative APIs, including future generations of models that power those products"
User-agent: Google-Extended
Disallow: /