Skip to content

Commit

Permalink
add AI LLM tags to public taxonomy
Browse files Browse the repository at this point in the history
  • Loading branch information
seemanne committed Feb 5, 2025
1 parent e4f52f0 commit 379b477
Showing 1 changed file with 80 additions and 0 deletions.
80 changes: 80 additions & 0 deletions taxonomy/classifications.json
Original file line number Diff line number Diff line change
Expand Up @@ -358,5 +358,85 @@
"description": "IP uses rapidly changing user agents.",
"label": "Spoofed User Agent",
"name": "profile:spoofed_user_agent"
},
"ai-crawler:meta": {
"description": "This IP is used by Meta to scrape websites for LLM training.",
"label": "Meta AI crawler",
"name": "ai-crawler:meta"
},
"ai-search:duckduckgo": {
"description": "This IP is used by DuckDuckGo to enrich search results using an LLM.",
"label": "DuckDuckGo AI search agent",
"name": "ai-search:duckduckgo"
},
"ai-crawler:allenai": {
"description": "This IP is used by AllenAI to scrape websites for LLM training.",
"label": "AllenAI AI crawler",
"name": "ai-crawler:allenai"
},
"ai-crawler:apple": {
"description": "This IP is used by Apple to scrape websites for LLM training.",
"label": "Apple AI crawler",
"name": "ai-crawler:apple"
},
"ai-search:apple": {
"description": "This IP is used by Apple to enrich search results using an LLM.",
"label": "Apple AI search agent",
"name": "ai-search:apple"
},
"ai-crawler:bytedance": {
"description": "This IP is used by ByteDance to scrape websites for LLM training.",
"label": "ByteDance AI crawler",
"name": "ai-crawler:bytedance"
},
"ai-crawler:commoncrawl": {
"description": "This IP is used by CommonCrawl to scrape websites for LLM training.",
"label": "CommonCrawl AI crawler",
"name": "ai-crawler:commoncrawl"
},
"ai-crawler:anthropic": {
"description": "This IP is used by Anthropic to scrape websites for LLM training.",
"label": "Anthropic AI crawler",
"name": "ai-crawler:anthropic"
},
"ai-search:anthropic": {
"description": "This IP is used by Anthropic to enrich search results using an LLM.",
"label": "Anthropic AI search agent",
"name": "ai-search:anthropic"
},
"ai-crawler:cohere": {
"description": "This IP is used by CohereAI to scrape websites for LLM training.",
"label": "CohereAI AI crawler",
"name": "ai-crawler:cohere"
},
"ai-search:cohere": {
"description": "This IP is used by CohereAI to enrich search results using an LLM.",
"label": "CohereAI AI search agent",
"name": "ai-search:cohere"
},
"ai-crawler:openai": {
"description": "This IP is used by OpenAI to scrape websites for LLM training.",
"label": "OpenAI AI crawler",
"name": "ai-crawler:openai"
},
"ai-search:openai": {
"description": "This IP is used by OpenAI to enrich search results using an LLM.",
"label": "OpenAI AI search agent",
"name": "ai-search:openai"
},
"ai-crawler:huawei": {
"description": "This IP is used by Huawei to scrape websites for LLM training.",
"label": "Huawei AI crawler",
"name": "ai-crawler:huawei"
},
"ai-crawler:perplexity": {
"description": "This IP is used by Perplexity to scrape websites for LLM training.",
"label": "Perplexity AI crawler",
"name": "ai-crawler:perplexity"
},
"ai-search:perplexity": {
"description": "This IP is used by Perplexity to enrich search results using an LLM.",
"label": "Perplexity AI search agent",
"name": "ai-search:perplexity"
}
}

0 comments on commit 379b477

Please sign in to comment.