# The Mango Tree Indian Food Restaurant — robots.txt
# https://themangotreealberta.ca
#
# Format notes:
#   * One directive per line. A "#" starts a comment that runs to the end of
#     the line, so directives must never share a line with a leading comment.
#   * A crawler obeys ONLY the most specific User-agent group that matches it;
#     it does not also inherit rules from "User-agent: *". Every named group
#     below therefore repeats the Disallow rules it is meant to honour.
#
# llms.txt policy:
#   * Standard crawlers are told not to fetch /llms.txt (Disallow below).
#   * Disallow blocks crawling, not indexing: a blocked URL can still be
#     listed (without content) if other pages link to it.
#   * An "X-Robots-Tag: noindex, nofollow" response header on /llms.txt is
#     only seen by crawlers that are allowed to fetch the file, so it does not
#     strengthen the Disallow rule for crawlers that obey it. It is still
#     useful for crawlers that fetch the file anyway.

# ── General crawlers ────────────────────────────────────────────────────────
User-agent: *
Allow: /
# Keep llms.txt away from standard web crawlers
Disallow: /llms.txt
# Exclude legal pages
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# ── AI / LLM crawlers ───────────────────────────────────────────────────────
# These agents are explicitly permitted to read llms.txt and all public content
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: CCBot
User-agent: anthropic-ai
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: Claude-SearchBot
User-agent: Google-Extended
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: cohere-ai
User-agent: Applebot-Extended
User-agent: FacebookBot
User-agent: Meta-ExternalAgent
User-agent: Bytespider
User-agent: Amazonbot
User-agent: DuckAssistBot
User-agent: MistralAI-User
Allow: /
# Already covered by "Allow: /"; listed explicitly to document the intent
Allow: /llms.txt
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# ── Google Ads crawlers ─────────────────────────────────────────────────────
# AdsBot crawlers ignore "User-agent: *" and must be named explicitly.
# They are given full access, including the pages disallowed above.
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
User-agent: AdsBot-Google-Mobile-Apps
Allow: /

# ── Rate-limited bots ───────────────────────────────────────────────────────
# Crawl-delay is a non-standard extension; only crawlers that choose to
# support it will honour it. The Disallow rules are repeated in each group
# because these bots do not fall back to the "User-agent: *" group.
User-agent: Baiduspider
Crawl-delay: 10
Disallow: /llms.txt
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

User-agent: SemrushBot
Crawl-delay: 5
Disallow: /llms.txt
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

User-agent: AhrefsBot
Crawl-delay: 5
Disallow: /llms.txt
Disallow: /privacy-policy.html
Disallow: /terms-of-use.html

# ── Sitemap location ────────────────────────────────────────────────────────
Sitemap: https://themangotreealberta.ca/sitemap.xml