# RGM Real Growth Matters — robots.txt
# Version: 5.0 (2026-05-15)
#
# Policy: Allow search engines and well-known AI crawlers full access
#         to public content.
#         Block known scrapers, competitive auditors, and data harvesters.
#
# This file is advisory. Enforcement of access controls requires
# server-side rules (Cloudflare WAF, fail2ban, or equivalent).
# A copy of this policy lives at /security/index.html.
#
# Maintenance notes:
#   * A crawler obeys ONLY the most specific group that names it; it does
#     not also inherit the rules of the "User-agent: *" group. Every group
#     that grants access therefore repeats the non-public path exclusions.
#   * User-agent tokens are matched case-insensitively, so each crawler is
#     listed once regardless of how its vendor capitalises the name.
#   * Several "User-agent:" lines may share one rule set; crawlers with
#     identical rules are grouped to keep the file short and consistent.
#   * NOTE(review): every path listed under Disallow is publicly visible to
#     anyone who reads this file. Paths such as /.git/ and /node_modules/
#     should also be blocked (or not deployed at all) at the server level.

# ===========================================================
# Major search engines — full access to public content
# ===========================================================
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: YandexBot
User-agent: Baiduspider
User-agent: Slurp
User-agent: facebookexternalhit
User-agent: LinkedInBot
User-agent: Twitterbot
User-agent: Applebot
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /api/
Disallow: /_/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /scripts/

# ===========================================================
# AI / LLM crawlers — full access to public content
# (we want to be cited)
# ===========================================================
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: ClaudeBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: Google-Extended
User-agent: CCBot
User-agent: cohere-ai
User-agent: Bytespider
User-agent: Amazonbot
User-agent: Diffbot
User-agent: Meta-ExternalAgent
User-agent: FacebookBot
User-agent: GoogleOther
User-agent: YouBot
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /api/
Disallow: /_/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /scripts/

# ===========================================================
# Known scrapers, competitive auditors, harvesters — BLOCKED
# ===========================================================
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: SemrushBot-SA
User-agent: MJ12bot
User-agent: DotBot
User-agent: rogerbot
User-agent: Screaming Frog SEO Spider
User-agent: SiteAuditBot
User-agent: BLEXBot
User-agent: SerpstatBot
User-agent: SISTRIX Crawler
User-agent: linkfluence
User-agent: Linkdex
User-agent: Majestic-12
User-agent: SEOkicks
User-agent: SEOlyzer
User-agent: SpyFu
User-agent: NetSeer
User-agent: BacklinkCrawler
User-agent: Buzzstream
User-agent: ContentKing
User-agent: DataForSeoBot
User-agent: WhatsCMS
User-agent: WhatsRunningBot
User-agent: GrapeshotCrawler
User-agent: serpwolf
User-agent: SiteliftBot
User-agent: cliqzbot
Disallow: /

# ===========================================================
# AI scrapers used for unauthorized training/competitive harvest
# ===========================================================
User-agent: Omgili
User-agent: omgilibot
User-agent: ImagesiftBot
User-agent: img2dataset
User-agent: aiHitBot
User-agent: PetalBot
User-agent: NewsNow
User-agent: Webzio-Extended
User-agent: webzio
User-agent: ICC-Crawler
Disallow: /

# ===========================================================
# Generic / unknown bots — default to allow with crawl-delay
# Real users get no delay. Bots that aren't explicitly named
# get a small delay to deter aggressive harvesting.
# Crawl-delay is a non-standard directive: some crawlers honour
# it, others ignore it, so real rate limiting must be enforced
# server-side.
# ===========================================================
User-agent: *
Allow: /
Crawl-delay: 5
Disallow: /admin/
Disallow: /private/
Disallow: /api/
Disallow: /_/
Disallow: /.git/
Disallow: /node_modules/
Disallow: /scripts/

# ===========================================================
# Sitemap
# ===========================================================
Sitemap: https://realgrowthmatters.com/sitemap.xml