# robots.txt for http://www.chrisdixonstudios.com/
# Enterprise-Grade Configuration | Last updated: 2025-06-30
#
# Maintenance notes:
#   * One directive per line. "#" starts a comment that runs to end of line,
#     so a directive must never share a line with a preceding comment.
#   * A crawler obeys only the group whose User-agent token matches it most
#     specifically. A crawler with its own group does NOT additionally
#     inherit the rules under "User-agent: *".
#   * Rules always belong to the nearest preceding User-agent line(s);
#     section-heading comments do not start a new group.
#   * No blank lines inside a group: some older parsers treat a blank line
#     as the end of the record.
#   * Host, Crawl-delay, Request-rate and Visit-time are non-standard
#     extensions (not part of RFC 9309); crawlers that do not support them
#     ignore them.

Host: www.chrisdixonstudios.com

### PRIVACY-FOCUSED SEARCH ENGINES ###
User-agent: Brave-Bot
User-agent: DuckDuckBot
User-agent: MojeekBot
Allow: /
Crawl-delay: 2
Allow: /cdsgallery/*.html$
Allow: /artgallery/*.html$
Allow: /category/*.html$
Allow: /images/cdsgallery/
Allow: /images/artgallery/
# For privacy-focused deal listings
Allow: /specials/
Disallow: /private/
# Blocks profile crawling
Disallow: /customer/

### PRIVACY SEARCH SPECIAL RULES ###
# Each crawler below gets the same rules as the group above plus its own
# extras. They are separate, complete groups so that parsers which do not
# merge repeated User-agent groups still apply every rule.

# Qwant (French privacy search)
User-agent: Qwantify
Allow: /
Crawl-delay: 2
Allow: /cdsgallery/*.html$
Allow: /artgallery/*.html$
Allow: /category/*.html$
Allow: /images/cdsgallery/
Allow: /images/artgallery/
Allow: /specials/
Disallow: /private/
Disallow: /customer/
# Conservative rate limit for EU traffic
Request-rate: 20/1h

# Startpage (Google proxy)
User-agent: Startpage
Allow: /
Crawl-delay: 2
Allow: /cdsgallery/*.html$
Allow: /artgallery/*.html$
Allow: /category/*.html$
Allow: /images/cdsgallery/
Allow: /images/artgallery/
Allow: /specials/
Disallow: /private/
Disallow: /customer/
# European business hours
Visit-time: 0500-2100

# Searx (open-source metasearch)
User-agent: searxbot
Allow: /
# Slower delay for distributed instances (replaces the shared 2-second delay)
Crawl-delay: 3
Allow: /cdsgallery/*.html$
Allow: /artgallery/*.html$
Allow: /category/*.html$
Allow: /images/cdsgallery/
Allow: /images/artgallery/
Allow: /specials/
# Allows product API endpoints if available
Allow: /api/
Disallow: /private/
Disallow: /customer/

### CDN CONFIGURATION ###
# Cloudflare (adjust patterns to your CDN)
User-agent: Cloudflare-AlwaysOnline
Allow: /
Crawl-delay: 1

# Amazonbot is Amazon's web crawler (it is not a CloudFront component).
# NOTE(review): "Fastly-Googlebot" is not a user-agent token we could
# confirm - verify it against server logs before relying on this group.
User-agent: Amazonbot
User-agent: Fastly-Googlebot
Disallow: /staging/
Disallow: /dev/
Allow: /*.css$
Allow: /*.js$

### GEO-BLOCKING ###
# Block known hostile regions (adjust per analytics)
# Yandex: Russian crawler; Baiduspider and sogou: Chinese crawlers
User-agent: Yandex
User-agent: Baiduspider
User-agent: sogou
Disallow: /
# 1 page per 10 seconds if allowed
Request-rate: 1/10

### AI/SCRAPER MITIGATION ###
User-agent: GPTBot
User-agent: Claude-Web
User-agent: anthropic-ai
User-agent: FacebookBot
Disallow: /
Crawl-delay: 10

### DDoS PROTECTION ###
# Project-25499: known malicious botnet
User-agent: DDoS-Guard
User-agent: Project-25499
User-agent: masscan-ng
Disallow: /
# Only allow crawling during business hours
Visit-time: 0600-1800

### ENTERPRISE CRAWL CONTROL ###
# Default group: applies to every crawler that has no group of its own.
# The security and content rules below live in this group on purpose; left
# without a User-agent line they would attach to whichever blocked-bot group
# precedes them, and their Allow lines would re-open paths for those bots.
User-agent: *
Crawl-delay: 5
# 10 requests per minute max
Request-rate: 10/1m
# 24/7 crawling (adjust if needed)
Visit-time: 0000-2400
#
### ADVANCED SECURITY ###
# Block all PHP except index
Disallow: /*.php$
Allow: /index.php$
Disallow: /*.sql$
Disallow: /*.env$
Disallow: /*.git/
# Blocks security cert scans
Disallow: /*.well-known/
# Cloudflare exploit path
Disallow: /cdn-cgi/
#
### CONTENT CONTROL ###
# System directories
Disallow: /admin/
Disallow: /includes/
Disallow: /tmp/
Disallow: /cache/
Disallow: /config/
Disallow: /logs/
Disallow: /private/
Disallow: /New_Folder*/
# Application files
Disallow: /configuration.php
Disallow: /install.php
Disallow: /phpmyadmin/
Disallow: /wp-admin/
Disallow: /wp-includes/
Disallow: /wp-login.php
# Custom directories
Disallow: /aCopy_of_gallery/
Disallow: /aSubGallery/
Disallow: /subimages/
Disallow: /subsubimages/
Disallow: /backup/
Disallow: /blog/
Disallow: /cgi-bin/
Disallow: /email/
Disallow: /gallery/
Disallow: /gallery2/
Disallow: /giftshop2/
Disallow: /ip24u/
Disallow: /ip24uazhl-iponu/
Disallow: /iponu/
Disallow: /LL-DecoyDucks/
Disallow: /lo/
Disallow: /rate_cgi.php
Disallow: /rate/
Disallow: /TESTER/
Disallow: /apoliticallycorrect/
Disallow: /ZZ*/
# Explicit allows
Allow: /public_images/*.jpg$
Allow: /public_images/*.png$
Allow: /public_images/*.gif$
Allow: /fonts/

### SEARCH ENGINE DIRECTIVES ###
#User-agent: Googlebot
#Sitemap: https://www.chrisdixonstudios.com/sitemap.xml
#http://www.sitemaps.org/protocol.php
#User-agent: Mediapartners-Google
#User-agent: A1 Sitemap Generator
#User-agent: miggibot

# Search engine crawl control
# NOTE(review): this group contains only Allow rules, and Googlebot/Bingbot
# do not inherit the "User-agent: *" group, so none of the CONTENT CONTROL or
# ADVANCED SECURITY Disallow rules apply to them. Copy the relevant Disallow
# lines here if those paths should be hidden from Google and Bing as well.
User-agent: Googlebot
User-agent: Bingbot
Allow: /*.css$
Allow: /*.js$
# E-commerce product pages
Allow: /cdsgallery/*.html$
Allow: /artgallery/*.html$
# Product categories
Allow: /category/*.html$
# Product images
# NOTE(review): other groups use /images/cdsgallery/ for product images -
# confirm whether /cdsgallery/ is the intended path here.
Allow: /cdsgallery/
Allow: /images/artgallery/
Crawl-delay: 2

### AGGRESSIVE CRAWLER BLOCKING ###
User-agent: SemrushBot
User-agent: SemrushBot-SA
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: MauiBot
User-agent: BLEXBot
User-agent: ExtLinksBot
User-agent: ZoominfoBot
User-agent: Barkrowler
User-agent: CCBot
Disallow: /

### SITEMAP REFERENCES ###
# Sitemap lines are independent of User-agent groups.
#Sitemap: https://www.chrisdixonstudios.com/sitemap-index.xml
Sitemap: https://www.chrisdixonstudios.com/sitemap.xml
# NOTE(review): the sitemap protocol covers XML, RSS/Atom and plain-text
# lists; an HTML page is not one of those formats - confirm this entry.
Sitemap: https://www.chrisdixonstudios.com/sitemap.html
Sitemap: https://www.chrisdixonstudios.com/image.xml