# For all robots
User-agent: *

# Block access to specific groups of pages / session-based pages
Disallow: /cart*
Disallow: /checkout*
Disallow: /my-account*
Disallow: /login*
Disallow: /createAccount*
Disallow: /j_spring_security_check

# Block access to intensive pages - the wildcard blocks all variants
Disallow: /search*
Disallow: /p/variant/cacheProductImage
Disallow: /sap/*
Disallow: /*.json
Disallow: /*jsessionid

Request-rate: 1/20   # maximum rate is one page every 20 seconds (non-standard directive; not all crawlers honour it)
Crawl-delay: 20      # 20 seconds between page requests
# Visit-time: 0400-0845   # only visit between 04:00 and 08:45 UTC; review the times for geo-specific schedules

# Allow search crawlers to discover the sitemap
# (note: the Sitemap directive formally requires an absolute URL including the host)
Sitemap: /sitemap.xml

# Block CazoodleBot as it does not present correct accept content headers
User-agent: CazoodleBot
Disallow: /

# Block MJ12bot as it is just noise
User-agent: MJ12bot
Disallow: /

# Block dotbot as it cannot parse base URLs properly
# (the version suffix is dropped so the rule also matches future releases)
User-agent: dotbot
Disallow: /

# Block Gigabot
User-agent: Gigabot
Disallow: /

# User-agent: *
# Disallow: /*noindex=1

# Note: wildcards in User-agent values are non-standard; strict parsers match product tokens literally
User-agent: Amazon*
Crawl-delay: 20000   # 20 seconds (Amazon reads Crawl-delay in milliseconds) between page requests
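
# A minimal sketch, commented out to keep it inert: a dedicated group can
# throttle a single aggressive crawler without blocking it outright.
# "ExampleBot" is a hypothetical user-agent token used purely for illustration.
# User-agent: ExampleBot
# Crawl-delay: 60     # at most one request per minute for this crawler only
# Disallow: /search*  # still keep it off the expensive search pages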