# robots.txt - crawler directives (Robots Exclusion Protocol).
# The format is line-oriented: every directive and every comment must sit on
# its own line. Blank lines are used only BETWEEN groups, never inside one,
# because some older parsers treat a blank line as the end of a group.

# Rules for all robots that do not match a more specific group below
User-agent: *
# Block access to specific groups of pages:
# "/results$" matches the bare /results path only; "/results?" matches
# /results followed by any query string
Disallow: /results$
Disallow: /results?
# Block excessive pagination to prevent crawl budget waste
# (the ?/& pair covers the parameter in first or later query position)
Disallow: /*?page=
Disallow: /*&page=
# Block tracking parameters to reduce duplicate URLs
Disallow: /*?utm_
Disallow: /*&utm_
Disallow: /*?cid=
Disallow: /*&cid=
Disallow: /*?bid=
Disallow: /*&bid=

# Allow search crawlers to discover the sitemaps.
# Sitemap lines are independent of any User-agent group.
Sitemap: https://www.aldi.ie/sitemap.xml
# Categories sitemap (each sitemap needs its own "Sitemap:" directive)
Sitemap: https://www.aldi.ie/sitemap_categories.xml

# Block CazoodleBot as it does not present correct accept content headers
User-agent: CazoodleBot
Disallow: /

# Block MJ12bot as it is just noise
User-agent: MJ12bot
Disallow: /

# Block dotbot as it cannot parse base URLs properly.
# The product token is given without a version suffix: tokens may contain
# only letters, "_" and "-", and crawlers match on the bare token.
User-agent: dotbot
Disallow: /

# Block Gigabot
User-agent: Gigabot
Disallow: /