# Sample robots.txt file
# (make sure the filename is ALL LOWERCASE on Linux/Unix systems).
#
# This file must go in your web site's ROOT directory, i.e. the directory
# where your site's main /index.html file would be found. It is usually
# /yourhomedir/public_html/ or /yourhomedir/httpdocs, where "yourhomedir"
# is your user account's name.
#
# Format reminder: one record per line; a group is one or more
# "User-agent:" lines followed by its rules; groups are separated by
# blank lines.

# ---------------------------------------------------------------------
# Site checkers, offline downloaders and site rippers: blocked entirely.
# ---------------------------------------------------------------------

User-agent: sitecheck.internetseer.com
Disallow: /

User-agent: Zealbot
Disallow: /

User-agent: MSIECrawler
Disallow: /

User-agent: SiteSnagger
Disallow: /

User-agent: WebStripper
Disallow: /

User-agent: WebCopier
Disallow: /

User-agent: Fetch
Disallow: /

User-agent: Offline Explorer
Disallow: /

User-agent: Teleport
Disallow: /

User-agent: TeleportPro
Disallow: /

User-agent: WebZIP
Disallow: /

User-agent: linko
Disallow: /

User-agent: HTTrack
Disallow: /

User-agent: Microsoft.URL.Control
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: larbin
Disallow: /

User-agent: libwww
Disallow: /

User-agent: ZyBORG
Disallow: /

User-agent: Download Ninja
Disallow: /

User-agent: wget
Disallow: /

User-agent: grub-client
Disallow: /

User-agent: k2spider
Disallow: /

User-agent: NPBot
Disallow: /

User-agent: WebReaper
Disallow: /

# ---------------------------------------------------------------------
# Default rules: these apply to ALL other search engine spiders/crawlers.
# Kept as a single "User-agent: *" group on purpose: parsers that do not
# merge duplicate groups would otherwise honour only the first one.
# ---------------------------------------------------------------------

User-agent: *
# Throttling. Crawl-delay and Request-rate are non-standard extensions
# and are only honoured by some crawlers.
Crawl-delay: 10
Request-rate: 1/5
# Pages that should stay crawlable. Listed before the Disallow rules so
# that first-match parsers see them first.
Allow: /catalog/index.php
Allow: /catalog/sitemapindex.xml
Allow: /catalog/sitemapproducts.xml
Allow: /catalog/sitemapcategories.xml
Allow: /catalog/sitemapmanufacturers.xml
Allow: /catalog/sitemapspecials.xml
Allow: /catalog/product_info.php
# These settings keep spiders from indexing your unwanted pages.
Disallow: /catalog/includes
Disallow: /catalog/account.php
Disallow: /catalog/advanced_search.php
Disallow: /catalog/checkout_shipping.php
Disallow: /catalog/create_account.php
Disallow: /catalog/login.php
Disallow: /catalog/password_forgotten.php
Disallow: /catalog/popup_image.php
Disallow: /catalog/shopping_cart.php
Disallow: /catalog/contact_us.php
Disallow: /catalog/product_reviews_write.php
Disallow: /catalog/cookie_usage.php
Disallow: /catalog/images/

# ---------------------------------------------------------------------
# If you do not wish to have the Google image bot scan your domain for
# images, include the following group. I found that my bandwidth usage
# dropped by a massive amount after I got rid of the Google image bot:
# all I had was image hunters stealing product shots and not even
# browsing the site.
# ---------------------------------------------------------------------

User-agent: Googlebot-Image
Disallow: /

# ---------------------------------------------------------------------
# Sitemap location. This record is independent of the user-agent groups
# above, so it is kept in its own stanza rather than inside a group.
# ---------------------------------------------------------------------

Sitemap: http://www.pshop.no/catalog/sitemapindex.xml