# Robots.txt file created by ComputerBob
# For domain: http://www.computerbob.com
# Validated by http://tool.motoricerca.info/robots-checker.phtml
#
# Format reminder: every directive must be on its own line, and a "#" turns
# the rest of that line into a comment. Blank lines separate groups, so none
# are used inside a group.
#
# NOTE(review): a crawler obeys only the single most specific User-agent group
# that matches it. Googlebot and Slurp have their own groups below, so the
# rules in the "User-agent: *" group do NOT apply to them - confirm that this
# is intended.

# My sitemap location
Sitemap: http://www.computerbob.com/sitemap.xml.gz

# Prevent the Internet Archive Wayback Machine from archiving any of my site's pages
User-agent: ia_archiver
Disallow: /

# Prevent Digg from archiving any of my site's pages
User-agent: duggmirror
Disallow: /

# Prevent the Googlebot image archiver from archiving any of my site's images
User-agent: Googlebot-Image
Disallow: /

# Based on the advice found at beginlinux.com/blog/2010/01/robots-txt-tips-for-deailing-with-bots/,
# I added the following lines to help secure WordPress
User-agent: Googlebot
# Keep the sitemap reachable: the "/*.gz$" rule below would otherwise match
# /sitemap.xml.gz. The longer (more specific) Allow rule takes precedence.
Allow: /sitemap.xml.gz
Disallow: /*.js$
Disallow: /*.inc$
Disallow: /*.css$
Disallow: /*.gz$
Disallow: /*.wmv$
Disallow: /*.cgi$
Disallow: /wp/category/*/*/
Disallow: /wp/*?*
Disallow: /wp/*?
Disallow: /wp/wp-*

# Prevent the Yahoo image search robot from archiving any of my site's images
User-agent: Yahoo-MMCrawler
Disallow: /

# Prevent the MSN image search robot from archiving any of my site's images
User-agent: msnbot-MM
Disallow: /

# Prevent the Firefox Fasterfox page preloader from preloading my site's pages
User-agent: Fasterfox
Disallow: /

# Keep the Yahoo Slurp bot out of the secret folder
# NOTE(review): this group was originally described as making Slurp pause for
# 5 seconds between pages, but it contains no Crawl-delay directive, so no
# delay is requested. Add "Crawl-delay: 5" here if the pause is still wanted.
User-agent: Slurp
Disallow: /secret/

# All allowed robots will spider the entire domain, except for the following files and folders
User-agent: *
# Prevent good bots from going into the secret folder and getting the index.php page -
# Bad bots will go and get it anyway and will get caught
Disallow: /ratings/
Disallow: /secret/
Disallow: /blacklist.dat
Disallow: /blacklist.php
Disallow: /cb_count/
Disallow: /count/
Disallow: /phpBB2/
Disallow: /.htaccess
Disallow: /400.shtml
Disallow: /403.shtml
Disallow: /404.shtml
Disallow: /500.shtml
Disallow: /autoblink.js
Disallow: /cgi-bin/
Disallow: /css/
Disallow: /favicon.ico
Disallow: /favorites.html
Disallow: /footer.html
Disallow: /forums/
Disallow: /guestsbook/
Disallow: /guests/disclaimer.html
Disallow: /head_info.html
Disallow: /header.html
Disallow: /headerhome.html
Disallow: /hours.txt
Disallow: /image.php
Disallow: /image_banner.php
Disallow: /images/
Disallow: /languages.html
Disallow: /lazarus/
Disallow: /Library/
Disallow: /login.html
Disallow: /navcolumn.html
Disallow: /navcolumn_not_valid.html
Disallow: /scripts.txt
Disallow: /spidercount/
Disallow: /suspended.page/
Disallow: /Templates/
# Based on the advice found at www.simplemachines.org/community/index.php?topic=226545.0,
# I added the following lines to stop Google Webmaster Tools from getting HTTP errors
# on verification code pages and getting NOT FOUND errors for a couple of SMF gif files
Disallow: /bbc/
Disallow: /survivors/Sources/
Disallow: /survivors/Themes/
# End of the lines that I added to solve errors from Google Webmaster Tools
# Based on the advice found at beginlinux.com/blog/2010/01/robots-txt-tips-for-deailing-with-bots/,
# I added the following lines to help secure WordPress
# (/cgi-bin/ is already disallowed earlier in this group, so it is not repeated here)
Disallow: /wp/wp-admin/
Disallow: /wp/wp-includes/
Disallow: /wp/wp-content/plugins/
Disallow: /wp/wp-content/cache/
Disallow: /wp/wp-content/themes/
Disallow: /wp/trackback/
Disallow: /wp/feed/
Disallow: /wp/comments/