#!/usr/bin/ruby

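# This was a quick hack to download Facebook URLs from
# http://www.facebook.com/directory. It reads URLs from input.txt, fetches
# each directory page, and prints the links found on it to stdout; progress
# and errors go to stderr.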
#
# @author Ron Bowes
# @date 2010-07-11

require 'net/http'
require 'uri'

# Scrapes link targets out of Facebook directory pages.
#
# Reads input.txt line by line. A line containing "directory" is treated as a
# URL: the page is fetched and the href of every UIDirectoryBox_Item anchor is
# printed to stdout. Any other line is echoed to stdout unchanged. Progress
# and error messages go to stderr.
#
# A page that yields no links, or a fetch that raises, is retried up to
# MAX_RETRIES times before the script gives up on that line and moves on.

# Browser-like User-agent header sent with every request.
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
# Captures the href of each directory entry anchor.
LINK_PATTERN = /UIDirectoryBox_Item.*?a href="(.*?)"/
# Number of retries allowed per URL after the first attempt.
MAX_RETRIES = 3
# Seconds to wait before retrying a page that contained no links.
NO_LINKS_DELAY = 10
# Seconds to wait before retrying after an exception.
ERROR_DELAY = 30

# Fetches one directory page and returns the hrefs found on it.
#
# @param address [String] absolute http:// or https:// URL
# @return [Array<String>] captured link targets (empty if none matched)
# @raise [URI::InvalidURIError] if address cannot be parsed
# @raise [StandardError] network or protocol errors from Net::HTTP
def fetch_directory_links(address)
  url = URI.parse(address)
  res = Net::HTTP.start(url.host, url.port, :use_ssl => (url.scheme == "https")) do |http|
    # request_uri keeps the query string and is "/" for an empty path,
    # neither of which url.path guarantees.
    http.get(url.request_uri, { "User-agent" => USER_AGENT })
  end
  # body is nil for body-less responses; treat that as a page with no links.
  res.body.to_s.scan(LINK_PATTERN).map { |captures| captures.first }
end

File.open("input.txt", "r") do |infile|
  infile.each_line do |line|
    # Drop the line terminator: URI.parse rejects a trailing newline.
    path = line.chomp

    unless path =~ /directory/
      $stderr.puts("Skipping: " + path)
      puts(path)
      next
    end

    attempts = 0
    loop do
      failed = false
      begin
        $stderr.puts("PATH: " + path)
        links = fetch_directory_links(path)
        links.each { |link| puts(link) }
        $stderr.puts("Found %d links!" % links.length)
        break unless links.empty?
      rescue URI::InvalidURIError => e
        # A malformed URL can never succeed, so retrying is pointless.
        $stderr.puts("ERROR: #{e.message}")
        $stderr.puts("Giving up!")
        break
      rescue StandardError => e
        # StandardError (not Exception) so Ctrl-C and exit still terminate.
        $stderr.puts("ERROR: #{e.class}: #{e.message}")
        failed = true
      end

      # Both empty pages and errors count against the same retry budget so a
      # permanently broken URL cannot stall the run forever.
      attempts += 1
      if attempts > MAX_RETRIES
        $stderr.puts("Giving up!")
        break
      end

      if failed
        $stderr.puts("Retrying after error (%d retries left)!" % (MAX_RETRIES - attempts))
        sleep(ERROR_DELAY)
      else
        $stderr.puts("Found no links, trying again (%d retries left)!" % (MAX_RETRIES - attempts))
        sleep(NO_LINKS_DELAY)
      end
    end
  end
end





