# frozen_string_literal: true

require 'mechanize'

# Replays a random run of GET requests taken from a web server access log
# against BASE_URL, sending each request with the user agent that was
# recorded on the same log line.
#
# Usage: ruby <this script> [access_log_path]
# The log path defaults to 'access_log' in the current directory.
#
# Example of the request part of a log line this script picks up:
#   "GET /fr/gcc/price_history/pointer/%3A!registration,59903!sequence,03!package,005./search_type/st_oddb/search_query/Topiramat+Pfizer+25+mg/currency/USD HTTP/1.1"

# Host that every recorded request path is replayed against.
BASE_URL = 'http://oddb.masa.org'

# Number of consecutive log entries replayed per run.
REPLAY_COUNT = 10

# The random start index is drawn from the first (entries - TAIL_MARGIN)
# positions, so a full REPLAY_COUNT window always fits after it.
TAIL_MARGIN = 20

# Captures the request path between "GET " and " HTTP".
REQUEST_PATH = /GET (.+) HTTP/

# Captures the contents of the last double-quoted field on the line; whatever
# follows its closing quote (usually just the newline) is ignored, so a final
# line without a trailing newline is handled too.
# NOTE(review): assumes the user agent is the last quoted field, as in the
# combined log format -- confirm against the real log layout.
LAST_QUOTED_FIELD = /"([^"]*)"[^"]*\z/

# Turns one access-log line into a [url, user_agent] pair.
#
# @param line [String] a raw line from the access log
# @return [Array(String, String), nil] the absolute URL to request and the
#   user agent to send, or nil when the line has no GET request or no
#   quoted field to use as user agent
def parse_log_line(line)
  user_agent = line[LAST_QUOTED_FIELD, 1]
  path = line[REQUEST_PATH, 1]
  return unless user_agent && path

  [BASE_URL + path, user_agent]
end

input = ARGV[0] || 'access_log'
agent_urls = File.foreach(input).map { |line| parse_log_line(line) }.compact
abort "no replayable GET requests found in #{input}" if agent_urls.empty?

# Logs with TAIL_MARGIN entries or fewer are replayed from the beginning:
# rand(0) would return a Float and rand of a negative number could yield an
# index past the end of the list.
start = agent_urls.length > TAIL_MARGIN ? rand(agent_urls.length - TAIL_MARGIN) : 0
p start

browser = Mechanize.new
# Array#[start, length] silently returns fewer entries when the log is short.
agent_urls[start, REPLAY_COUNT].each do |url, user_agent|
  browser.user_agent = user_agent
  puts "url = #{url}, agent = #{browser.user_agent}"
  browser.get(url)
  sleep 1 # pause between requests
end