# From the OpenClassrooms course "Testez votre projet avec Python"
# https://github.com/celine-m-s/le_monde_est_petit/tree/master
"""Download sample agents from pplapi.com and print or save them as JSON."""

import argparse
import json
import time
import urllib.error
import urllib.request

# URL template of the batch endpoint; the placeholder is the batch size.
_BATCH_URL = "http://pplapi.com/batch/{}/sample.json"
# Largest number of agents asked for in a single request.
_MAX_BATCH_SIZE = 500
# Pause (seconds) between two consecutive requests.
_REQUEST_DELAY = 1
# Pause (seconds) after the server answered with an HTTP error.
_ERROR_DELAY = 10


def _install_proxy(proxies):
    """Install a global urllib opener routing HTTP traffic through *proxies*."""
    proxy_support = urllib.request.ProxyHandler({'http': '%s' % proxies})
    urllib.request.install_opener(urllib.request.build_opener(proxy_support))


def get_agents(count, proxies=None):
    """Download at least ``count`` agents from pplapi.com.

    Agents are fetched in batches of at most 500. The function waits one
    second between requests and ten seconds after any HTTP error, then
    retries until enough agents have been collected.

    Args:
        count: Number of agents wanted. Zero or a negative value returns
            an empty list without touching the network.
        proxies: Optional HTTP proxy address. When truthy, a global
            ``urllib`` opener using it is installed before downloading.

    Returns:
        The list of decoded agents, in download order.
    """
    agents = []
    if count <= 0:
        return agents

    if proxies:
        # The opener is process-global, so installing it once is enough;
        # there is no need to rebuild it for every batch.
        _install_proxy(proxies)

    while len(agents) < count:
        if agents:
            # Wait one second between every request
            time.sleep(_REQUEST_DELAY)

        request_count = min(count - len(agents), _MAX_BATCH_SIZE)
        try:
            # The context manager closes the connection even if read() fails.
            with urllib.request.urlopen(
                    _BATCH_URL.format(request_count)) as response:
                payload = response.read()
        except urllib.error.HTTPError:
            print("Too many requests, sleeping 10s ({} agents)"
                  .format(len(agents)))
            time.sleep(_ERROR_DELAY)
            continue

        agents += json.loads(payload.decode("utf8"))

    return agents


def parse_args(args=None):
    """Parse the command-line options of the script.

    Args:
        args: List of argument strings. ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``count`` (int, default 10),
        ``dest`` (str or None) and ``proxy`` (str or None).
    """
    parser = argparse.ArgumentParser(
        description="Download agents from pplapi.com")
    parser.add_argument(
        "-c", "--count", type=int, default=10,
        help="Number of agents to download.")
    parser.add_argument(
        "-d", "--dest",
        help="Destination file. If absent, will print to stdout")
    parser.add_argument(
        "-p", "--proxy",
        help="Proxy.")
    return parser.parse_args(args)


def main(command_line_arguments=None):
    """Download the requested agents and write them out as sorted JSON.

    Args:
        command_line_arguments: Optional list of argument strings, passed
            to :func:`parse_args`. ``None`` means the process arguments.
    """
    args = parse_args(command_line_arguments)

    # get_agents ignores a falsy proxy, so no branching is needed here.
    agents = get_agents(args.count, proxies=args.proxy)
    result = json.dumps(agents, indent=2, sort_keys=True)

    if args.dest:
        with open(args.dest, 'w', encoding="utf-8") as out_f:
            out_f.write(result)
    else:
        print(result)


if __name__ == "__main__":
    main()