使用tornado的AsyncHTTPClient

tornado自带了一个非常好用的非阻塞HTTPClient, 使用如下:

from tornado.httpclient import AsyncHTTPClient

def echo_response(response):
    """Print the body of a fetched response to standard output.

    Args:
        response: Object exposing a ``body`` attribute; it is passed below
            as the callback argument of ``http_client.fetch``.
    """
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(response.body)


# Create the non-blocking client and issue one request; echo_response is
# passed as the completion callback.
http_client = AsyncHTTPClient()
# NOTE(review): USER_AGENT is not defined anywhere in this snippet -- it must
# be provided by the surrounding code before this call runs.
http_client.fetch('http://www.baidu.com', echo_response, 
    user_agent=USER_AGENT)

以上代码在tornado webserver中可以正常使用, 因为此时IOLoop已经在运行, 回调会被正常触发.
现在我的需求是: 要处理一批url, 且不启用webserver, 代码包装如下:

ahttp.py

import functools
from tornado.httpclient import AsyncHTTPClient
from tornado.ioloop import IOLoop

AGENT = 'Mozilla/5.0 (X11; Linux x86_64)'


def get(url_with_callbacks):
    """Fetch a batch of URLs concurrently and block until all have completed.

    A private IOLoop is created and run for the duration of the call, so no
    web server (or other running loop) is required.

    Args:
        url_with_callbacks: Iterable of ``(url, callback)`` pairs. Each
            callback is invoked through ``_invoke_callback_and_check_ioloop``
            once its request has finished.

    Returns:
        None. Results are delivered only through the callbacks.
    """
    # Materialise the input: the list doubles as the set of outstanding
    # requests that the completion handler shrinks.
    items = list(url_with_callbacks)
    if not items:
        # With nothing to fetch no completion handler would ever stop the
        # loop, so starting it would block forever.
        return

    io_loop = IOLoop()
    try:
        # NOTE(review): relies on the ``io_loop`` constructor argument and
        # callback-style ``fetch`` -- confirm the installed tornado version
        # still supports both.
        http_client = AsyncHTTPClient(io_loop)

        for item in items:
            url, callback = item
            _callback = functools.partial(
                _invoke_callback_and_check_ioloop,
                io_loop, http_client, items, item, callback)
            http_client.fetch(url, _callback, user_agent=AGENT)

        # Blocks here until the last completion handler calls io_loop.stop().
        io_loop.start()
    finally:
        # Release the resources held by the private loop once it is no
        # longer running.
        io_loop.close()


def _invoke_callback_and_check_ioloop(io_loop, http_client, items,
                                      item, callback, response):
    """Deliver one finished response and shut down after the last one.

    Args:
        io_loop: Loop object; ``stop()`` is called on it once ``items`` is
            empty.
        http_client: Client object; ``close()`` is called on it right after
            the loop has been asked to stop.
        items: Mutable list of outstanding entries, shared by every
            in-flight request.
        item: The entry belonging to this response; removed from ``items``.
        callback: Callable invoked with ``response.body``, or with ``None``
            when ``response.error`` is truthy.
        response: Object exposing ``error`` and ``body`` attributes.

    Exceptions derived from ``Exception`` that the callback raises are
    reported on standard output as ``Error: ...`` and otherwise ignored.
    """
    # Mark this request as finished before running user code.
    items.remove(item)

    try:
        callback(None if response.error else response.body)
    except Exception as e:
        # Format the exception itself rather than the deprecated
        # ``e.message`` attribute, which not every exception provides.
        print('Error: %s' % e)
    finally:
        # Runs even if reporting the error fails, so the loop cannot be left
        # running with no outstanding requests.
        if not items:
            io_loop.stop()
            http_client.close()

使用

import ahttp

def echo_response(body):
    """Print a fetched response body to standard output.

    Args:
        body: Value handed over by ``ahttp.get``: the response body, or
            ``None`` when the request reported an error.
    """
    # The parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(body)

# Each entry pairs a URL with the callback that receives its result.
url_with_callbacks = [
    ('http://www.baidu.com', echo_response),
    ('http://www.163.com', echo_response),
]
# Runs all requests; ahttp.get starts its own IOLoop and returns after it stops.
ahttp.get(url_with_callbacks)

Previous Next

ponxu / 2014-08-17
Categories Python Tags Python tornado