# Source code for gnsq.consumer

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division

import logging
import random
import time

from collections import defaultdict
from itertools import cycle

import blinker
import gevent

from gevent.event import Event
from gevent.pool import Group

from .backofftimer import BackoffTimer
from .decorators import cached_property
from .errors import NSQException, NSQRequeueMessage, NSQSocketError
from .nsqd import NsqdTCPClient
from .states import INIT, RUNNING, BACKOFF, THROTTLED, CLOSED
from .util import parse_nsqds, parse_lookupds


class Consumer(object):
    """High level NSQ consumer.

    A Consumer will connect to the nsqd tcp addresses or poll the provided
    nsqlookupd http addresses for the configured topic and send signals to
    message handlers connected to the :attr:`on_message` signal or provided by
    ``message_handler``.

    Messages will automatically be finished when the message handle returns
    unless :meth:`message.enable_async() <gnsq.Message.enable_async>` is
    called. If an exception occurs or :class:`~gnsq.errors.NSQRequeueMessage`
    is raised, the message will be requeued.

    The Consumer will handle backing off of failed messages up to a
    configurable ``max_interval`` as well as automatically reconnecting to
    dropped connections.

    Example usage::

        from gnsq import Consumer

        consumer = Consumer('topic', 'channel', 'localhost:4150')

        @consumer.on_message.connect
        def handler(consumer, message):
            print('got message:', message.body)

        consumer.start()

    :param topic: specifies the desired NSQ topic

    :param channel: specifies the desired NSQ channel

    :param nsqd_tcp_addresses: a sequence of string addresses of the nsqd
        instances this consumer should connect to

    :param lookupd_http_addresses: a sequence of string addresses of the
        nsqlookupd instances this consumer should query for producers of the
        specified topic

    :param name: a string that is used for logging messages (defaults to
        ``'gnsq.consumer.{topic}.{channel}'``)

    :param message_handler: the callable that will be executed for each message
        received

    :param max_tries: the maximum number of attempts the consumer will make to
        process a message after which messages will be automatically discarded

    :param max_in_flight: the maximum number of messages this consumer will
        pipeline for processing. this value will be divided evenly amongst the
        configured/discovered nsqd producers

    :param requeue_delay: the default delay to use when requeueing a failed
        message

    :param lookupd_poll_interval: the amount of time in seconds between
        querying all of the supplied nsqlookupd instances. A random amount of
        time based on this value will be initially introduced in order to add
        jitter when multiple consumers are running

    :param lookupd_poll_jitter: the maximum fractional amount of jitter to add
        to the lookupd poll loop. This helps evenly distribute requests even
        if multiple consumers restart at the same time.

    :param low_ready_idle_timeout: the amount of time in seconds to wait for a
        message from a producer when in a state where RDY counts are
        re-distributed (ie. `max_in_flight` < `num_producers`)

    :param max_backoff_duration: the maximum time we will allow a backoff state
        to last in seconds. If zero, backoff will not occur

    :param backoff_on_requeue: if ``False``, backoff will only occur on
        exception

    :param **kwargs: passed to :class:`~gnsq.NsqdTCPClient` initialization
    """
    def __init__(self, topic, channel, nsqd_tcp_addresses=None,
                 lookupd_http_addresses=None, name=None, message_handler=None,
                 max_tries=5, max_in_flight=1, requeue_delay=0,
                 lookupd_poll_interval=60, lookupd_poll_jitter=0.3,
                 low_ready_idle_timeout=10, max_backoff_duration=128,
                 backoff_on_requeue=True, **kwargs):
        # Use None sentinels instead of mutable default arguments; treat
        # None the same as "no addresses supplied".
        nsqd_tcp_addresses = nsqd_tcp_addresses or []
        lookupd_http_addresses = lookupd_http_addresses or []

        if not nsqd_tcp_addresses and not lookupd_http_addresses:
            raise ValueError('must specify at least one nsqd or lookupd')

        self.nsqd_tcp_addresses = parse_nsqds(nsqd_tcp_addresses)
        self.lookupds = parse_lookupds(lookupd_http_addresses)
        # Round-robin iterator over the lookupd clients.
        self.iterlookupds = cycle(self.lookupds)

        self.topic = topic
        self.channel = channel
        self.max_tries = max_tries
        self.max_in_flight = max_in_flight

        self.requeue_delay = requeue_delay
        self.lookupd_poll_interval = lookupd_poll_interval
        self.lookupd_poll_jitter = lookupd_poll_jitter
        self.low_ready_idle_timeout = low_ready_idle_timeout
        self.backoff_on_requeue = backoff_on_requeue
        self.max_backoff_duration = max_backoff_duration
        self.conn_kwargs = kwargs

        if name:
            self.name = name
        else:
            self.name = '%s.%s.%s' % (__name__, self.topic, self.channel)

        if message_handler is not None:
            # weak=False keeps the handler alive even if the caller drops
            # its own reference (e.g. a lambda or bound method).
            self.on_message.connect(message_handler, weak=False)

        self.logger = logging.getLogger(self.name)

        self._state = INIT
        self._redistributed_ready_event = Event()
        # Per-connection backoff timers, created lazily on first access.
        self._connection_backoffs = defaultdict(self._create_backoff)
        self._message_backoffs = defaultdict(self._create_backoff)

        # Maps connection -> state constant (INIT/RUNNING/BACKOFF/THROTTLED).
        self._connections = {}
        self._workers = Group()
        self._killables = Group()
[docs] @cached_property def on_message(self): """Emitted when a message is received. The signal sender is the consumer and the ``message`` is sent as an argument. The ``message_handler`` param is connected to this signal. """ return blinker.Signal(doc='Emitted when a message is received.')
[docs] @cached_property def on_response(self): """Emitted when a response is received. The signal sender is the consumer and the ``response`` is sent as an argument. """ return blinker.Signal(doc='Emitted when a response is received.')
[docs] @cached_property def on_error(self): """Emitted when an error is received. The signal sender is the consumer and the ``error`` is sent as an argument. """ return blinker.Signal(doc='Emitted when a error is received.')
[docs] @cached_property def on_finish(self): """Emitted after a message is successfully finished. The signal sender is the consumer and the ``message_id`` is sent as an argument. """ return blinker.Signal(doc='Emitted after the a message is finished.')
[docs] @cached_property def on_requeue(self): """Emitted after a message is requeued. The signal sender is the consumer and the ``message_id`` and ``timeout`` are sent as arguments. """ return blinker.Signal(doc='Emitted after the a message is requeued.')
[docs] @cached_property def on_giving_up(self): """Emitted after a giving up on a message. Emitted when a message has exceeded the maximum number of attempts (``max_tries``) and will no longer be requeued. This is useful to perform tasks such as writing to disk, collecting statistics etc. The signal sender is the consumer and the ``message`` is sent as an argument. """ return blinker.Signal(doc='Sent after a giving up on a message.')
[docs] @cached_property def on_auth(self): """Emitted after a connection is successfully authenticated. The signal sender is the consumer and the ``conn`` and parsed ``response`` are sent as arguments. """ return blinker.Signal(doc='Emitted when a response is received.')
[docs] @cached_property def on_exception(self): """Emitted when an exception is caught while handling a message. The signal sender is the consumer and the ``message`` and ``error`` are sent as arguments. """ return blinker.Signal(doc='Emitted when an exception is caught.')
[docs] @cached_property def on_close(self): """Emitted after :meth:`close`. The signal sender is the consumer. """ return blinker.Signal(doc='Emitted after the consumer is closed.')
[docs] def start(self, block=True): """Start discovering and listing to connections.""" if self._state == INIT: if not any(self.on_message.receivers_for(blinker.ANY)): raise RuntimeError('no receivers connected to on_message') self.logger.debug('starting %s...', self.name) self._state = RUNNING self.query_nsqd() if self.lookupds: self.query_lookupd() self._killables.add(self._workers.spawn(self._poll_lookupd)) self._killables.add(self._workers.spawn(self._poll_ready)) else: self.logger.warn('%s already started', self.name) if block: self.join()
[docs] def close(self): """Immediately close all connections and stop workers.""" if not self.is_running: return self._state = CLOSED self.logger.debug('killing %d worker(s)', len(self._killables)) self._killables.kill(block=False) self.logger.debug('closing %d connection(s)', len(self._connections)) for conn in self._connections: conn.close_stream() self.on_close.send(self)
[docs] def join(self, timeout=None, raise_error=False): """Block until all connections have closed and workers stopped.""" self._workers.join(timeout, raise_error)
    @property
    def is_running(self):
        """Check if consumer is currently running."""
        return self._state == RUNNING

    @property
    def is_starved(self):
        """Evaluate whether any of the connections are starved.

        This property should be used by message handlers to reliably identify
        when to process a batch of messages.
        """
        return any(conn.is_starved for conn in self._connections)

    @property
    def total_ready_count(self):
        # Sum of the RDY counts currently granted across all connections.
        return sum(c.ready_count for c in self._connections)

    @property
    def total_in_flight(self):
        # Sum of the in-flight message counts across all connections.
        return sum(c.in_flight for c in self._connections)

    def query_nsqd(self):
        """Connect to each of the directly configured nsqd tcp addresses."""
        self.logger.debug('querying nsqd...')
        for address in self.nsqd_tcp_addresses:
            # Addresses are stored as 'host:port' strings.
            address, port = address.split(':')
            self.connect_to_nsqd(address, int(port))

    def query_lookupd(self):
        """Query the next lookupd (round robin) for producers of the topic
        and connect to any that are found.
        """
        self.logger.debug('querying lookupd...')
        lookupd = next(self.iterlookupds)

        try:
            producers = lookupd.lookup(self.topic)['producers']
            self.logger.debug('found %d producers', len(producers))

        except Exception as error:
            # Lookup failures are non-fatal; the poll loop retries later.
            self.logger.warn(
                'Failed to lookup %s on %s (%s)',
                self.topic, lookupd.address, error)
            return

        for producer in producers:
            self.connect_to_nsqd(
                producer['broadcast_address'], producer['tcp_port'])

    def _poll_lookupd(self):
        # Worker greenlet: periodically re-query lookupd for new producers.
        try:
            # Initial random sleep adds jitter so multiple consumers that
            # start together don't all hit lookupd at the same moment.
            delay = self.lookupd_poll_interval * self.lookupd_poll_jitter
            gevent.sleep(random.random() * delay)

            while True:
                gevent.sleep(self.lookupd_poll_interval)
                self.query_lookupd()

        except gevent.GreenletExit:
            # Killed by close(); exit quietly.
            pass

    def _poll_ready(self):
        # Worker greenlet: redistribute RDY state at least every 5 seconds,
        # or sooner when redistribute_ready_state() sets the event.
        try:
            while True:
                if self._redistributed_ready_event.wait(5):
                    self._redistributed_ready_event.clear()
                self._redistribute_ready_state()

        except gevent.GreenletExit:
            pass

    def _redistribute_ready_state(self):
        # Recompute and send per-connection RDY counts based on the current
        # connection states and max_in_flight.
        if not self.is_running:
            return

        if len(self._connections) > self.max_in_flight:
            # More connections than allowed in-flight messages: only a
            # subset of connections can have RDY > 0.
            ready_state = self._get_unsaturated_ready_state()
        else:
            ready_state = self._get_saturated_ready_state()

        for conn, count in ready_state.items():
            if conn.ready_count == count:
                self.logger.debug('[%s] RDY count already %d', conn, count)
                continue

            self.logger.debug('[%s] sending RDY %d', conn, count)

            try:
                conn.ready(count)
            except NSQSocketError as error:
                self.logger.warn('[%s] RDY %d failed (%r)', conn, count, error)

    def _get_unsaturated_ready_state(self):
        # Case: connections > max_in_flight. Give RDY 1 to a random subset of
        # max_in_flight non-backoff connections and RDY 0 to everything else.
        ready_state = {}
        active = []

        for conn, state in self._connections.items():
            if state == BACKOFF:
                ready_state[conn] = 0
            else:
                active.append(conn)

        random.shuffle(active)

        for conn in active[self.max_in_flight:]:
            ready_state[conn] = 0

        for conn in active[:self.max_in_flight]:
            ready_state[conn] = 1

        return ready_state

    def _get_saturated_ready_state(self):
        # Case: connections <= max_in_flight. Divide max_in_flight evenly
        # amongst active connections, after reserving fixed counts for
        # backoff (0), throttled/init (1) and idle (1) connections.
        ready_state = {}
        active = []
        now = time.time()

        for conn, state in self._connections.items():
            if state == BACKOFF:
                ready_state[conn] = 0

            elif state in (INIT, THROTTLED):
                ready_state[conn] = 1

            elif (now - conn.last_message) > self.low_ready_idle_timeout:
                # Reclaim RDY from connections that haven't produced a
                # message recently so others can use the capacity.
                self.logger.info(
                    '[%s] idle connection, giving up RDY count', conn)
                ready_state[conn] = 1

            else:
                active.append(conn)

        if not active:
            return ready_state

        ready_available = self.max_in_flight - sum(ready_state.values())
        connection_max_in_flight = ready_available // len(active)

        for conn in active:
            ready_state[conn] = connection_max_in_flight

        # Hand out the remainder one-by-one to a random subset.
        for conn in random.sample(active, ready_available % len(active)):
            ready_state[conn] += 1

        return ready_state

    def redistribute_ready_state(self):
        """Signal the _poll_ready worker to redistribute RDY counts."""
        self._redistributed_ready_event.set()

    def connect_to_nsqd(self, address, port):
        """Connect, identify and subscribe to nsqd at ``address:port``.

        No-op if the consumer is not running or already connected to this
        address.
        """
        if not self.is_running:
            return

        conn = NsqdTCPClient(address, port, **self.conn_kwargs)
        if conn in self._connections:
            self.logger.debug('[%s] already connected', conn)
            return

        self._connections[conn] = INIT
        self.logger.debug('[%s] connecting...', conn)

        # Forward the connection's signals to the consumer-level handlers.
        conn.on_message.connect(self.handle_message)
        conn.on_response.connect(self.handle_response)
        conn.on_error.connect(self.handle_error)
        conn.on_finish.connect(self.handle_finish)
        conn.on_requeue.connect(self.handle_requeue)
        conn.on_auth.connect(self.handle_auth)

        try:
            conn.connect()
            conn.identify()

            if conn.max_ready_count < self.max_in_flight:
                msg = (
                    '[%s] max RDY count %d < consumer max in flight %d, '
                    'truncation possible')
                self.logger.warning(
                    msg, conn, conn.max_ready_count, self.max_in_flight)

            conn.subscribe(self.topic, self.channel)

        except NSQException as error:
            self.logger.warn('[%s] connection failed (%r)', conn, error)
            self.handle_connection_failure(conn)
            return

        # Check if we've closed since we started
        if not self.is_running:
            self.handle_connection_failure(conn)
            return

        self.logger.info('[%s] connection successful', conn)
        self.handle_connection_success(conn)

    def _listen(self, conn):
        # Worker greenlet: pump the connection's read loop until it dies,
        # then run the failure path (cleanup + possible reconnect).
        try:
            conn.listen()
        except NSQException as error:
            self.logger.warning('[%s] connection lost (%r)', conn, error)

        self.handle_connection_failure(conn)

    def handle_connection_success(self, conn):
        """Spawn a listener for the new connection and rebalance RDY."""
        self._workers.spawn(self._listen, conn)
        self.redistribute_ready_state()

        # Only directly configured nsqd addresses track reconnect backoff;
        # lookupd-discovered producers are re-found by the poll loop.
        if str(conn) not in self.nsqd_tcp_addresses:
            return

        self._connection_backoffs[conn].success()

    def handle_connection_failure(self, conn):
        """Drop a dead connection and schedule a reconnect if configured."""
        del self._connections[conn]
        conn.close_stream()

        if not self.is_running:
            return

        self.redistribute_ready_state()

        if str(conn) not in self.nsqd_tcp_addresses:
            return

        seconds = self._connection_backoffs[conn].failure().get_interval()
        self.logger.debug('[%s] retrying in %ss', conn, seconds)

        gevent.spawn_later(
            seconds, self.connect_to_nsqd, conn.address, conn.port)

    def handle_auth(self, conn, response):
        """Log the accepted AUTH metadata and emit :attr:`on_auth`."""
        metadata = []
        if response.get('identity'):
            metadata.append("Identity: %r" % response['identity'])

        if response.get('permission_count'):
            metadata.append("Permissions: %d" % response['permission_count'])

        if response.get('identity_url'):
            metadata.append(response['identity_url'])

        self.logger.info('[%s] AUTH accepted %s', conn, ' '.join(metadata))
        self.on_auth.send(self, conn=conn, response=response)

    def handle_response(self, conn, response):
        """Emit :attr:`on_response` for a response frame."""
        self.logger.debug('[%s] response: %s', conn, response)
        self.on_response.send(self, response=response)

    def handle_error(self, conn, error):
        """Emit :attr:`on_error` for an error frame."""
        self.logger.debug('[%s] error: %s', conn, error)
        self.on_error.send(self, error=error)

    def _handle_message(self, message):
        # Dispatch one message to the on_message receivers, auto-finishing
        # unless the handler went async or already responded.
        if self.max_tries and message.attempts > self.max_tries:
            self.logger.warning(
                "giving up on message '%s' after max tries %d",
                message.id, self.max_tries)
            self.on_giving_up.send(self, message=message)
            return message.finish()

        self.on_message.send(self, message=message)

        if not self.is_running:
            return

        if message.is_async():
            # Handler took responsibility for finishing/requeueing later.
            return

        if message.has_responded():
            return

        message.finish()

    def handle_message(self, conn, message):
        """Process a message, requeueing it on failure.

        :class:`~gnsq.errors.NSQRequeueMessage` requeues with its requested
        backoff (or ``backoff_on_requeue``); any other exception requeues
        with backoff and emits :attr:`on_exception`.
        """
        self.logger.debug('[%s] got message: %s', conn, message.id)

        try:
            return self._handle_message(message)

        except NSQRequeueMessage as error:
            if error.backoff is None:
                backoff = self.backoff_on_requeue
            else:
                backoff = error.backoff

        except Exception as error:
            backoff = True
            self.logger.exception(
                '[%s] caught exception while handling message', conn)
            self.on_exception.send(self, message=message, error=error)

        if not self.is_running:
            return

        if message.has_responded():
            return

        try:
            message.requeue(self.requeue_delay, backoff)
        except NSQException as error:
            self.logger.warning(
                '[%s] error requeueing message (%r)', conn, error)

    def _create_backoff(self):
        # Factory used by the defaultdicts of per-connection backoff timers.
        return BackoffTimer(max_interval=self.max_backoff_duration)

    def _start_backoff(self, conn):
        # Put the connection into BACKOFF (RDY 0) and schedule the
        # transition to THROTTLED after the backoff interval.
        self._connections[conn] = BACKOFF

        interval = self._message_backoffs[conn].get_interval()
        gevent.spawn_later(interval, self._start_throttled, conn)

        self.logger.info('[%s] backing off for %s seconds', conn, interval)
        self.redistribute_ready_state()

    def _start_throttled(self, conn):
        # Backoff interval elapsed: probe the connection with RDY 1.
        if self._connections.get(conn) != BACKOFF:
            return

        self._connections[conn] = THROTTLED
        self.logger.info('[%s] testing backoff state with RDY 1', conn)
        self.redistribute_ready_state()

    def _complete_backoff(self, conn):
        # A throttled connection succeeded: resume if the timer is fully
        # reset, otherwise back off again with a reduced interval.
        if self._message_backoffs[conn].is_reset():
            self._connections[conn] = RUNNING
            self.logger.info('backoff complete, resuming normal operation')
            self.redistribute_ready_state()
        else:
            self._start_backoff(conn)

    def _finish_message(self, conn, backoff):
        # Drive the per-connection backoff state machine after a message
        # was finished (backoff=False) or requeued (backoff per caller).
        if not self.max_backoff_duration:
            # Backoff disabled entirely.
            return

        try:
            state = self._connections[conn]
        except KeyError:
            # Connection already removed (e.g. failure path); nothing to do.
            return

        if state == BACKOFF:
            return

        if backoff:
            self._message_backoffs[conn].failure()
            self._start_backoff(conn)

        elif state == THROTTLED:
            self._message_backoffs[conn].success()
            self._complete_backoff(conn)

        elif state == INIT:
            self._connections[conn] = RUNNING
            self.redistribute_ready_state()

    def handle_finish(self, conn, message_id):
        """Record a successful finish and emit :attr:`on_finish`."""
        self.logger.debug('[%s] finished message: %s', conn, message_id)
        self._finish_message(conn, backoff=False)
        self.on_finish.send(self, message_id=message_id)

    def handle_requeue(self, conn, message_id, timeout, backoff):
        """Record a requeue and emit :attr:`on_requeue`."""
        self.logger.debug(
            '[%s] requeued message: %s (%s)', conn, message_id, timeout)
        self._finish_message(conn, backoff=backoff)
        self.on_requeue.send(self, message_id=message_id, timeout=timeout)