# Source code for annotlib.multi_types

import numpy as np

from annotlib.utils import check_indices
from annotlib.base import BaseAnnot

from sklearn.utils import check_array

from itertools import chain


class MultiAnnotTypes(BaseAnnot):
    """MultiAnnotTypes

    This class manages multiple types of annotators behind a single annotator interface. The annotators of all
    added types are addressed through one global, consecutive index range: the annotators of the first type come
    first, followed by the annotators of the second type, and so on.

    Parameters
    ----------
    annotator_types: BaseAnnot | list, shape (n_annotator_types)
        A single annotator type or a list of annotator types which are to be added.

    Attributes
    ----------
    annotator_types_: list, shape (n_annotator_types)
        List of added annotator types. It is a copy of the list passed to the constructor.

    Raises
    ------
    TypeError
        If `annotator_types` is neither a `BaseAnnot` instance nor a list of `BaseAnnot` instances.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from annotlib import ClassifierBasedAnnot, ClusterBasedAnnot
    >>> X, y_true = load_iris(return_X_y=True)
    >>> # create two types of annotators
    >>> classifier_annotators = ClassifierBasedAnnot(X=X, y_true=y_true, n_annotators=3)
    >>> cluster_annotators = ClusterBasedAnnot(X=X, y_true=y_true, n_annotators=3)
    >>> annotator_types = [classifier_annotators, cluster_annotators]
    >>> # create instance of multiple annotator types
    >>> multiple_annotators = MultiAnnotTypes(annotator_types=annotator_types)
    >>> # there are 3+3=6 annotators
    >>> multiple_annotators.n_annotators()
    6
    >>> # ask 6 annotators for class labels of 10 samples
    >>> multiple_annotators.class_labels(X=X[0:10], query_value=10).shape
    (10, 6)
    >>> # check query values
    >>> multiple_annotators.n_queries()
    array([10, 10, 10, 10, 10, 10])
    """

    def __init__(self, annotator_types):
        if isinstance(annotator_types, BaseAnnot):
            annotator_types = [annotator_types]
        if not isinstance(annotator_types, list):
            raise TypeError('The parameter `annotator_types` must be a single annotator or a list of annotators.')
        self._check_annotators(annotator_types)
        # Keep a private copy: `add_annotators` appends to this list and must not alter the caller's list.
        self.annotator_types_ = list(annotator_types)

    @staticmethod
    def _check_annotators(annotators):
        """Raise a TypeError unless every element of `annotators` is a `BaseAnnot` instance."""
        for annotator in annotators:
            if not isinstance(annotator, BaseAnnot):
                raise TypeError(
                    'An annotator is required to be an instance of '
                    ':py:class:`annotlib.base.BaseAnnot`')

    @staticmethod
    def _stack_columns(blocks, n_samples):
        """Stack per-type result blocks column-wise; without any block an array of shape (n_samples, 0) results."""
        if not blocks:
            # np.hstack raises on an empty sequence, which happens when no annotator type is registered.
            return np.empty((n_samples, 0))
        return np.hstack(blocks)

    def add_annotators(self, annotator_types):
        """Method adds new annotators.

        All given annotator types are validated before any of them is added, so a failing call leaves the
        instance unchanged.

        Parameters
        ----------
        annotator_types: BaseAnnot | list, shape (n_annotator_types)
            A single annotator type or an iterable of annotator types to be added.

        Returns
        -------
        self: annotlib.multi_types.MultiAnnotTypes
            The instance itself.

        Raises
        ------
        TypeError
            If `annotator_types` is not iterable or contains an element which is not a `BaseAnnot` instance.
        """
        if isinstance(annotator_types, BaseAnnot):
            new_annotators = [annotator_types]
        else:
            try:
                # Materialise first: this also makes it safe to pass `self.annotator_types_` itself.
                new_annotators = list(annotator_types)
            except TypeError:
                raise TypeError(
                    'The parameter `annotator_types` must be a single annotator or a list of annotators.'
                ) from None
        self._check_annotators(new_annotators)
        self.annotator_types_.extend(new_annotators)
        return self

    def n_annotators(self):
        """Method for computing the number of annotators.

        Returns
        -------
        n_annotators: int
            Total number of annotators summed over all annotator types (0 if no type is registered).
        """
        # Convert to a builtin int so the result matches the documented type and prints as a plain number.
        return int(sum(a.n_annotators() for a in self.annotator_types_))

    def n_queries(self):
        """Method for computing the number of queries posed to an annotator.

        Returns
        -------
        n_queries: numpy.ndarray, shape (n_annotators)
            Concatenation of the query counts reported by each annotator type, in the order of the types.
            An entry n_queries[a] indicates how many queries annotator a has processed.
        """
        if not self.annotator_types_:
            return np.zeros(0, dtype=int)
        return np.hstack([a.n_queries() for a in self.annotator_types_])

    def queried_samples(self):
        """Method returning the samples for which the annotators were queried to provide class labels.

        Returns
        -------
        X_queried: list, shape (n_annotators)
            The results of `queried_samples()` of all annotator types chained into one flat list. Assuming each
            type returns one entry per annotator, X_queried[a] represents the samples for which annotator a was
            queried to provide class labels.
        """
        return list(chain.from_iterable(a.queried_samples() for a in self.annotator_types_))

    def class_labels(self, X, annotator_ids=None, query_value=1, **kwargs):
        """Method returning the class labels of the given samples.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
             Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The global indices of the annotators whose class labels are queried. If None, every annotator type is
            asked with `annotator_ids=None`.
        query_value: int
            The query value represents the increment of the query statistics of the queried annotators.
        kwargs: dict
            Further keyword arguments which are forwarded to `class_labels` of each queried annotator type.

        Returns
        -------
        Y: numpy.ndarray, shape (n_samples, n_annotators)
            Class labels of the given samples which were provided by the queried annotators.
            The columns of an annotator type without any queried annotator are filled with np.nan values.
        """
        X = check_array(X)
        if annotator_ids is None:
            blocks = [a.class_labels(X, None, query_value, **kwargs) for a in self.annotator_types_]
        else:
            checked_ids = check_indices(annotator_ids, self.n_annotators() - 1, 'annotator_ids')
            blocks = []
            for annot_type, type_ids in zip(self.annotator_types_, self._transform_ids(checked_ids)):
                if len(type_ids) > 0:
                    block = annot_type.class_labels(X=X, annotator_ids=type_ids,
                                                    query_value=query_value, **kwargs)
                else:
                    # Types without a queried annotator are not called at all (no query statistics change).
                    block = np.full((len(X), annot_type.n_annotators()), np.nan)
                blocks.append(block)
        return self._stack_columns(blocks, len(X))

    def confidence_scores(self, X, annotator_ids=None, **kwargs):
        """Method returning the confidence scores for labelling the given samples.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
             Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The global indices of the annotators whose confidence scores are queried. If None, every annotator
            type is asked without restricting the annotators.
        kwargs: dict
            Further keyword arguments which are forwarded to `confidence_scores` of each queried annotator type.

        Returns
        -------
        C: numpy.ndarray, shape (n_samples, n_annotators)
            Confidence scores of the queried annotators for labelling the given samples.
            The columns of an annotator type without any queried annotator are filled with np.nan values.
        """
        X = check_array(X)
        if annotator_ids is None:
            blocks = [a.confidence_scores(X, **kwargs) for a in self.annotator_types_]
        else:
            checked_ids = check_indices(annotator_ids, self.n_annotators() - 1, 'annotator_ids')
            blocks = []
            for annot_type, type_ids in zip(self.annotator_types_, self._transform_ids(checked_ids)):
                if len(type_ids) > 0:
                    block = annot_type.confidence_scores(X, type_ids, **kwargs)
                else:
                    block = np.full((len(X), annot_type.n_annotators()), np.nan)
                blocks.append(block)
        return self._stack_columns(blocks, len(X))

    def _transform_ids(self, annotator_ids):
        """
        This method transforms the annotator ids, so that the correct annotators in the different
        types are asked. If we have two annotator types each with two annotators, the ids [0, 1, 2, 3] are transformed
        to [[0, 1], [0, 1]], so that for each type there is a separate list of annotator ids.

        Parameters
        ----------
        annotator_ids: array-like,
            Global annotator ids to be transformed. Ids outside the range of all types are dropped.

        Returns
        -------
        new_ids: list, shape (n_annotator_types, n_asked_annotators_of_annotator_type)
            The transformed annotator ids. For example, the entry new_ids[1]=[0, 2] defines the annotators with the
            indices 0 and 2 of the annotator type with index 1 are asked. The order of the given ids is preserved
            within each type; the list is empty if no annotator type is registered.
        """
        new_ids = []
        offset = 0  # global id of the first annotator of the current type
        for annot_type in self.annotator_types_:
            n_type_annotators = annot_type.n_annotators()
            upper = offset + n_type_annotators
            new_ids.append([i - offset for i in annotator_ids if offset <= i < upper])
            offset = upper
        return new_ids