import numpy as np
from annotlib.utils import check_indices
from annotlib.base import BaseAnnot
from sklearn.utils import check_array
from itertools import chain
class MultiAnnotTypes(BaseAnnot):
    """MultiAnnotTypes

    This class manages several annotator types (each a
    :py:class:`annotlib.base.BaseAnnot` instance that may itself simulate
    multiple annotators) behind one common interface. The annotators of all
    types are addressed by consecutive global indices: the first type owns
    the indices ``0 .. n_0 - 1``, the second type the following ``n_1``
    indices, and so on.

    Parameters
    ----------
    annotator_types: BaseAnnot | list, shape (n_annotator_types)
        A single annotator type or a list of annotator types to be managed.

    Attributes
    ----------
    annotator_types_: list, shape (n_annotator_types)
        List of the added annotator types. It is a copy of the list passed
        to the constructor.

    Raises
    ------
    TypeError
        If `annotator_types` is neither a ``BaseAnnot`` instance nor a list,
        or if a list element is not a ``BaseAnnot`` instance.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from annotlib import ClassifierBasedAnnot, ClusterBasedAnnot
    >>> X, y_true = load_iris(return_X_y=True)
    >>> # create two types of annotators
    >>> classifier_annotators = ClassifierBasedAnnot(X=X, y_true=y_true, n_annotators=3)
    >>> cluster_annotators = ClusterBasedAnnot(X=X, y_true=y_true, n_annotators=3)
    >>> annotator_types = [classifier_annotators, cluster_annotators]
    >>> # create instance of multiple annotator types
    >>> multiple_annotators = MultiAnnotTypes(annotator_types=annotator_types)
    >>> # there are 3+3=6 annotators
    >>> multiple_annotators.n_annotators()
    6
    >>> # ask 6 annotators for class labels of 10 samples
    >>> multiple_annotators.class_labels(X=X[0:10], query_value=10).shape
    (10, 6)
    >>> # check query values
    >>> multiple_annotators.n_queries()
    array([10, 10, 10, 10, 10, 10])
    """

    def __init__(self, annotator_types):
        if isinstance(annotator_types, BaseAnnot):
            annotator_types = [annotator_types]
        if not isinstance(annotator_types, list):
            raise TypeError('The parameter `annotator_types` must be a single annotator or a list of annotators.')
        self._check_annotators(annotator_types)
        # Store a copy: `add_annotators` appends to this list and must not
        # modify the list object owned by the caller.
        self.annotator_types_ = list(annotator_types)

    @staticmethod
    def _check_annotators(annotator_types):
        """Raise a TypeError unless every element is a ``BaseAnnot`` instance."""
        for annotator in annotator_types:
            if not isinstance(annotator, BaseAnnot):
                raise TypeError(
                    'An annotator is required to be an instance of '
                    ':py:class:`annotlib.base.BaseAnnot`')

    def add_annotators(self, annotator_types):
        """Method adds new annotators.

        All given annotator types are validated before any of them is added,
        so a failing call leaves the instance unchanged.

        Parameters
        ----------
        annotator_types: BaseAnnot | iterable of BaseAnnot
            The annotator type(s) to be added.

        Returns
        -------
        self: MultiAnnotTypes
            The instance itself.

        Raises
        ------
        TypeError
            If an element is not a ``BaseAnnot`` instance or if
            `annotator_types` is not iterable.
        """
        if isinstance(annotator_types, BaseAnnot):
            new_annotators = [annotator_types]
        else:
            # Materialise first so that validation and insertion see the same
            # elements, even for one-shot iterables.
            new_annotators = list(annotator_types)
        self._check_annotators(new_annotators)
        self.annotator_types_.extend(new_annotators)
        return self

    def n_annotators(self):
        """Method for computing the number of annotators.

        Returns
        -------
        n_annotators: int
            Sum of ``n_annotators()`` over all managed annotator types
            (0 if there are none).
        """
        # Built-in `sum` + `int` yields a plain Python int; `np.sum` would
        # return a NumPy scalar (and a float for an empty list).
        return int(sum(a.n_annotators() for a in self.annotator_types_))

    def n_queries(self):
        """Method for computing the number of queries posed to an annotator.

        Returns
        -------
        n_queries: numpy.ndarray, shape (n_annotators)
            Horizontal concatenation of the ``n_queries()`` results of all
            annotator types; an entry n_queries[a] indicates how many queries
            annotator a has processed.
        """
        return np.hstack([a.n_queries() for a in self.annotator_types_])

    def queried_samples(self):
        """Method returning the samples for which the annotators were queried to provide class labels.

        Returns
        -------
        X_queried: list
            The entries obtained by iterating over the ``queried_samples()``
            result of each annotator type, chained in the order of the types.
            Presumably an entry X_queried[a] holds the samples, shape
            (n_queried_samples, n_features), for which annotator a was
            queried -- this depends on the managed annotator types.
        """
        return list(chain.from_iterable(a.queried_samples() for a in self.annotator_types_))

    def class_labels(self, X, annotator_ids=None, query_value=1, **kwargs):
        """Method returning the class labels of the given samples.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The global indices of the annotators whose class labels are
            queried. If None, every annotator type is queried with
            ``annotator_ids=None``.
        query_value: int
            The query value represents the increment of the query statistics of the queried annotators.
        kwargs: dict
            Further keyword arguments forwarded to the ``class_labels``
            method of each queried annotator type.

        Returns
        -------
        Y: numpy.ndarray, shape (n_samples, n_annotators)
            Class labels of the given samples which were provided by the queried annotators.
            Annotator types of which no annotator is queried are not called
            and contribute columns filled with np.nan.
        """
        X = check_array(X)
        if annotator_ids is None:
            return np.hstack([a.class_labels(X, None, query_value, **kwargs) for a in self.annotator_types_])

        global_ids = check_indices(annotator_ids, self.n_annotators() - 1, 'annotator_ids')
        labels_per_type = []
        for annot_type, local_ids in zip(self.annotator_types_, self._transform_ids(global_ids)):
            if local_ids:
                labels = annot_type.class_labels(X=X, annotator_ids=local_ids, query_value=query_value, **kwargs)
            else:
                # No annotator of this type is asked: keep its columns as nan
                # placeholders so that the column layout stays fixed.
                labels = np.full((len(X), annot_type.n_annotators()), np.nan)
            labels_per_type.append(labels)
        return np.hstack(labels_per_type)

    def confidence_scores(self, X, annotator_ids=None, **kwargs):
        """Method returning the confidence scores for labelling the given samples.

        Parameters
        ----------
        X: array-like, shape (n_samples, n_features)
            Samples whose class labels are queried.
        annotator_ids: array-like, shape (n_queried_annotators)
            The global indices of the annotators whose confidence scores are
            queried. If None, every annotator type is queried.
        kwargs: dict
            Further keyword arguments forwarded to the ``confidence_scores``
            method of each queried annotator type (as ``class_labels`` does).

        Returns
        -------
        C: numpy.ndarray, shape (n_samples, n_annotators)
            Confidence scores of the queried annotators for labelling the given samples.
            Annotator types of which no annotator is queried are not called
            and contribute columns filled with np.nan.
        """
        X = check_array(X)
        if annotator_ids is None:
            return np.hstack([a.confidence_scores(X, **kwargs) for a in self.annotator_types_])

        global_ids = check_indices(annotator_ids, self.n_annotators() - 1, 'annotator_ids')
        scores_per_type = []
        for annot_type, local_ids in zip(self.annotator_types_, self._transform_ids(global_ids)):
            if local_ids:
                scores = annot_type.confidence_scores(X, local_ids, **kwargs)
            else:
                # Same nan placeholder convention as in `class_labels`.
                scores = np.full((len(X), annot_type.n_annotators()), np.nan)
            scores_per_type.append(scores)
        return np.hstack(scores_per_type)

    def _transform_ids(self, annotator_ids):
        """
        This method transforms the annotator ids, so that the correct annotators in the different
        types are asked. If we have two annotator types each with two annotators, the ids [0, 1, 2, 3] are transformed
        to [[0, 1], [0, 1]], so that for each type there is a separate list of annotator ids.

        Parameters
        ----------
        annotator_ids: array-like,
            Global annotator ids to be transformed. The sequence is iterated
            once per annotator type; ids outside of every type's range are
            dropped.

        Returns
        -------
        new_ids: list, shape (n_annotator_types, n_asked_annotators_of_annotator_type)
            The transformed annotator ids. For example, the entry new_ids[1]=[0, 2] defines the annotators with the
            indices 0 and 2 of the annotator type with index 1 are asked. The ids of a type keep the order they
            have in `annotator_ids`. An empty list is returned if there are no annotator types.
        """
        new_ids = []
        start = 0  # global id of the first annotator of the current type
        for annot_type in self.annotator_types_:
            stop = start + annot_type.n_annotators()  # exclusive upper bound
            new_ids.append([i - start for i in annotator_ids if start <= i < stop])
            start = stop
        return new_ids