Module `ranky.visualization`

Expand source code

#################################
######## VISUALIZATIONS #########
#################################

import numpy as np
import pandas as pd
from math import ceil
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
import networkx as nx
import ranky as rk
from sklearn.manifold import TSNE, MDS
from mpl_toolkits.mplot3d import Axes3D

# critical difference does not work when this is enabled
#sns.set_theme(style = "darkgrid")

def autolabel(rects, values, round=2):
    """ Function used by `rk.show` to annotate bar plots.
    """
    values = np.round(values, round)
    for idx,rect in enumerate(rects):
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width()/2., 1.05*height,
                values[idx],
                ha='center', va='bottom', rotation=0)

def show(m, rotation=90, title=None, size=2, annot=False, round=2, color='royalblue', cmap=None):
    """ Display a ranking or a prefrence matrix.

    If m is 1D: show ballot (bar plot).
    If m is 2D: show preferences (heatmap).

    TODO: annot argument adding the values in the plot.

    Args:
        rotation: x labels rotation.
        title: string - title of the figure.
        size: integer - higher value for a smaller figure.
        annot: If True, write the values.
        round: Number of decimals to display if annot is True.
        color: Color for 1D bar plot.
        cmap: Color map for 2D heatmap.
    """
    if isinstance(m, list): # convert to np.ndarray if needed
        m = np.array(m)
    dim = len(m.shape)
    if dim == 1: # 1D
        x = np.arange(len(m))
        bar_plot = plt.bar(x, m, align='center', color=color)
        if annot:
            autolabel(bar_plot, m, round=round)
        if rk.is_series(m):
            plt.xticks(x, m.index, rotation=rotation)
    elif dim == 2: # 2D
        fig, ax = plt.subplots(figsize=(m.shape[1]/size, m.shape[0]/size))
        sns.heatmap(m, ax=ax, annot=annot, linewidths=.2, fmt='0.'+str(round), cmap=cmap)
        x = np.arange(m.shape[1])
        if rk.is_dataframe(m):
            plt.xticks(x, m.columns, rotation=rotation)
    else:
        raise(Exception('Passed array must have only 1 or 2 dimension, not {}.'.format(dim)))
    if title is not None:
        plt.title(title)
    plt.show()

def show_learning_curve(h):
    """ Display learning curve.

    Args:
        h: list representing the history of scores.
    """
    plt.plot(range(len(h)), h)
    plt.xlabel('epochs')
    plt.ylabel('score')
    plt.show()

def show_graph(matrix, names=None):
    """ Show a directed graph represented by a binary matrix.

    Args:
        matrix: binary matrix. matrix[i, j] = 1 indicates an edge from i to j.
        names: list representing the names of the vertices.
    """
    G = nx.DiGraph()
    n = len(matrix)
    nodes = range(n)
    if names is not None:
        nodes = names
    G.add_nodes_from(nodes)
    for i in range(n):
        for j in range(n):
            if matrix[i][j] == 1:
                G.add_edge(nodes[i], nodes[j])
    nx.draw_circular(G, with_labels=True, node_size=2500, font_size=8, font_weight='bold')
    plt.show()

def scatterplot(m, dim=2, names=None, colors=None, fontsize=8, pointsize=60, big_display=True, legend=False, legend_loc='best'):
    """ 2D or 3D scatterplot.

    Args:
        m: data
        dim: 2 or 3.
        names: vector of names to display on each point.
        colors: vector of numbers or categories of the size of the number of points.
                If None it will be replaced by names.
        fontsize: text font size (integer).
        pointsize: size of data points (integer).
        big_display: plot the figure in a big format if True.
        legend: if True, add legend of colors.
        legend_loc: location of legend. See matplotlib.pyplot.legend for details.
    """
    if colors is None:
        colors = names
    if dim == 2: # 2 dimensions
        x, y = [m[:, i] for i in range(m.shape[1])] # take columns
        scat = sns.scatterplot(x, y, hue=colors, s=pointsize, legend=(legend and 'brief'))
        if names is not None: # TEXT #
            for line in range(0, m.shape[0]):
                scat.text(x[line]+0.01, y[line], names[line], horizontalalignment='left',
                         fontsize=fontsize, color='black', weight='semibold')
        #if legend:
        #    plt.legend(colors, loc=legend_loc)
    elif dim == 3: # 3 dimensions
        fig = plt.figure()
        ax = fig.add_subplot(111, projection = '3d')
        x, y, z = [m[:, i] for i in range(m.shape[1])] # take columns
        ax.scatter(x, y, z) #, c=range(len(names)))
    else:
        raise Exception('dim must be 2 or 3.')
    if big_display:
        fi = plt.gcf()
        fi.set_size_inches(12, 8) # change plot size
    plt.show()

def tsne(m, axis=0, dim=2, **kwargs):
    """ Use T-SNE algorithm to show the matrix m in a 2 or 3 dimensions space.

    Args:
        axis: axis of dimensionality reduction.
        dim: number of dimensions. 2 for 2D plot, 3 for 3D plot.
        **kwargs: arguments for rk.scatterplot function (e.g. fontsize, pointsize).
    """
    names = None
    if axis == 0:
        if rk.is_dataframe(m):
            names = m.columns
        m = m.T # transpose
    elif axis == 1:
        if rk.is_dataframe(m):
            names = m.index
    else:
        raise Excpetion('axis must be 0 or 1.')
    m_transformed = TSNE(n_components=dim).fit_transform(m)
    # Display
    scatterplot(m_transformed, dim=dim, names=names, **kwargs)

def mds_from_dist_matrix(distance_matrix, dim=2, names=None, **kwargs):
    """ Multidimensional scaling plot from a symmetric distance matrix (pairwise distances).

    See: https://en.wikipedia.org/wiki/Multidimensional_scaling

    Args:
        m: distance matrix.
        dim: number of dimensions to plot (2 or 3).
        names: names of objects. Will be overwritten if distance_matrix is a pd.DataFrame.
        **kwargs: arguments for rk.scatterplot function (e.g. fontsize).
    """
    if rk.is_dataframe(distance_matrix):
        names = distance_matrix.columns
    transformer = MDS(n_components=dim, dissimilarity='precomputed')
    m_transformed = transformer.fit_transform(distance_matrix)
    # Display
    scatterplot(m_transformed, dim=dim, names=names, **kwargs)

def mds(m, axis=0, dim=2, method='spearman', **kwargs):
    """ Multidimensional scaling plot from a preference matrix.

    See: https://en.wikipedia.org/wiki/Multidimensional_scaling

    Args:
        m: preference matrix.
        dim: number of dimensions to plot (2 or 3).
        method: any metric method.
        **kwargs: arguments for rk.scatterplot function (e.g. fontsize).
    """
    names = None
    if axis == 0:
        if rk.is_dataframe(m):
            names = m.columns
        m = m.T # transpose
    elif axis == 1:
        if rk.is_dataframe(m):
            names = m.index
    else:
        raise Excpetion('axis must be 0 or 1.')
    # Compute pairwise distances
    dist_matrix = rk.distance_matrix(m, method=method)
    # Call the plot functions
    mds_from_dist_matrix(dist_matrix, dim=dim, names=names, **kwargs)

def overlaps(pos, not_sig):
    #Used by critical difference.
    #Checks if the horizontal line overlaps any existing horizontal line.
    i, j = pos
    for i1, j1 in not_sig:
        if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
            return True
    return False

def merge_couples(couples):
    # Used by critical difference
    longest = [(i, j) for i, j in not_sig if not overlaps((i, j), couples)]
    return longest

def critical_difference(m, comparison_func=None, axis=1, **kwargs):
    """ Computes and draws a critical difference diagram.

    The goal of critical difference diagrams is to show the average scores of
    different candidates, and to group if their performance are not significantly
    different (using pairwise statistical tests).
    This function uses a comparison function (rk.p_wins by default).
    A comparison function f(a, b) should return True if a is significantly better than b.

    Args:
        m: Score matrix, array-like (use pd.DataFrame to name the candidates).
        comparison_func: Assymetrical function used to compare two candidates.
        The function comparison_func(a, b) should return 1 if a beats b and 0 otherwise.
        By default it's p_wins (defined in the same module), performing a binomial test.
        axis: Axis of judges.
        kwargs: Arguments for the comparison_func function.
    """
    m = pd.DataFrame(m) # casting if necessary
    scores = rk.score(m, axis=axis).sort_values()
    if axis == 0:
        m = m.T # if the candidates are in column, transpose the matrix
    couples = []
    for i in range(len(scores) - 1):
        for j in range(1, len(scores)):
            if i < j:
                _i, _j = scores.index[i], scores.index[j] # do not confuse indices in couples and in scores
                a, b = m.iloc[_i], m.iloc[_j]
                if rk.duel.declare_ties(a, b, comparison_func=comparison_func):
                    couples.append((i, j))
    show_critical_difference(scores, couples)

def show_critical_difference(scores, couples, arrow_vgap=.2, link_voffset=.15, link_vgap=.1, xlabel=None):
    """ Draws a critical difference diagram.

    The goal of critical difference diagrams is to show the average scores of
    different candidates, and to group if their performance are not significantly
    different (using pairwise statistical tests).

    Forked from https://github.com/mbatchkarov/critical_difference

    Critical difference diagrams can be seen in the following publications:
    - Janez Demsar, Statistical Comparisons of Classifiers over Multiple Data Sets, 7(Jan):1--30, 2006.
    - H. Ismail Fawaz, G. Forestier, J. Weber, L. Idoumghar, P. Muller, Deep learning for time series classification: a review, Data Mining and Knowledge Discovery, 2018.

    Args:
        scores: List of average methods' scores, array-like. If scores is a pd.Series, the index will be used as names.
        couples: list of tuples representing the equivalence between neighbors (once sorted) e.g. [(0, 1), (1, 2), (4, 5)], based on indices in the array scores.
        arrow_vgap: vertical space between the arrows that point to method names, between 0 and 1.
        link_vgap: vertical space between the lines that connect methods that are not significantly different. Scale is 0 to 1, fraction of axis size
        link_voffset: offset from the axis of the links that connect non-significant methods
    """
    size = len(scores)
    names = list(range(size)) # default names: [0, 1, ...]
    if isinstance(scores, pd.Series):
        names = scores.index
    scores, names = (list(t) for t in zip(*sorted(zip(scores, names))))
    for pair in couples:
        assert all(0 <= idx < size for idx in pair), 'Check indices'
    # remove axes
    fig, ax = plt.subplots(1, 1, figsize=(6, 2), subplot_kw=dict(frameon=False))
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().set_visible(False)
    y = [0] * size
    ax.plot(scores, y, 'ko')
    plt.xlim(0.9 * scores[0], 1.1 * scores[-1])
    plt.ylim(0, 1)
    # draw the x axis again
    xmin, xmax = ax.get_xaxis().get_view_interval()
    ymin, ymax = ax.get_yaxis().get_view_interval()
    ax.add_artist(Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2))
    if xlabel: # add an optional label to the x axis
        ax.annotate(xlabel, xy=(xmax, 0), xytext=(0.95, 0.1), textcoords='axes fraction',
                                ha='center', va='center', fontsize=9)  # text slightly smaller
    half = int(ceil(size / 2.))
    # make sure the topmost annotation in at 90% of figure height
    ycoords = list(reversed([0.9 - arrow_vgap * i for i in range(half)]))
    ycoords.extend(reversed(ycoords))
    for i in range(size):
        ax.annotate(str(names[i]),
                    xy=(scores[i], y[i]),
                    xytext=(-.05 if i < half else .95, ycoords[i]),
                    textcoords='axes fraction', ha='center', va='center',
                    arrowprops={'arrowstyle': '-', 'connectionstyle': 'angle,angleA=0,angleB=90'})
    # draw horizontal lines linking non-significant methods
    linked_methods = merge_couples(couples)
    # where do the existing lines begin and end, (X, Y) coords
    used_endpoints = set()
    y = link_voffset
    dy = link_vgap
    # draw lines
    for i, (x1, x2) in enumerate(sorted(linked_methods)):
        if y > link_voffset and overlaps((x1, y - dy), used_endpoints):
            y -= dy
        elif overlaps((x1, y), used_endpoints):
            y += dy
        plt.hlines(y, scores[x1], scores[x2], linewidth=3)  # y, x0, x1
        used_endpoints.add((x1, y))
        used_endpoints.add((x2, y))
    plt.show()

Functions

def autolabel(rects, values, round=2)

Function used by rk.show to annotate bar plots.

Expand source code

def autolabel(rects, values, round=2):
    """ Function used by `rk.show` to annotate bar plots.
    """
    values = np.round(values, round)
    for idx,rect in enumerate(rects):
        height = rect.get_height()
        plt.text(rect.get_x() + rect.get_width()/2., 1.05*height,
                values[idx],
                ha='center', va='bottom', rotation=0)

def critical_difference(m, comparison_func=None, axis=1, **kwargs)

Computes and draws a critical difference diagram.

The goal of critical difference diagrams is to show the average scores of different candidates, and to group if their performance are not significantly different (using pairwise statistical tests). This function uses a comparison function (rk.p_wins by default). A comparison function f(a, b) should return True if a is significantly better than b.

Args

m: Score matrix, array-like (use pd.DataFrame to name the candidates).
comparison_func: Assymetrical function used to compare two candidates.
The function comparison_func(a, b) should return 1 if a beats b and 0 otherwise.
By default it's p_wins (defined in the same module), performing a binomial test.
axis: Axis of judges.
kwargs: Arguments for the comparison_func function.

Expand source code

def critical_difference(m, comparison_func=None, axis=1, **kwargs):
    """ Computes and draws a critical difference diagram.

    The goal of critical difference diagrams is to show the average scores of
    different candidates, and to group if their performance are not significantly
    different (using pairwise statistical tests).
    This function uses a comparison function (rk.p_wins by default).
    A comparison function f(a, b) should return True if a is significantly better than b.

    Args:
        m: Score matrix, array-like (use pd.DataFrame to name the candidates).
        comparison_func: Assymetrical function used to compare two candidates.
        The function comparison_func(a, b) should return 1 if a beats b and 0 otherwise.
        By default it's p_wins (defined in the same module), performing a binomial test.
        axis: Axis of judges.
        kwargs: Arguments for the comparison_func function.
    """
    m = pd.DataFrame(m) # casting if necessary
    scores = rk.score(m, axis=axis).sort_values()
    if axis == 0:
        m = m.T # if the candidates are in column, transpose the matrix
    couples = []
    for i in range(len(scores) - 1):
        for j in range(1, len(scores)):
            if i < j:
                _i, _j = scores.index[i], scores.index[j] # do not confuse indices in couples and in scores
                a, b = m.iloc[_i], m.iloc[_j]
                if rk.duel.declare_ties(a, b, comparison_func=comparison_func):
                    couples.append((i, j))
    show_critical_difference(scores, couples)

def mds(m, axis=0, dim=2, method='spearman', **kwargs)

Multidimensional scaling plot from a preference matrix.

See: https://en.wikipedia.org/wiki/Multidimensional_scaling

Args

m: preference matrix.
dim: number of dimensions to plot (2 or 3).
method: any metric method.
**kwargs: arguments for rk.scatterplot function (e.g. fontsize).

Expand source code

def mds(m, axis=0, dim=2, method='spearman', **kwargs):
    """ Multidimensional scaling plot from a preference matrix.

    See: https://en.wikipedia.org/wiki/Multidimensional_scaling

    Args:
        m: preference matrix.
        dim: number of dimensions to plot (2 or 3).
        method: any metric method.
        **kwargs: arguments for rk.scatterplot function (e.g. fontsize).
    """
    names = None
    if axis == 0:
        if rk.is_dataframe(m):
            names = m.columns
        m = m.T # transpose
    elif axis == 1:
        if rk.is_dataframe(m):
            names = m.index
    else:
        raise Excpetion('axis must be 0 or 1.')
    # Compute pairwise distances
    dist_matrix = rk.distance_matrix(m, method=method)
    # Call the plot functions
    mds_from_dist_matrix(dist_matrix, dim=dim, names=names, **kwargs)

def mds_from_dist_matrix(distance_matrix, dim=2, names=None, **kwargs)

Multidimensional scaling plot from a symmetric distance matrix (pairwise distances).

See: https://en.wikipedia.org/wiki/Multidimensional_scaling

Args

m: distance matrix.
dim: number of dimensions to plot (2 or 3).
names: names of objects. Will be overwritten if distance_matrix is a pd.DataFrame.
**kwargs: arguments for rk.scatterplot function (e.g. fontsize).

Expand source code

def mds_from_dist_matrix(distance_matrix, dim=2, names=None, **kwargs):
    """ Multidimensional scaling plot from a symmetric distance matrix (pairwise distances).

    See: https://en.wikipedia.org/wiki/Multidimensional_scaling

    Args:
        m: distance matrix.
        dim: number of dimensions to plot (2 or 3).
        names: names of objects. Will be overwritten if distance_matrix is a pd.DataFrame.
        **kwargs: arguments for rk.scatterplot function (e.g. fontsize).
    """
    if rk.is_dataframe(distance_matrix):
        names = distance_matrix.columns
    transformer = MDS(n_components=dim, dissimilarity='precomputed')
    m_transformed = transformer.fit_transform(distance_matrix)
    # Display
    scatterplot(m_transformed, dim=dim, names=names, **kwargs)

def merge_couples(couples)

Expand source code

def merge_couples(couples):
    # Used by critical difference
    longest = [(i, j) for i, j in not_sig if not overlaps((i, j), couples)]
    return longest

def overlaps(pos, not_sig)

Expand source code

def overlaps(pos, not_sig):
    #Used by critical difference.
    #Checks if the horizontal line overlaps any existing horizontal line.
    i, j = pos
    for i1, j1 in not_sig:
        if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
            return True
    return False

def scatterplot(m, dim=2, names=None, colors=None, fontsize=8, pointsize=60, big_display=True, legend=False, legend_loc='best')

2D or 3D scatterplot.

Args

m: data
dim: 2 or 3.
names: vector of names to display on each point.
colors: vector of numbers or categories of the size of the number of points. If None it will be replaced by names.
fontsize: text font size (integer).
pointsize: size of data points (integer).
big_display: plot the figure in a big format if True.
legend: if True, add legend of colors.
legend_loc: location of legend. See matplotlib.pyplot.legend for details.

Expand source code

def scatterplot(m, dim=2, names=None, colors=None, fontsize=8, pointsize=60, big_display=True, legend=False, legend_loc='best'):
    """ 2D or 3D scatterplot.

    Args:
        m: data
        dim: 2 or 3.
        names: vector of names to display on each point.
        colors: vector of numbers or categories of the size of the number of points.
                If None it will be replaced by names.
        fontsize: text font size (integer).
        pointsize: size of data points (integer).
        big_display: plot the figure in a big format if True.
        legend: if True, add legend of colors.
        legend_loc: location of legend. See matplotlib.pyplot.legend for details.
    """
    if colors is None:
        colors = names
    if dim == 2: # 2 dimensions
        x, y = [m[:, i] for i in range(m.shape[1])] # take columns
        scat = sns.scatterplot(x, y, hue=colors, s=pointsize, legend=(legend and 'brief'))
        if names is not None: # TEXT #
            for line in range(0, m.shape[0]):
                scat.text(x[line]+0.01, y[line], names[line], horizontalalignment='left',
                         fontsize=fontsize, color='black', weight='semibold')
        #if legend:
        #    plt.legend(colors, loc=legend_loc)
    elif dim == 3: # 3 dimensions
        fig = plt.figure()
        ax = fig.add_subplot(111, projection = '3d')
        x, y, z = [m[:, i] for i in range(m.shape[1])] # take columns
        ax.scatter(x, y, z) #, c=range(len(names)))
    else:
        raise Exception('dim must be 2 or 3.')
    if big_display:
        fi = plt.gcf()
        fi.set_size_inches(12, 8) # change plot size
    plt.show()

def show(m, rotation=90, title=None, size=2, annot=False, round=2, color='royalblue', cmap=None)

Display a ranking or a prefrence matrix.

If m is 1D: show ballot (bar plot). If m is 2D: show preferences (heatmap).

TODO: annot argument adding the values in the plot.

Args

rotation: x labels rotation.
title: string - title of the figure.
size: integer - higher value for a smaller figure.
annot: If True, write the values.
round: Number of decimals to display if annot is True.
color: Color for 1D bar plot.
cmap: Color map for 2D heatmap.

Expand source code

def show(m, rotation=90, title=None, size=2, annot=False, round=2, color='royalblue', cmap=None):
    """ Display a ranking or a prefrence matrix.

    If m is 1D: show ballot (bar plot).
    If m is 2D: show preferences (heatmap).

    TODO: annot argument adding the values in the plot.

    Args:
        rotation: x labels rotation.
        title: string - title of the figure.
        size: integer - higher value for a smaller figure.
        annot: If True, write the values.
        round: Number of decimals to display if annot is True.
        color: Color for 1D bar plot.
        cmap: Color map for 2D heatmap.
    """
    if isinstance(m, list): # convert to np.ndarray if needed
        m = np.array(m)
    dim = len(m.shape)
    if dim == 1: # 1D
        x = np.arange(len(m))
        bar_plot = plt.bar(x, m, align='center', color=color)
        if annot:
            autolabel(bar_plot, m, round=round)
        if rk.is_series(m):
            plt.xticks(x, m.index, rotation=rotation)
    elif dim == 2: # 2D
        fig, ax = plt.subplots(figsize=(m.shape[1]/size, m.shape[0]/size))
        sns.heatmap(m, ax=ax, annot=annot, linewidths=.2, fmt='0.'+str(round), cmap=cmap)
        x = np.arange(m.shape[1])
        if rk.is_dataframe(m):
            plt.xticks(x, m.columns, rotation=rotation)
    else:
        raise(Exception('Passed array must have only 1 or 2 dimension, not {}.'.format(dim)))
    if title is not None:
        plt.title(title)
    plt.show()

def show_critical_difference(scores, couples, arrow_vgap=0.2, link_voffset=0.15, link_vgap=0.1, xlabel=None)

Draws a critical difference diagram.

The goal of critical difference diagrams is to show the average scores of different candidates, and to group if their performance are not significantly different (using pairwise statistical tests).

Forked from https://github.com/mbatchkarov/critical_difference

Critical difference diagrams can be seen in the following publications: - Janez Demsar, Statistical Comparisons of Classifiers over Multiple Data Sets, 7(Jan):1–30, 2006. - H. Ismail Fawaz, G. Forestier, J. Weber, L. Idoumghar, P. Muller, Deep learning for time series classification: a review, Data Mining and Knowledge Discovery, 2018.

Args

scores: List of average methods' scores, array-like. If scores is a pd.Series, the index will be used as names.
couples: list of tuples representing the equivalence between neighbors (once sorted) e.g. [(0, 1), (1, 2), (4, 5)], based on indices in the array scores.
arrow_vgap: vertical space between the arrows that point to method names, between 0 and 1.
link_vgap: vertical space between the lines that connect methods that are not significantly different. Scale is 0 to 1, fraction of axis size
link_voffset: offset from the axis of the links that connect non-significant methods

Expand source code

def show_critical_difference(scores, couples, arrow_vgap=.2, link_voffset=.15, link_vgap=.1, xlabel=None):
    """ Draws a critical difference diagram.

    The goal of critical difference diagrams is to show the average scores of
    different candidates, and to group if their performance are not significantly
    different (using pairwise statistical tests).

    Forked from https://github.com/mbatchkarov/critical_difference

    Critical difference diagrams can be seen in the following publications:
    - Janez Demsar, Statistical Comparisons of Classifiers over Multiple Data Sets, 7(Jan):1--30, 2006.
    - H. Ismail Fawaz, G. Forestier, J. Weber, L. Idoumghar, P. Muller, Deep learning for time series classification: a review, Data Mining and Knowledge Discovery, 2018.

    Args:
        scores: List of average methods' scores, array-like. If scores is a pd.Series, the index will be used as names.
        couples: list of tuples representing the equivalence between neighbors (once sorted) e.g. [(0, 1), (1, 2), (4, 5)], based on indices in the array scores.
        arrow_vgap: vertical space between the arrows that point to method names, between 0 and 1.
        link_vgap: vertical space between the lines that connect methods that are not significantly different. Scale is 0 to 1, fraction of axis size
        link_voffset: offset from the axis of the links that connect non-significant methods
    """
    size = len(scores)
    names = list(range(size)) # default names: [0, 1, ...]
    if isinstance(scores, pd.Series):
        names = scores.index
    scores, names = (list(t) for t in zip(*sorted(zip(scores, names))))
    for pair in couples:
        assert all(0 <= idx < size for idx in pair), 'Check indices'
    # remove axes
    fig, ax = plt.subplots(1, 1, figsize=(6, 2), subplot_kw=dict(frameon=False))
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().set_visible(False)
    y = [0] * size
    ax.plot(scores, y, 'ko')
    plt.xlim(0.9 * scores[0], 1.1 * scores[-1])
    plt.ylim(0, 1)
    # draw the x axis again
    xmin, xmax = ax.get_xaxis().get_view_interval()
    ymin, ymax = ax.get_yaxis().get_view_interval()
    ax.add_artist(Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2))
    if xlabel: # add an optional label to the x axis
        ax.annotate(xlabel, xy=(xmax, 0), xytext=(0.95, 0.1), textcoords='axes fraction',
                                ha='center', va='center', fontsize=9)  # text slightly smaller
    half = int(ceil(size / 2.))
    # make sure the topmost annotation in at 90% of figure height
    ycoords = list(reversed([0.9 - arrow_vgap * i for i in range(half)]))
    ycoords.extend(reversed(ycoords))
    for i in range(size):
        ax.annotate(str(names[i]),
                    xy=(scores[i], y[i]),
                    xytext=(-.05 if i < half else .95, ycoords[i]),
                    textcoords='axes fraction', ha='center', va='center',
                    arrowprops={'arrowstyle': '-', 'connectionstyle': 'angle,angleA=0,angleB=90'})
    # draw horizontal lines linking non-significant methods
    linked_methods = merge_couples(couples)
    # where do the existing lines begin and end, (X, Y) coords
    used_endpoints = set()
    y = link_voffset
    dy = link_vgap
    # draw lines
    for i, (x1, x2) in enumerate(sorted(linked_methods)):
        if y > link_voffset and overlaps((x1, y - dy), used_endpoints):
            y -= dy
        elif overlaps((x1, y), used_endpoints):
            y += dy
        plt.hlines(y, scores[x1], scores[x2], linewidth=3)  # y, x0, x1
        used_endpoints.add((x1, y))
        used_endpoints.add((x2, y))
    plt.show()

def show_graph(matrix, names=None)

Show a directed graph represented by a binary matrix.

Args

matrix: binary matrix. matrix[i, j] = 1 indicates an edge from i to j.
names: list representing the names of the vertices.

Expand source code

def show_graph(matrix, names=None):
    """ Show a directed graph represented by a binary matrix.

    Args:
        matrix: binary matrix. matrix[i, j] = 1 indicates an edge from i to j.
        names: list representing the names of the vertices.
    """
    G = nx.DiGraph()
    n = len(matrix)
    nodes = range(n)
    if names is not None:
        nodes = names
    G.add_nodes_from(nodes)
    for i in range(n):
        for j in range(n):
            if matrix[i][j] == 1:
                G.add_edge(nodes[i], nodes[j])
    nx.draw_circular(G, with_labels=True, node_size=2500, font_size=8, font_weight='bold')
    plt.show()

def show_learning_curve(h)

Display learning curve.

Args

h: list representing the history of scores.

Expand source code

def show_learning_curve(h):
    """ Display learning curve.

    Args:
        h: list representing the history of scores.
    """
    plt.plot(range(len(h)), h)
    plt.xlabel('epochs')
    plt.ylabel('score')
    plt.show()

def tsne(m, axis=0, dim=2, **kwargs)

Use T-SNE algorithm to show the matrix m in a 2 or 3 dimensions space.

Args

axis: axis of dimensionality reduction.
dim: number of dimensions. 2 for 2D plot, 3 for 3D plot.
**kwargs: arguments for rk.scatterplot function (e.g. fontsize, pointsize).

Expand source code

def tsne(m, axis=0, dim=2, **kwargs):
    """ Use T-SNE algorithm to show the matrix m in a 2 or 3 dimensions space.

    Args:
        axis: axis of dimensionality reduction.
        dim: number of dimensions. 2 for 2D plot, 3 for 3D plot.
        **kwargs: arguments for rk.scatterplot function (e.g. fontsize, pointsize).
    """
    names = None
    if axis == 0:
        if rk.is_dataframe(m):
            names = m.columns
        m = m.T # transpose
    elif axis == 1:
        if rk.is_dataframe(m):
            names = m.index
    else:
        raise Excpetion('axis must be 0 or 1.')
    m_transformed = TSNE(n_components=dim).fit_transform(m)
    # Display
    scatterplot(m_transformed, dim=dim, names=names, **kwargs)