Module ranky.utilities

Expand source code
#################################
########## UTILITIES ############
#################################

import pandas as pd
import numpy as np

def str_to_float(s):
    """ Convert str to float specifically to read Codalab's CSV leaderboard.

        The score is the first whitespace-separated token of a cell that
        ends with a closing parenthesis, e.g. '0.78 (2)' -> 0.78

    Args:
        s: A cell value. Only strings that end with ')' (ignoring
            surrounding whitespace) are converted.

    Returns:
        The leading score as a float for strings such as '0.78 (2)'.
        Any other value (non-str, empty str, str without a trailing ')')
        is returned unchanged.

    Raises:
        ValueError: If the string ends with ')' but its first token is
            not a valid float literal.
    """
    if isinstance(s, str):
        cell = s.strip()
        # endswith() is safe on an empty string, whereas s[-1] raises IndexError.
        if cell.endswith(')'):
            # split() without a separator tolerates repeated whitespace.
            s = float(cell.split()[0])
    return s

def read_codalab_csv(csv_file):
    """ Read a leaderboard generated by Codalab as a CSV file.

    The file is read twice (header first, then data), so `csv_file`
    should be something that can be opened repeatedly, such as a path.

    Args:
        csv_file: The leaderboard downloaded from Codalab. It must contain
            the columns 'submission_pk' and 'User'.

    Returns:
        A pandas DataFrame indexed by the 'User' column, without the
        'submission_pk' and 'User' columns, where cells such as '0.78 (2)'
        have been converted to floats by str_to_float.
    """
    # Read the CSV file just to get the column names
    header_only = pd.read_csv(csv_file, nrows=0)  # No data rows, only the header row
    col_names = header_only.columns.tolist()
    # Add a name for the extra column.
    # NOTE(review): presumably the data rows carry one more field than the
    # header (e.g. a trailing delimiter) - confirm against a real export.
    col_names.append('extra')
    # Read the CSV file again with the updated column names, skipping the header
    m = pd.read_csv(csv_file, names=col_names, skiprows=1)
    # The placeholder column and the submission id are not part of the scores
    m = m.drop(columns=['extra'])
    m = m.drop('submission_pk', axis=1)
    m.index = m['User']
    m = m.drop('User', axis=1)
    # DataFrame.applymap is deprecated since pandas 2.1 in favor of
    # DataFrame.map; check the class so both old and new pandas work.
    if hasattr(pd.DataFrame, 'map'):
        m = m.map(str_to_float)
    else:
        m = m.applymap(str_to_float)
    return m

Functions

def read_codalab_csv(csv_file)

Read a leaderboard generated by Codalab as a CSV file.

Args

csv_file
The leaderboard downloaded from Codalab.
Expand source code
def read_codalab_csv(csv_file):
    """ Read a leaderboard generated by Codalab as a CSV file.

    The file is read twice (header first, then data), so `csv_file`
    should be something that can be opened repeatedly, such as a path.

    Args:
        csv_file: The leaderboard downloaded from Codalab. It must contain
            the columns 'submission_pk' and 'User'.

    Returns:
        A pandas DataFrame indexed by the 'User' column, without the
        'submission_pk' and 'User' columns, where cells such as '0.78 (2)'
        have been converted to floats by str_to_float.
    """
    # Read the CSV file just to get the column names
    header_only = pd.read_csv(csv_file, nrows=0)  # No data rows, only the header row
    col_names = header_only.columns.tolist()
    # Add a name for the extra column.
    # NOTE(review): presumably the data rows carry one more field than the
    # header (e.g. a trailing delimiter) - confirm against a real export.
    col_names.append('extra')
    # Read the CSV file again with the updated column names, skipping the header
    m = pd.read_csv(csv_file, names=col_names, skiprows=1)
    # The placeholder column and the submission id are not part of the scores
    m = m.drop(columns=['extra'])
    m = m.drop('submission_pk', axis=1)
    m.index = m['User']
    m = m.drop('User', axis=1)
    # DataFrame.applymap is deprecated since pandas 2.1 in favor of
    # DataFrame.map; check the class so both old and new pandas work.
    if hasattr(pd.DataFrame, 'map'):
        m = m.map(str_to_float)
    else:
        m = m.applymap(str_to_float)
    return m
def str_to_float(s)

Convert str to float specifically to read Codalab's CSV leaderboard.

e.g. '0.78 (2)' -> 0.78. TODO: make the parsing more robust.

Expand source code
def str_to_float(s):
    """ Convert str to float specifically to read Codalab's CSV leaderboard.

        The score is the first whitespace-separated token of a cell that
        ends with a closing parenthesis, e.g. '0.78 (2)' -> 0.78

    Args:
        s: A cell value. Only strings that end with ')' (ignoring
            surrounding whitespace) are converted.

    Returns:
        The leading score as a float for strings such as '0.78 (2)'.
        Any other value (non-str, empty str, str without a trailing ')')
        is returned unchanged.

    Raises:
        ValueError: If the string ends with ')' but its first token is
            not a valid float literal.
    """
    if isinstance(s, str):
        cell = s.strip()
        # endswith() is safe on an empty string, whereas s[-1] raises IndexError.
        if cell.endswith(')'):
            # split() without a separator tolerates repeated whitespace.
            s = float(cell.split()[0])
    return s