Module ranky.utilities
Expand source code
#################################
########## UTILITIES ############
#################################
import pandas as pd
import numpy as np
def str_to_float(s):
    """ Convert str to float specifically to read Codalab's CSV leaderboard.

    e.g. '0.78 (2)' -> 0.78

    Only strings ending with ')' are converted; any other value (the empty
    string, strings without a trailing ')', non-string objects) is returned
    unchanged.

    Args:
        s: A single cell value of the leaderboard.

    Returns:
        The text before the first space converted to float when `s` is a
        string ending with ')', otherwise `s` itself.

    Raises:
        ValueError: If the text before the first space is not a valid float.

    TODO: more robust
    """
    # endswith() is safe on the empty string, whereas s[-1] raises IndexError.
    if isinstance(s, str) and s.endswith(')'):
        return float(s.split(' ')[0])
    return s
def read_codalab_csv(csv_file):
    """ Read a leaderboard generated by Codalab as a CSV file.

    The file is parsed with one more column name than its header declares;
    that surplus 'extra' column and the 'submission_pk' column are dropped,
    the 'User' column becomes the index, and every remaining cell is passed
    through `str_to_float`.

    Args:
        csv_file: The leaderboard downloaded from Codalab. It is read twice,
            so it must be re-readable (e.g. a path).

    Returns:
        A pandas DataFrame indexed by 'User'.
    """
    # Read the CSV file just to get the column names
    # (no data rows are read, but the header row is).
    header_only = pd.read_csv(csv_file, nrows=0)
    col_names = header_only.columns.tolist()
    # Add a name for the extra column
    col_names.append('extra')
    # Read the CSV file again with the updated column names
    m = pd.read_csv(csv_file, names=col_names, skiprows=1)
    # Now the extra column can be dropped, together with 'submission_pk'
    m = m.drop(columns=['extra', 'submission_pk'])
    m = m.set_index('User')
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; fall back to applymap on older pandas versions.
    convert_cells = m.map if hasattr(m, 'map') else m.applymap
    return convert_cells(str_to_float)
Functions
def read_codalab_csv(csv_file)
-
Read a leaderboard generated by Codalab as a CSV file.
Args
csv_file
- The leaderboard downloaded from Codalab.
Expand source code
def read_codalab_csv(csv_file):
    """ Read a leaderboard generated by Codalab as a CSV file.

    The file is parsed with one more column name than its header declares;
    that surplus 'extra' column and the 'submission_pk' column are dropped,
    the 'User' column becomes the index, and every remaining cell is passed
    through `str_to_float`.

    Args:
        csv_file: The leaderboard downloaded from Codalab. It is read twice,
            so it must be re-readable (e.g. a path).

    Returns:
        A pandas DataFrame indexed by 'User'.
    """
    # Read the CSV file just to get the column names
    # (no data rows are read, but the header row is).
    header_only = pd.read_csv(csv_file, nrows=0)
    col_names = header_only.columns.tolist()
    # Add a name for the extra column
    col_names.append('extra')
    # Read the CSV file again with the updated column names
    m = pd.read_csv(csv_file, names=col_names, skiprows=1)
    # Now the extra column can be dropped, together with 'submission_pk'
    m = m.drop(columns=['extra', 'submission_pk'])
    m = m.set_index('User')
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; fall back to applymap on older pandas versions.
    convert_cells = m.map if hasattr(m, 'map') else m.applymap
    return convert_cells(str_to_float)
def str_to_float(s)
-
Convert a str to float, specifically to read Codalab's CSV leaderboard.
e.g. '0.78 (2)' -> 0.78. TODO: make the conversion more robust.
Expand source code
def str_to_float(s):
    """ Convert str to float specifically to read Codalab's CSV leaderboard.

    e.g. '0.78 (2)' -> 0.78

    Only strings ending with ')' are converted; any other value (the empty
    string, strings without a trailing ')', non-string objects) is returned
    unchanged.

    Args:
        s: A single cell value of the leaderboard.

    Returns:
        The text before the first space converted to float when `s` is a
        string ending with ')', otherwise `s` itself.

    Raises:
        ValueError: If the text before the first space is not a valid float.

    TODO: more robust
    """
    # endswith() is safe on the empty string, whereas s[-1] raises IndexError.
    if isinstance(s, str) and s.endswith(')'):
        return float(s.split(' ')[0])
    return s