The Ranking problem¶

$N$ teams play a tournament. Not all teams play each other. How do you rank all the teams?

In [1]:
import numpy as np, scipy as sp
from numpy import sqrt, pi, exp, log, floor, ceil, sin, cos, nan, inf
from numpy import linspace, logspace, geomspace, arange
from numpy import empty, zeros, ones, empty_like, zeros_like, ones_like, full, full_like
from numpy import diff, meshgrid, mean, std, argmin, array, eye, amin, amax, fmin, fmax
from numpy.linalg import norm

%matplotlib inline
import matplotlib as mpl
from matplotlib import pylab, mlab, pyplot as plt
from matplotlib.pylab import plot, scatter, contour, xlabel, ylabel, title, legend
from matplotlib.animation import FuncAnimation
plt.rcParams.update({
    'image.cmap': 'coolwarm',
    'animation.html': 'jshtml',
    'animation.embed_limit': 40, # 40 mb
})

import re, multiprocessing as mp, networkx as nx, json

from tqdm.notebook import tqdm, trange

rng = np.random.default_rng()

Example tournament with 4 teams¶

$S[i, j]$ the score when team $i$ plays team $j$

In [2]:
S = array([
    [0, 3, 3, 0],
    [0, 0, 0, 0],
    [1, 0, 0, 2],
    [0, 0, 1, 0]
])
In [3]:
# Visualize the scores
def visualize(G, weights_from='weight'):
    widths = nx.get_edge_attributes(G, weights_from)
    nodelist = G.nodes()
    # plt.figure(figsize=(12,8))

    pos = nx.circular_layout(G)
    nx.draw_networkx_nodes(G, pos, nodelist=nodelist, node_size=300, edgecolors='black', alpha=.5)
    # nx.draw_networkx_edges(G,pos, edgelist = widths.keys(), alpha=array(list(widths.values())) )
    nx.draw_networkx_edges(G, pos, connectionstyle='arc3,rad=0.1',
        edgelist=[(a, b) for a, b in G.edges if a != b])
    nx.draw_networkx_edge_labels( G, pos,
        edge_labels={k: f'{w:.2f}' for k, w in widths.items()},
        label_pos=.3 )
    _ = nx.draw_networkx_labels(G, pos=pos, verticalalignment='center_baseline',
        labels=dict(zip(nodelist, nodelist)))
In [4]:
G = nx.DiGraph()
N = len(S)
G.add_nodes_from(arange(N))
G.add_edges_from([
    (a, b, {'score': S[a, b]})
        for a in range(N)
            for b in range(N)
                 if S[a, b] + S[b, a] != 0 or a == b
    ])
# G.add_edges_from( [ ( a, a, {'score': 0} ) for a in G.nodes] )

visualize(G, weights_from='score')
title('Scores')
Out[4]:
Text(0.5, 1.0, 'Scores')
No description has been provided for this image

Convert the numerical score in the match of $x$ vs $y$ to the probability a fan switches aliances from team $x$ to team $y$.

  • Larger the victory margin the smaller the probability of switching.

  • One choice is ans switch from team $x$ to team $y$ with probability proportional to $$ \exp\bigl({-\beta (S_{x, y} - S_{y, x})}\bigr) $$ for some constant $\beta > 0$. Or, with some small probability (say 15%) they get annoyed with both teams and switch to a randomly chosen team.

  • Rank teams by the average amount of time a person spends being a fan of their team. The pagerank function computes this for us.

In [5]:
# Transition probabilities
β = 0.1
for a in G.nodes():
    Z = sum(exp(-β*(G[a][b]['score'] - G[b][a]['score'])) for b in G.neighbors(a))
    for b in G.neighbors(a):
        G[a][b]['weight'] = exp(-β*(G[a][b]['score']-G[b][a]['score']))/Z

visualize(G)
No description has been provided for this image
In [6]:
π = nx.pagerank( G, alpha=.85 )
keys = sorted(π, key=lambda k: π[k], reverse=True)
for k in keys:
    print(k, f'{π[k]*100:.2f}')
0 33.86
2 28.97
1 18.92
3 18.25

The ranking depends on $\beta$. Different values of $\beta$ will give you different rankings.

In [7]:
β=1.5
for a in G.nodes():
    Z = sum(exp(-β*(G[a][b]['score'] - G[b][a]['score'])) for b in G.neighbors(a))
    for b in G.neighbors(a):
        G[a][b]['weight'] = exp(-β*(G[a][b]['score']-G[b][a]['score']))/Z

visualize(G)
No description has been provided for this image
In [8]:
π = nx.pagerank(G, alpha=.85)
keys = sorted(π, key=lambda k: π[k], reverse=True)
for k in keys:
    print(k, f'{π[k]*100:.2f}')
0 80.40
2 10.54
3 4.55
1 4.51

Example with a large number of teams¶

Consider a tournament with $N$ teams, where each team plays a random fraction of other teams.

In [9]:
N = 100
# Connectivity threshold log N / N
ER = nx.erdos_renyi_graph(N, 2 * log(N) / N)

G = nx.DiGraph()
G.add_nodes_from(ER.nodes)
G.add_edges_from([(a, b, {'score': rng.integers(1, 5)}) for (a, b) in ER.edges])
G.add_edges_from([(b, a, {'score': rng.integers(1, 5)}) for (a, b) in ER.edges])
G.add_edges_from([(a, a, {'score': 0}) for a in G.nodes])
In [10]:
# visualize( G, weights_from='score' )
In [11]:
β = 0.1
for a in G.nodes():
    Z = sum(exp(-β*(G[a][b]['score']-G[b][a]['score'])) for b in G.neighbors(a))
    for b in G.neighbors(a):
        G[a][b]['weight'] = exp(-β*(G[a][b]['score']-G[b][a]['score']))/Z

π = nx.pagerank(G, alpha=.85)
keys = sorted(π, key=lambda k: π[k], reverse=True)
for k in keys[:10]:
    print(k, f'{π[k]*100:.2f}%')
60 1.54%
14 1.49%
82 1.42%
92 1.39%
43 1.37%
19 1.36%
34 1.35%
26 1.33%
6 1.32%
89 1.31%

Note, the order isn't stable; changing $\beta$ slightly, alters the ranking (slightly). We will, however, show that for fixed $\beta > 0$ (and for $\alpha < 1$), the rankings are unique and the percentage of time spent as a fan of a given team is exactly what's returned by the pagerank library function used here.

In [12]:
β=0.15
for a in G.nodes():
    Z = sum(exp(-β*(G[a][b]['score']-G[b][a]['score'])) for b in G.neighbors(a))
    for b in G.neighbors(a):
        G[a][b]['weight'] = exp(-β*(G[a][b]['score']-G[b][a]['score']))/Z

π = nx.pagerank(G, alpha=.85)
keys = sorted(π, key=lambda k: π[k], reverse=True)
for k in keys[:10]:
    print(k, f'{π[k]*100:.2f}%')
14 1.50%
82 1.49%
60 1.49%
43 1.41%
34 1.40%
92 1.40%
19 1.38%
26 1.35%
97 1.34%
6 1.31%
In [ ]: