forked from LAION-AI/Open-Assistant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrankings.py
140 lines (122 loc) · 5.36 KB
/
rankings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from typing import List
import numpy as np
def head_to_head_votes(ranks: List[List[int]]):
tallies = np.zeros((len(ranks[0]), len(ranks[0])))
names = sorted(ranks[0])
ranks = np.array(ranks)
# we want the sorted indices
ranks = np.argsort(ranks, axis=1)
for i in range(ranks.shape[1]):
for j in range(i + 1, ranks.shape[1]):
# now count the cases someone voted for i over j
over_j = np.sum(ranks[:, i] < ranks[:, j])
over_i = np.sum(ranks[:, j] < ranks[:, i])
tallies[i, j] = over_j
# tallies[i,j] = over_i
tallies[j, i] = over_i
# tallies[j,i] = over_j
return tallies, names
def cycle_detect(pairs):
"""Recursively detect cylces by removing condorcet losers until either only one pair is left or condorcet loosers no longer exist
This method upholds the invariant that in a ranking for all a,b either a>b or b>a for all a,b.
Returns
-------
out : False if the pairs do not contain a cycle, True if the pairs contain a cycle
"""
# get all condorcet losers (pairs that loose to all other pairs)
# idea: filter all losers that are never winners
# print("pairs", pairs)
if len(pairs) <= 1:
return False
losers = [c_lose for c_lose in np.unique(pairs[:, 1]) if c_lose not in pairs[:, 0]]
if len(losers) == 0:
# if we recursively removed pairs, and at some point we did not have
# a condorcet loser, that means everything is both a winner and loser,
# yielding at least one (winner,loser), (loser,winner) pair
return True
new = []
for p in pairs:
if p[1] not in losers:
new.append(p)
return cycle_detect(np.array(new))
def get_winner(pairs):
"""
This returns _one_ concordant winner.
It could be that there are multiple concordant winners, but in our case
since we are interested in a ranking, we have to choose one at random.
"""
losers = np.unique(pairs[:, 1]).astype(int)
winners = np.unique(pairs[:, 0]).astype(int)
for w in winners:
if w not in losers:
return w
def get_ranking(pairs):
"""
Abuses concordance property to get a (not necessarily unqiue) ranking.
The lack of uniqueness is due to the potential existence of multiple
equally ranked winners. We have to pick one, which is where
the non-uniqueness comes from
"""
if len(pairs) == 1:
return list(pairs[0])
w = get_winner(pairs)
# now remove the winner from the list of pairs
p_new = np.array([(a, b) for a, b in pairs if a != w])
return [w] + get_ranking(p_new)
def ranked_pairs(ranks: List[List[int]]):
"""
Expects a list of rankings for an item like:
[("w","x","z","y") for _ in range(3)]
+ [("w","y","x","z") for _ in range(2)]
+ [("x","y","z","w") for _ in range(4)]
+ [("x","z","w","y") for _ in range(5)]
+ [("y","w","x","z") for _ in range(1)]
This code is quite brain melting, but the idea is the following:
1. create a head-to-head matrix that tallies up all win-lose combinations of preferences
2. take all combinations that win more than they loose and sort those by how often they win
3. use that to create an (implicit) directed graph
4. recursively extract nodes from the graph that do not have incoming edges
5. said recursive list is the ranking
"""
tallies, names = head_to_head_votes(ranks)
tallies = tallies - tallies.T
# print(tallies)
# note: the resulting tally matrix should be skew-symmetric
# order by strength of victory (using tideman's original method, don't think it would make a difference for us)
sorted_majorities = []
for i in range(len(ranks[0])):
for j in range(len(ranks[i])):
if tallies[i, j] > 0:
sorted_majorities.append((i, j, tallies[i, j]))
# we don't explicitly deal with tied majorities here
sorted_majorities = np.array(sorted(sorted_majorities, key=lambda x: x[2], reverse=True))
# now do lock ins
lock_ins = []
for (x, y, _) in sorted_majorities:
# invariant: lock_ins has no cycles here
lock_ins.append((x, y))
# print("lock ins are now",np.array(lock_ins))
if cycle_detect(np.array(lock_ins)):
# print("backup: cycle detected")
# if there's a cycle, delete the new addition and continue
lock_ins = lock_ins[:-1]
# now simply return all winners in order, and attach the losers
# to the back. This is because the overall loser might not be unique
# and (by concordance property) may never exist in any winning set to begin with.
# (otherwise he would either not be the loser, or cycles exist!)
# Since there could be multiple overall losers, we just return them in any order
# as we are unable to find a closer ranking
numerical_ranks = np.array(get_ranking(np.array(lock_ins))).astype(int)
conversion = [names[n] for n in numerical_ranks]
return conversion
if __name__ == "__main__":
ranks = (
[("w", "x", "z", "y") for _ in range(1)]
+ [("w", "y", "x", "z") for _ in range(2)]
# + [("x","y","z","w") for _ in range(4)]
+ [("x", "z", "w", "y") for _ in range(5)]
+ [("y", "w", "x", "z") for _ in range(1)]
# [("y","z","w","x") for _ in range(1000)]
)
rp = ranked_pairs(ranks)
print(rp)