-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathndcg_kaggle.py
82 lines (65 loc) · 2.18 KB
/
ndcg_kaggle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
"""
Metrics to compute the model performance.
https://www.kaggle.com/davidgasquez/ndcg-scorer
"""
import numpy as np
from sklearn.metrics import make_scorer
def dcg_score(y_true, y_score, k=5):
    """Compute the discounted cumulative gain (DCG) of one ranking at rank K.

    The items are ranked by descending ``y_score``; the relevance of the
    first ``k`` items is turned into an exponential gain (``2 ** rel - 1``)
    and discounted by ``log2(position + 1)`` with positions starting at 1.

    Parameters
    ----------
    y_true : array-like, shape = [n_items]
        Relevance of every item (for a one-hot row: 1 for the true class,
        0 elsewhere).
    y_score : array-like, shape = [n_items]
        Score assigned to every item; higher means ranked earlier.
    k : int
        Number of top-ranked items taken into account.

    Returns
    -------
    score : float
        Sum of the discounted gains of the top ``k`` items.
    """
    # Indices of the k best-scored items, best first.
    top_k = np.argsort(y_score)[::-1][:k]
    relevance = np.take(y_true, top_k)

    # Position i (0-based) is discounted by log2(i + 2): 1, log2(3), 2, ...
    positions = np.arange(len(relevance)) + 2
    discounted_gain = (2 ** relevance - 1) / np.log2(positions)
    return np.sum(discounted_gain)
def ndcg_score(ground_truth, predictions, k=5):
    """Normalized discounted cumulative gain (NDCG) at rank K.

    Normalized Discounted Cumulative Gain (NDCG) measures the performance of a
    recommendation system based on the graded relevance of the recommended
    entities. It varies from 0.0 to 1.0, with 1.0 representing the ideal
    ranking of the entities.

    Parameters
    ----------
    ground_truth : array-like, shape = [n_samples] or [n_samples, n_classes]
        Ground truth. Either one integer class label per sample (labels are
        column indices into ``predictions``) or one relevance vector per
        sample (e.g. already one-hot encoded labels).
    predictions : array-like, shape = [n_samples, n_classes]
        Predicted probabilities.
    k : int
        Rank.

    Returns
    -------
    score : float
        Mean of the per-sample NDCG@k values. A sample without any relevant
        class (ideal DCG of zero) contributes 1.0.

    Raises
    ------
    IndexError
        If an integer label is not a valid column index of its prediction
        row.

    Example
    -------
    >>> ground_truth = [1, 0, 2]
    >>> predictions = [[0.15, 0.55, 0.2], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
    >>> float(ndcg_score(ground_truth, predictions, k=2))
    1.0
    >>> predictions = [[0.9, 0.5, 0.8], [0.7, 0.2, 0.1], [0.06, 0.04, 0.9]]
    >>> round(float(ndcg_score(ground_truth, predictions, k=2)), 4)
    0.6667
    """
    scores = []
    # Iterate over each sample and compute its normalized DCG score
    for y_true, y_score in zip(ground_truth, predictions):
        y_true = np.asarray(y_true)
        y_score = np.asarray(y_score)

        # A scalar label cannot be ranked directly: np.take() on a 0-d value
        # fails for every index but 0. Expand it into a one-hot relevance
        # vector that lines up with the score row.
        if y_true.ndim == 0 and y_score.ndim == 1:
            label = int(y_true)
            n_classes = y_score.shape[0]
            if not 0 <= label < n_classes:
                raise IndexError(
                    "label %d is out of range for %d classes"
                    % (label, n_classes)
                )
            relevance = np.zeros(n_classes, dtype=int)
            relevance[label] = 1
            y_true = relevance

        actual = dcg_score(y_true, y_score, k)
        # Ranking the relevances by themselves yields the ideal ordering.
        best = dcg_score(y_true, y_true, k)
        if best < 1e-8:
            # Nothing relevant to rank: count the sample as perfectly ranked
            # instead of dividing by zero.
            scores.append(1.)
        else:
            scores.append(float(actual) / float(best))
    return np.mean(scores)
# Scikit-learn scorer wrapping ndcg_score: NDCG@5 evaluated on the
# estimator's predicted probabilities.
# NOTE(review): `needs_proba` appears to be deprecated in recent scikit-learn
# releases in favour of `response_method` - confirm against the pinned version.
ndcg_scorer = make_scorer(ndcg_score, k=5, needs_proba=True)