-
Notifications
You must be signed in to change notification settings - Fork 989
/
Copy pathpr-statistics.sh
executable file
·263 lines (220 loc) · 8.34 KB
/
pr-statistics.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
#!/usr/bin/env bash
# Scan a GitHub repository and compute statistics on its pull requests (PRs).
# Requires curl and bc (present on most Unix-like systems).
#
# Every parameter below keeps its historical default but can be overridden
# from the environment, so the file does not have to be edited, e.g.:
#   GITHUB_REPO=owner/name START_TIME=2019-01-01 ./pr-statistics.sh merge
#
# PARAMETERS ----------------------------
# GitHub repository to scan, written as "owner/name"
GITHUB_REPO="${GITHUB_REPO:-status-im/status-mobile}"
# Computation mode
# available values :
#   open  : keep only open PRs (neither merged nor closed) and compute the
#           five-number summary of the time elapsed since their creation
#           (open is default)
#   merge : keep only PRs that are closed AND merged and compute the
#           five-number summary of the time between creation and closing
MODE="${MODE:-open}"
# Filter PRs by creation date
# This filter is optional: set START_TIME to an empty value to disable it.
# Only PRs created between START_TIME and END_TIME are kept.
# If END_TIME is empty, today is assumed.
# WARN : the GitHub API returns PRs 30 per page, so depending on the chosen
# dates a lot of requests may be needed.
# available values : YYYY-MM-DD
if [ -n "${START_TIME+set}" ]; then
  # START_TIME comes from the environment (possibly empty to disable the
  # filter): do not pair it with the built-in END_TIME, default to "today".
  END_TIME="${END_TIME-}"
else
  START_TIME="2018-01-01"
  END_TIME="${END_TIME-2018-02-01}"
fi
# Verbose mode
# print progress information on stderr (enabled here; any value other than 1
# keeps the script quiet)
# available values : 0 | 1
VERBOSE="${VERBOSE:-1}"
# INTERNAL PARAMETERS ----------------------------
# Determine platform
# OS is "DARWIN" when bash reports an OSTYPE starting with "darwin" and "UNIX"
# for everything else; the rest of the script uses it to pick between the
# `date -j -f` and `date -d` invocations.
if [[ "$OSTYPE" == darwin* ]]; then
  OS="DARWIN"
else
  OS="UNIX"
fi
# Check available tools
# curl and bc (which are present on most unix platform)
# Each tool is probed on its own with `command -v`, so the outcome no longer
# depends on an `_err` variable that could be inherited from the environment
# or left over from the previous check. Diagnostics go to stderr to keep
# stdout reserved for the statistics.
for _tool in curl bc; do
  if ! command -v "$_tool" >/dev/null 2>&1; then
    echo "ERROR : you need $_tool on this platform" >&2
    exit 1
  fi
done
unset _tool
# Fix date filter
#######################################
# Convert a YYYY-MM-DD day into the epoch (seconds) of its midnight in UTC.
# Globals:   OS (read) - "DARWIN" selects the BSD date syntax
# Arguments: $1 - day formatted as YYYY-MM-DD
# Outputs:   epoch seconds on stdout
# Returns:   the status of date, non-zero when the day cannot be parsed
#######################################
_day_to_epoch() {
  if [ "$OS" = "DARWIN" ]; then
    # BSD date takes the fields missing from the format from the current
    # time, so the time of day is pinned explicitly to midnight.
    date -j -u -f '%Y-%m-%d %H:%M:%S' "$1 00:00:00" +%s
  else
    # -u: the PR timestamps compared against these bounds are UTC ("...Z"),
    # so the bounds must not depend on the local time zone.
    date -u -d "$1" +%s
  fi
}

# NOW         : current epoch, reference for the age of open PRs
# FILTER      : "ON" when PRs must be filtered by creation date
# FILTER_TEXT : human readable description of the filter (verbose output)
# START_TIME / END_TIME are converted in place from YYYY-MM-DD to epoch.
NOW="$(date +%s)"
FILTER=""
FILTER_TEXT=""
if [ -n "$START_TIME" ]; then
  FILTER="ON"
  FILTER_TEXT=" created between $START_TIME"
  START_TIME="$(_day_to_epoch "$START_TIME")" || {
    echo "ERROR : START_TIME is not a valid YYYY-MM-DD date" >&2
    exit 1
  }
  # we get today by default for END_TIME
  if [ -z "$END_TIME" ]; then
    FILTER_TEXT="$FILTER_TEXT and today"
    END_TIME="$NOW"
  else
    FILTER_TEXT="$FILTER_TEXT and $END_TIME"
    END_TIME="$(_day_to_epoch "$END_TIME")" || {
      echo "ERROR : END_TIME is not a valid YYYY-MM-DD date" >&2
      exit 1
    }
  fi
fi
# FUNCTIONS ---------------------------
# Print the command line help on stdout, one invocation form per line.
usage() {
  printf '%s\n' \
    "USAGE" \
    "$0 -h : get help" \
    "$0 [merge|open] : compute stats on merged PR or opened PR (open is default)"
}
#######################################
# List how long each merged PR of a repository took from creation to closing.
# Closed PRs are fetched page by page, newest first, from the GitHub API; only
# those whose merge date is not null are kept.
# Each page is processed on its own: the dates of already processed pages are
# never walked again, so every PR is counted exactly once.
# Globals:   OS, VERBOSE, FILTER, START_TIME, END_TIME (all read only;
#            START_TIME and END_TIME are epoch seconds when FILTER is "ON")
# Arguments: $1 - repository written as "owner/name"
# Outputs:   one duration in seconds per line on stdout, sorted ascending
#            (nothing when no PR matches); progress on stderr if VERBOSE is 1
#######################################
github_pr_merge() {
  local repo="$1"
  local api_url="https://api.github.com/repos/$repo/pulls"
  local -a durations=() page_created=() page_closed=()
  local last_page page json merged_block idx created closed
  local oldest_created=""

  # The number of pages is the "page" parameter of the rel="last" URL in the
  # Link response header; there is no such header when one page is enough.
  last_page="$(curl -i -sL "${api_url}?sort=created&state=closed" \
    | sed -n 's/.*[?&]page=\([0-9][0-9]*\)[^>]*>; *rel="last".*/\1/p' \
    | tail -n 1)"
  [[ "$last_page" =~ ^[0-9]+$ ]] || last_page=1

  [ "$VERBOSE" = "1" ] && echo "" >&2
  for (( page = 1; page <= last_page; page++ )); do
    [ "$VERBOSE" = "1" ] && tput cuu 1 >&2 && tput el >&2 && echo "[Analysing PRs list: $page/$last_page]" >&2
    json="$(curl -sL "${api_url}?sort=created&direction=desc&state=closed&page=$page")"
    # get PR merged AND closed - we use closed date to compute stat and we ignore if its merged date is null
    merged_block="$(echo "$json" | grep -A1 -B2 "closed_at" | grep --invert-match null | grep -B4 "merged_at" | grep -A3 "created_at")"
    # Word splitting is intended: one ISO timestamp per array element
    # shellcheck disable=SC2207
    page_created=( $(echo "$merged_block" | grep "created_at" | tr -d ' ' | cut -d "\"" -f 4) )
    # shellcheck disable=SC2207
    page_closed=( $(echo "$merged_block" | grep "closed_at" | tr -d ' ' | cut -d "\"" -f 4) )

    for (( idx = 0; idx < ${#page_created[@]}; idx++ )); do
      # A creation date without its closing date cannot give a duration
      [ -n "${page_closed[idx]}" ] || continue
      if [ "$OS" = "DARWIN" ]; then
        created="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${page_created[idx]}" +%s)"
        closed="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "${page_closed[idx]}" +%s)"
      else
        created="$(date -d "${page_created[idx]}" +%s)"
        closed="$(date -d "${page_closed[idx]}" +%s)"
      fi
      # Ignore entries whose dates could not be converted
      [[ -n "$created" && -n "$closed" ]] || continue
      # Pages are sorted newest first: the last PR seen is the oldest one
      oldest_created="$created"
      if [ "$FILTER" = "ON" ] && (( created < START_TIME || created > END_TIME )); then
        continue
      fi
      durations+=( "$(( closed - created ))" )
    done

    # we do not catch anymore page because we get all PR for the period
    if [ "$FILTER" = "ON" ] && [ -n "$oldest_created" ] && (( START_TIME > oldest_created )); then
      [ "$VERBOSE" = "1" ] && echo "[Stop fetching PR, other PR are out of the selected time]" >&2
      break
    fi
  done

  if (( ${#durations[@]} > 0 )); then
    printf '%s\n' "${durations[@]}" | sort -n
  fi
}
#######################################
# List how long each open PR of a repository has been open.
# Open PRs are fetched page by page, newest first, from the GitHub API and the
# age of each one is NOW minus its creation date.
# Each page is processed on its own: the dates of already processed pages are
# never walked again, so every PR is counted exactly once.
# Globals:   OS, NOW, VERBOSE, FILTER, START_TIME, END_TIME (all read only;
#            NOW, START_TIME and END_TIME are epoch seconds)
# Arguments: $1 - repository written as "owner/name"
# Outputs:   one duration in seconds per line on stdout, sorted ascending
#            (nothing when no PR matches); progress on stderr if VERBOSE is 1
#######################################
github_pr_open() {
  local repo="$1"
  local api_url="https://api.github.com/repos/$repo/pulls"
  local -a durations=()
  local last_page page json page_created created_iso created
  local oldest_created=""

  # The number of pages is the "page" parameter of the rel="last" URL in the
  # Link response header; there is no such header when one page is enough.
  last_page="$(curl -i -sL "${api_url}?sort=created&state=open" \
    | sed -n 's/.*[?&]page=\([0-9][0-9]*\)[^>]*>; *rel="last".*/\1/p' \
    | tail -n 1)"
  [[ "$last_page" =~ ^[0-9]+$ ]] || last_page=1

  [ "$VERBOSE" = "1" ] && echo "" >&2
  for (( page = 1; page <= last_page; page++ )); do
    [ "$VERBOSE" = "1" ] && tput cuu 1 >&2 && tput el >&2 && echo "[Analysing PRs list: $page/$last_page]" >&2
    json="$(curl -sL "${api_url}?sort=created&direction=desc&state=open&page=$page")"
    # Creation dates of this page only (the "created_at" found two lines
    # above each "closed_at")
    page_created="$(echo "$json" | grep -B2 "closed_at" | grep "created_at" | tr -d ' ' | cut -d "\"" -f 4)"

    # Word splitting is intended: one ISO timestamp per iteration
    for created_iso in $page_created; do
      if [ "$OS" = "DARWIN" ]; then
        created="$(date -j -u -f '%Y-%m-%dT%H:%M:%SZ' "$created_iso" +%s)"
      else
        created="$(date -d "$created_iso" +%s)"
      fi
      # Ignore entries whose date could not be converted
      [ -n "$created" ] || continue
      # Pages are sorted newest first: the last PR seen is the oldest one
      oldest_created="$created"
      if [ "$FILTER" = "ON" ] && (( created < START_TIME || created > END_TIME )); then
        continue
      fi
      durations+=( "$(( NOW - created ))" )
    done

    # we do not catch anymore page because we get all PR for the period
    if [ "$FILTER" = "ON" ] && [ -n "$oldest_created" ] && (( START_TIME > oldest_created )); then
      [ "$VERBOSE" = "1" ] && echo "[Stop fetching PRs, next will be out of the selected time]" >&2
      break
    fi
  done

  if (( ${#durations[@]} > 0 )); then
    printf '%s\n' "${durations[@]}" | sort -n
  fi
}
#######################################
# Print the median of a list of numbers sorted in ascending order.
# Arguments: the sorted values, one per argument
# Outputs:   on stdout, the middle value when the count is odd; the mean of
#            the two middle values, computed by bc with two decimals, when
#            the count is even; an empty line when there is no argument
#######################################
median_list() {
  local -a values=("$@")
  local count=$#
  # Zero-based index of the middle element (upper one for an even count)
  local mid=$(( count / 2 ))

  if (( count == 0 )); then
    echo ""
  elif (( count % 2 == 1 )); then
    # Odd count: the median is the middle element itself, not an average
    echo "${values[mid]}"
  else
    echo "scale=2; (${values[mid - 1]} + ${values[mid]})/2" | bc -l
  fi
}
#######################################
# Format a duration in seconds as "<days>d <hh>h:<mm>m:<ss>s".
# Arguments: $1 - duration in seconds; a fractional part (as printed by bc,
#                 e.g. "12.50" or ".50") is dropped and an empty value is
#                 treated as 0 instead of raising an arithmetic error
# Outputs:   the formatted duration followed by a newline on stdout
#######################################
convert_sec() {
  # Keep the integer part only: shell arithmetic cannot handle decimals
  local secs="${1%%.*}"
  secs="${secs:-0}"
  local days=$(( secs / 86400 ))
  local hours=$(( secs % 86400 / 3600 ))
  local minutes=$(( secs % 3600 / 60 ))
  local seconds=$(( secs % 60 ))
  printf '%dd %02dh:%02dm:%02ds\n' "$days" "$hours" "$minutes" "$seconds"
}
#######################################
# Print the five-number summary (minimum, lower quartile, median, upper
# quartile, maximum) of a list of durations as a two-line table.
# The quartiles are the medians of the lower and upper halves of the list;
# when the count is odd the middle element belongs to both halves.
# Relies on median_list for the medians and on convert_sec for formatting.
# Globals:   VERBOSE, FILTER_TEXT (read)
# Arguments: the durations in seconds, sorted ascending, one per argument
# Outputs:   the table on stdout (header only when the list is empty);
#            a one-line recap on stderr when VERBOSE is 1
#######################################
five_nb_summary() {
  local -a values=("$@")
  local len_list=$#
  local sample_minimum="" lower_quartile="" median="" upper_quartile="" sample_maximum=""
  local half

  if (( len_list > 0 )); then
    # NOTE list is already sorted asc
    sample_minimum="${values[0]}"
    sample_maximum="${values[len_list - 1]}"
    # Size of each half, the middle element being shared for an odd count
    half=$(( (len_list + 1) / 2 ))
    median="$(median_list "${values[@]}")"
    lower_quartile="$(median_list "${values[@]:0:half}")"
    upper_quartile="$(median_list "${values[@]:len_list - half}")"
    # Keep whole seconds only; a value such as ".50" printed by bc becomes 0
    # rather than an empty string
    median="${median%%.*}"
    median="${median:-0}"
    lower_quartile="${lower_quartile%%.*}"
    lower_quartile="${lower_quartile:-0}"
    upper_quartile="${upper_quartile%%.*}"
    upper_quartile="${upper_quartile:-0}"
  fi

  [ "$VERBOSE" = "1" ] && echo "[Stats computed on $len_list PR$FILTER_TEXT]" >&2
  printf "\n"
  # The central column holds the median (it used to be mislabelled "Mean")
  printf "%16s | %16s | %16s | %16s | %16s" "Min" "Q1" "Median" "Q3" "Max"
  printf "\n"
  if (( len_list > 0 )); then
    printf "%16s | %16s | %16s | %16s | %16s" "$(convert_sec "$sample_minimum")" "$(convert_sec "$lower_quartile")" "$(convert_sec "$median")" "$(convert_sec "$upper_quartile")" "$(convert_sec "$sample_maximum")"
  fi
  printf "\n"
}
# MAIN ----------------------------
# Accepted invocations: no argument (use the configured MODE), -h, merge, open.
# Anything else is rejected instead of being silently ignored, so a typo such
# as "merged" no longer runs the default mode unnoticed.
case "${1-}" in
  -h)
    usage
    exit
    ;;
  merge|open)
    MODE="$1"
    ;;
  "")
    # open is the documented default when no mode is configured
    MODE="${MODE:-open}"
    ;;
  *)
    echo "ERROR : unknown argument '$1'" >&2
    usage >&2
    exit 1
    ;;
esac
# MODE is used below to build the name of the function to call: make sure it
# designates one of the two existing implementations.
case "$MODE" in
  merge|open)
    ;;
  *)
    echo "ERROR : unknown mode '$MODE' (expected merge or open)" >&2
    exit 1
    ;;
esac
if [ "$VERBOSE" = "1" ]; then
  echo "[Fetching $MODE PRs stats from $GITHUB_REPO]" >&2
fi
list_time="$(github_pr_"$MODE" "$GITHUB_REPO")"
# Left unquoted on purpose: the durations are split into one argument each
# shellcheck disable=SC2086
five_nb_summary $list_time