3
3
import json
4
4
import sys
5
5
import random
6
+ import datetime
7
+ import math
6
8
7
9
vendors = [
8
10
"Yellow" ,
@@ -114,6 +116,11 @@ def distance(start, end):
114
116
return base + 0.2 * random .randint (0 , max (base , 1 ))
115
117
116
118
119
def duration(dist_miles, min_mph=15, max_mph=25):
    """Return a randomized travel time for a trip of ``dist_miles`` miles.

    The trip is modelled as running at one constant average speed, picked
    at random from the whole-number speeds ``min_mph`` .. ``max_mph``
    (both ends inclusive).

    Args:
        dist_miles: Trip distance in miles.
        min_mph: Slowest average speed to draw, in miles per hour (integer).
        max_mph: Fastest average speed to draw, in miles per hour (integer).

    Returns:
        A ``datetime.timedelta`` equal to distance divided by the drawn speed.

    Raises:
        ValueError: If ``min_mph`` is not positive or ``max_mph`` is smaller
            than ``min_mph``.
    """
    # Reject ranges that would divide by zero or make randint() fail obscurely.
    if min_mph <= 0 or max_mph < min_mph:
        raise ValueError(
            "speed range must satisfy 0 < min_mph <= max_mph, got %r..%r"
            % (min_mph, max_mph)
        )
    # 15 - 25 miles per hour on average (with the default arguments)
    avg_speed_mph = random.randint(min_mph, max_mph)
    return datetime.timedelta(hours=dist_miles / avg_speed_mph)
122
+
123
+
117
124
def fare (trip_distance ):
118
125
# loosely based on https://www.sfmta.com/getting-around/taxi/taxi-rates
119
126
# assume a random waiting time up to 10% of the distance
@@ -126,27 +133,62 @@ def tip(fare_amount):
126
133
# up to 20% tip
127
134
return 0.2 * random .randint (0 , round (fare_amount ))
128
135
136
+
129
137
def round_f(v):
    """Return ``v`` as a float limited to two decimal places.

    The value is rendered as fixed-point text with two fractional digits
    and that text is parsed back into a float.
    """
    two_decimals = format(v, ".2f")
    return float(two_decimals)
131
139
132
140
141
def generate_timestamp(current):
    """Return the next pickup time, a random interval after ``current``.

    A target ride rate (rides per hour) is derived from the day of the week
    and the hour of ``current``; the gap to the next ride is then drawn from
    an exponential distribution with that rate.

    Args:
        current: ``datetime.datetime`` of the previous pickup.

    Returns:
        ``current`` advanced by the randomly drawn gap.
    """
    noon = 12
    # Largest distance, in hours, that an hour of the day is allowed from noon.
    widest_gap = 24 - noon

    # (quietest, busiest) rides per hour for this kind of day.
    # isoweekday() numbers Monday as 1 and Sunday as 7.
    day_of_week = current.isoweekday()
    if day_of_week > 6:
        quietest, busiest = 50, 600
    elif day_of_week == 6:
        quietest, busiest = 200, 800
    else:
        quietest, busiest = 100, 1000

    hours_from_noon = abs(current.hour - noon)
    # Scale the rate along a quarter sine wave so it rises smoothly from
    # `quietest` (furthest from noon) to `busiest` (at noon).
    load = math.sin(0.5 * math.pi * (widest_gap - hours_from_noon) / widest_gap)
    rides_per_hour = quietest + (busiest - quietest) * load

    # expovariate() yields the gap in hours; convert it to seconds.
    gap_seconds = random.expovariate(rides_per_hour) * 3600
    return current + datetime.timedelta(seconds=gap_seconds)
168
+
169
+
170
def format_ts(ts):
    """Render ``ts`` as ``YYYY-MM-DD HH:MM:SS`` text via its ``strftime``."""
    pattern = "%Y-%m-%d %H:%M:%S"
    return ts.strftime(pattern)
172
+
173
+
133
174
def main ():
134
175
if len (sys .argv ) != 2 :
135
176
print ("usage: %s number_of_records_to_generate" % sys .argv [0 ])
136
177
exit (1 )
137
178
179
+ current = datetime .datetime (year = 2017 , month = 4 , day = 1 )
138
180
num_records = int (sys .argv [1 ])
139
181
for i in range (num_records ):
140
182
record = {}
183
+ current = generate_timestamp (current )
141
184
record ["vendor" ] = vendor ()
142
- # TODO: Find a simple but somewhat realistic model for daily / weekly patterns
143
- # record["pickup_datetime"] = pickup_datetime
144
- # record["dropoff_datetime"] = dropoff_datetime
185
+ record ["pickup_datetime" ] = format_ts (current )
145
186
record ["passenger_count" ] = passengers ()
146
187
147
188
start = random .choice (zones )
148
189
end = random .choice (zones )
149
190
trip_distance = distance (start , end )
191
+ record ["dropoff_datetime" ] = format_ts (current + duration (trip_distance ))
150
192
151
193
record ["pickup_zone" ] = start
152
194
record ["dropoff_zone" ] = end
0 commit comments