# Dataset Generator
This tool queries the DataCatalog to generate the input dataset for the CAS Validation tool.

Set up all the arguments in the first section, then run each of the following sections one by one.

In [None]:
# Argument Setup.
# Every generated file is written as kOutputDir + <file name>, so keep the
# trailing slash.
kOutputDir = '/mnt/sun-pcp01/safety_pcp/validation/tmp/'

# vehicles: inclusive range of vehicle numbers to query.
# Narrower example: first_vehicle, last_vehicle = 22, 23
first_vehicle, last_vehicle = 15, 34
veh_list = list(range(first_vehicle, last_vehicle + 1))

# geofences: pick exactly one of the lists below.
gf_list = [  # San Francisco
    'SF_1',
    'SF_2',
    'SF_3',
    'SF_4',
    'SF_5',
    'SF_6',
]
# gf_list = ['SLAC'] # SLAC
# gf_list = ['LV_1'] # Las Vegas

# dates: one [year, month] entry per month to scan.
# date_list = [[2019, 10], [2019, 11], [2019, 12], [2020, 1], [2020, 2], [2020, 3]]
date_list = [
    [2019, 10],
]

# day or night: each entry is passed as `is_day` to the time-range filter,
# so True selects the daytime range and False the night range.
day_or_night = [True, False]

In [None]:
# Add system paths.
import sys
import os
# Get current working directory root.
# Rebuild the cwd one path component at a time and stop at the first
# component whose name contains 'driving'. If no component matches, the whole
# cwd (plus a trailing '/') is used.
cwd_root = ''
# The loop variable must not be called `dir`: at notebook scope that would
# rebind the builtin dir() for every later cell.
for path_component in os.getcwd().split('/'):
    cwd_root += path_component + '/'
    if 'driving' in path_component:
        break
# Make both the root and the weather service package importable.
sys.path.insert(0, os.path.abspath(cwd_root))
sys.path.insert(0, os.path.abspath(cwd_root + 'data/catalog/services/weather'))

# Import weather api.
# NOTE(review): this import presumably only resolves because of the sys.path
# entries inserted above — confirm before moving it.
from weather.darksky_api.darksky_api import DarkSkyClient
# Single shared client; get_weather() calls weather_api.get_weather_data() on it.
weather_api = DarkSkyClient()

from datetime import datetime, timedelta
from dateutil.tz import tzlocal, tzutc
from infra.data_catalog.client import data_rest_api
from pprint import pprint
import time

# Import requests api.
import requests, json
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return a requests session whose HTTP(S) adapters retry on failure.

    Args:
        retries: Used as the total, read and connect retry budget.
        backoff_factor: Back-off factor handed to the retry policy.
        status_forcelist: HTTP status codes that trigger a retry.
        session: Existing session to configure; a new ``requests.Session``
            is created when this is falsy.

    Returns:
        The configured session (the one passed in, if any).
    """
    if not session:
        session = requests.Session()
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    # One adapter instance serves both schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session

# Test catalog api.
# Smoke-test call to the catalog client before running any query; the result
# is not stored. NOTE(review): presumably this raises or reports a failure
# when the service is unreachable — confirm against the client.
data_rest_api.ping()
# data_rest_api.get_run_types()['types']

In [None]:
def get_vehicle(veh):
    """Return the ``vehicle_list=<veh>&`` query-string fragment.

    ``veh`` must already be a string (e.g. ``'kitt_15'``); it is inserted
    as-is, without URL encoding.
    """
    return ''.join(['vehicle_list=', veh, '&'])

def get_vehicle_type_list():
    """Return the fixed vehicle-type filter fragment (Kitt and VH6).

    ``%2C`` is the URL-encoded comma separating the two types.
    """
    vehicle_type_filter = 'vehicle_type_list=Kitt%2CVH6&'
    return vehicle_type_filter

def get_types():
    """Return the fixed run-type filter fragment.

    The four run types are joined with ``%2C`` (URL-encoded comma).
    """
    run_type_filter = 'types=challenge_route%2Cpoint_to_point%2Cmpi_testing%2Cgeneral_ar&'
    return run_type_filter

def get_geofences(gf_list):
    """Return the ``geofences=...&`` fragment for the given geofence names.

    Names are joined with ``%2C`` (URL-encoded comma); an empty list yields
    ``'geofences=&'``.
    """
    encoded_names = '%2C'.join(gf_list)
    return ''.join(['geofences=', encoded_names, '&'])

def get_autonomous_mode():
    """Return the fixed fragment that restricts the query to AUTONOMOUS mode."""
    autonomous_filter = 'autonomous_mode=AUTONOMOUS&'
    return autonomous_filter

def get_timerange(is_day):
    """Return the time-of-day filter fragment.

    A truthy ``is_day`` selects 08:00-17:59, anything else selects
    18:00-23:59; both carry the URL-encoded ``/-8`` offset suffix.
    """
    day_range = 'timerange_start=08%3A00%2F-8&timerange_end=17%3A59%2F-8&'
    night_range = 'timerange_start=18%3A00%2F-8&timerange_end=23%3A59%2F-8&'
    return day_range if is_day else night_range

def get_window(year, month, day):
    """Return the one-day ``window_start=...&window_end=...&`` fragment.

    The window runs from midnight of the given date to midnight of the next
    calendar day, both with a hard-coded ``-08:00`` offset. The next day is
    computed with real calendar arithmetic, so the last day of a month (or
    year) rolls over to the 1st of the following one instead of producing a
    non-existent date such as ``2019-10-32``.

    Date components are written without zero padding, exactly as before.

    Args:
        year: Calendar year.
        month: Calendar month (1-12).
        day: Day of month.

    Returns:
        The URL-encoded query-string fragment, ending in ``&``.
    """
    try:
        following = datetime(year, month, day) + timedelta(days=1)
    except (TypeError, ValueError, OverflowError):
        # Not a real calendar date (e.g. Feb 30 from a blind 1..31 sweep) or
        # non-integer parts: keep the legacy arithmetic so callers that
        # relied on it do not start raising.
        end_year, end_month, end_day = year, month, day + 1
    else:
        end_year, end_month, end_day = following.year, following.month, following.day

    midnight_suffix = 'T00%3A00%3A00-08%3A00&'
    window_start = 'window_start=' + str(year) + '-' + str(month) + '-' + str(day)
    window_end = 'window_end=' + str(end_year) + '-' + str(end_month) + '-' + str(end_day)
    return window_start + midnight_suffix + window_end + midnight_suffix

def get_disengagement(meta_id):
    """Return True if the run has any wet-weather disengagement.

    Asks the catalog for disengagements of ``meta_id`` filtered to the
    ``ROADS_WET_NO_RAIN`` and ``RAIN`` weather types and reports whether the
    returned ``'disengagements'`` list is non-empty.
    """
    wet_weather_types = ['ROADS_WET_NO_RAIN', 'RAIN']
    # Drop weather_type_list to query every disengagement instead.
    result = data_rest_api.get_disengagements(
        run_ids=[meta_id],
        weather_type_list=wet_weather_types,
    )
    return len(result['disengagements']) > 0

def get_ts(date_str):
    """Convert a UTC timestamp string into UNIX epoch seconds.

    The previous implementation shifted the parsed time by a fixed -8 hours
    and passed it to ``time.mktime``, which interprets its argument in the
    machine's local zone. That only yields a correct epoch on a host running
    in PST: it is one hour off while daylight saving is in effect and eight
    hours off on a UTC host. The epoch is now derived directly from the UTC
    value, independent of the host's time zone.

    Args:
        date_str: Timestamp formatted as ``YYYY-MM-DDTHH:MM:SS.ffffff+00:00``.

    Returns:
        Epoch seconds as a float with the sub-second part dropped (the same
        shape ``time.mktime`` returned before).

    Raises:
        ValueError: If ``date_str`` does not match the expected format.
    """
    parsed_utc = datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S.%f+00:00')
    # Drop microseconds to keep the whole-second resolution callers had.
    whole_second_utc = parsed_utc.replace(microsecond=0)
    return (whole_second_utc - datetime(1970, 1, 1)).total_seconds()

def get_weather(meta_id, start_ts, end_ts):
    """Report whether adverse weather was observed during a run.

    Looks up the run's bounding box, then samples the weather API at the
    box's south-west corner (rounded to 3 decimals) every 1200 seconds from
    ``start_ts`` up to, but not including, ``end_ts``. Sampling stops at the
    first sample whose icon is rain, snow, sleet or fog.

    Args:
        meta_id: Run identifier passed to the bounding-box lookup.
        start_ts: Start of the run, epoch seconds (truncated to int).
        end_ts: End of the run, epoch seconds (truncated to int).

    Returns:
        ``(found, icon)``: ``found`` is True when an adverse icon was seen;
        ``icon`` is the icon of the last successful sample, or ``''`` when
        there was none or the bounding-box lookup failed.
    """
    # Response format: https://darksky.net/dev/docs#response-format
    # icons: clear-day, clear-night, rain, snow, sleet, wind, fog, cloudy,
    # partly-cloudy-day, or partly-cloudy-night.
    adverse_icons = ('rain', 'snow', 'sleet', 'fog')

    bbox_response = data_rest_api.get_bounding_box(meta_id)
    if not bbox_response['success']:
        return False, ''

    sw_corner = bbox_response['bbox']['sw']
    latitude = round(sw_corner['lat'], 3)
    longitude = round(sw_corner['lon'], 3)

    last_icon = ''
    for sample_ts in range(int(start_ts), int(end_ts), 1200):
        weather = weather_api.get_weather_data(latitude, longitude, sample_ts)
        if not weather['success']:
            # Failed sample: show the payload and move on to the next one.
            print(weather)
            continue
        last_icon = weather['currently']['icon']
        if last_icon in adverse_icons:
            return True, last_icon
    return False, last_icon


In [None]:
# Sweep the catalog for every vehicle / month / day / day-or-night
# combination. Each run with at least 3 km driven autonomously is classified
# by weather, logged to runs.txt, and counted once in the matching id set and
# odometry total.
import calendar

BASE_URL = 'https://catalog-api-prod.zooxlabs.com/api/runs?'

odometry_total_km_w_rain = 0
odometry_total_km_wo_rain = 0
runs_w_rain = set()
runs_wo_rain = set()

# `with` closes runs.txt and the HTTP session even if a query raises midway.
# A single retrying session is reused for all requests instead of building a
# new one per query.
with open(kOutputDir + 'runs.txt', 'w') as f_runs, requests_retry_session() as session:
    for veh_num in veh_list:
        veh_name = 'kitt_' + str(veh_num)
        for date in date_list:
            year, month = date[0], date[1]
            # Only visit days that exist in this month (no Feb 30, Nov 31, ...).
            last_day = calendar.monthrange(year, month)[1]
            for d in range(1, last_day + 1):
                for is_day in day_or_night:

                    time.sleep(0.3) # Wait for 0.3 second
                    print('Checking {} {} {} {}'.format(veh_name, year, month, d))
                    # NOTE(review): limit=10 with a fixed offset=0 looks like
                    # it caps each query at ten runs with no paging — confirm
                    # this is intended.
                    url = BASE_URL + get_vehicle(veh_name) + get_vehicle_type_list() + get_types() + \
                        get_geofences(gf_list) + get_autonomous_mode() + get_timerange(is_day) + \
                        get_window(year, month, d) + 'limit=10&' + 'offset=0'
                    response = session.get(url)
                    # Parse the body once; a non-JSON body is treated as a
                    # failed query rather than aborting the whole sweep.
                    try:
                        payload = response.json()
                    except ValueError:
                        print('query returned a non-JSON body, skipping:', response.status_code, url)
                        continue
                    if not payload or not payload['success']:
                        continue

                    for run in payload['runs']:
                        # Get the odometry information.
                        response_odo = data_rest_api.get_odometry(run['data_id'])
                        if not response_odo['success']:
                            continue
                        odometry_km = response_odo['odometry']['autonomous_delta_km']
                        if odometry_km < 3.0:
                            # Skip runs with less than 3 km in autonomous mode.
                            continue
                        # Get the start and end timestamp.
                        start_ts = get_ts(run['start_time'])
                        end_ts = get_ts(run['end_time'])
                        # Check weather.
                        is_raining, icon = get_weather(run['meta_id'], start_ts, end_ts)
                        # Compare the id as str: the sets hold str, so testing
                        # an encoded bytes value never matched on Python 3 and
                        # the same run's kilometres could be counted twice.
                        run_id = run['meta_id']
                        if is_raining:
                            if run_id not in runs_w_rain:
                                odometry_total_km_w_rain += odometry_km
                                runs_w_rain.add(run_id)
                        elif run_id not in runs_wo_rain:
                            odometry_total_km_wo_rain += odometry_km
                            runs_wo_rain.add(run_id)
                        print(run['meta_id'], run['start_time'], odometry_km, icon)
                        # str.format avoids nesting the same quote type inside
                        # an f-string, which is a SyntaxError before Python 3.12.
                        f_runs.write('{},{},{},{},{},{},{}\n'.format(
                            run['meta_id'], run['run_type'], run['start_time'],
                            start_ts, end_ts, run['autonomous_mode'], icon))

print("runs_total: ", len(runs_w_rain) + len(runs_wo_rain))
print("odometry_total_km: ", odometry_total_km_w_rain + odometry_total_km_wo_rain, " miles: ", (odometry_total_km_w_rain + odometry_total_km_wo_rain) / 1.60934)
print("odometry_total_km_w_rain: ", odometry_total_km_w_rain, " miles: ", odometry_total_km_w_rain / 1.60934)
print("odometry_total_km_wo_rain: ", odometry_total_km_wo_rain, " miles: ", odometry_total_km_wo_rain / 1.60934)

In [None]:
# Output to files.
# Writes one pbtxt file per weather class (one `chum` entry per run id) and a
# summary of the run count and odometry totals. Each file is opened with
# `with` so it is flushed and closed even if a write fails.
for pbtxt_name, run_ids in (
    ('runs_w_rain.pbtxt', runs_w_rain),
    ('runs_wo_rain.pbtxt', runs_wo_rain),
):
    with open(kOutputDir + pbtxt_name, 'w') as pbtxt_file:
        # Sets have no stable order; sort so reruns give identical files.
        for run_id in sorted(run_ids):
            pbtxt_file.write(f'chum: {{chum_uri: "{run_id}"}}\n')
    print(f'Finish writing {kOutputDir} {pbtxt_name}')

odometry_total_km = odometry_total_km_w_rain + odometry_total_km_wo_rain
with open(kOutputDir + 'summary.txt', 'w') as summary:
    summary.write(f"runs_total: {len(runs_w_rain) + len(runs_wo_rain)}\n")
    summary.write(f"odometry_total_km: {odometry_total_km} miles: {odometry_total_km / 1.60934}\n")
    summary.write(f"odometry_total_km_w_rain: {odometry_total_km_w_rain} miles: {odometry_total_km_w_rain / 1.60934}\n")
    summary.write(f"odometry_total_km_wo_rain: {odometry_total_km_wo_rain} miles: {odometry_total_km_wo_rain / 1.60934}\n")

In [None]:
# Exploratory query: for each configured vehicle, request a runs CSV limited
# to a small bounding box and print the outcome.

# window_start / window_end – search window, in this machine's local zone
start_time = datetime(2020, 2, 1, 8, 0, 0, tzinfo=tzlocal())
end_time = datetime(2020, 2, 28, 17, 0, 0, tzinfo=tzlocal())

# autonomous_mode
a_list = [u'autonomous']
# states - Comma separated list of states (currently not sent)
s_list = [u'Uploaded']
# types – Comma separated list of run types (currently not sent)
t_list = [u'challenge_route']

# bounding_box – Bounding box to search; this one covers part of SF_1
bbox_sf = {
    'ne': {'lat': 37.805736, 'lon': -122.402},
    'sw': {'lat': 37.797391, 'lon': -122.403},
}

# Filters shared by every vehicle. Add states=s_list and/or types=t_list
# here to narrow the search further.
csv_query_filters = {
    'window_start': start_time,
    'window_end': end_time,
    'autonomous_mode': a_list,
    'bounding_box': bbox_sf,
    'limit': 1,
}

for veh in veh_list:
    runs_csv_query = data_rest_api.get_runs_csv(vehicle=veh, **csv_query_filters)
    print(runs_csv_query['success'])
    print(runs_csv_query['runs_csv'])