# CAS Validation Pipeline Tools Contents
Please run the first initialization section before running any of the tools.
- **Bucketed Metrics:** Show a 2D array of metrics with buckets X metrics.
- **Compare experiments:** Compare the chum URIs from two experiments.
- **Diffing Tool:** Given a chum URI, build the matching chum URI that points at the variant data under this experiment.

## Commit guideline
- For new changes, please create a PR that updates the `mined_metric/jupyter/templates/cas_validation.ipynb` file, and remember to replace the `EXPERIMENT_ID` value with `"EXPERIMENT_ID_PLACEHOLDER"`.
- Clear the cell outputs before committing.

In [None]:
# Initialization
from mined_metric.jupyter.utils.data_access_util import get_experiment_data, chum_summary_stats

# ID of the experiment that the tools below analyze. The committed template
# keeps the placeholder value (see the commit guideline at the top).
EXPERIMENT_ID = "EXPERIMENT_ID_PLACEHOLDER"

# Fetch the experiment data once; the later cells reuse `exp_data`.
exp_data = get_experiment_data(EXPERIMENT_ID)
# A result carrying an "exception" key is reported here but execution is not
# stopped, so later cells still run against whatever was returned.
if "exception" in exp_data:
    print("Invalid data is returned: %s" % exp_data["exception"])

# Bucketed Metric
Show a 2D array of metrics with buckets X metrics.

In [None]:
from mined_metric.jupyter.utils.cas_validation_util import bucketingMetrics
import pandas as pd

# Show every metric column instead of letting pandas elide the middle ones.
pd.set_option("display.max_columns", 999)
# Use the fully-qualified option key. pandas resolves option names as
# patterns, and the abbreviated 'precision' is ambiguous (and therefore
# rejected) on newer pandas releases that also define other *.precision
# options; 'display.precision' is valid on old and new versions alike.
pd.set_option("display.precision", 4)

# Bucket the experiment metrics and render them as a table.
scenario_metrics = bucketingMetrics(exp_data)
df = pd.DataFrame(scenario_metrics)
# Bare expression: the notebook renders the DataFrame as the cell output.
df

In [None]:
# Show the Pareto Chart
# Built from `scenario_metrics`, which the Bucketed Metric cell computes, so
# that cell has to be run first.
from mined_metric.jupyter.utils.cas_validation_util import paretoChart
fig = paretoChart(scenario_metrics)
# Render the returned figure in the notebook output.
fig.show()

## Compare experiments
Compare the chum URIs from two experiments.\
**Input:** Experiment ID of the compared job.

In [None]:
# Input the candidate experiment ID here.
# The experiment loaded in the initialization cell (`exp_data`) acts as the
# baseline of the comparison.
EXPERIMENT_ID_candidate = "3ed80a27-6e5c-4ed7-977e-90c51032ffd0"

# Retrieve all experiment data and metadata
exp_cand = get_experiment_data(EXPERIMENT_ID_candidate)
# An "exception" key is only reported; the cell keeps going afterwards.
if "exception" in exp_cand:
    print("Invalid data is returned: %s" % exp_cand["exception"])

# NOTE(review): "meta" is read even when an exception was reported above;
# presumably it may be missing in that case - confirm against
# get_experiment_data.
print("# of Events in the baseline branch: {}".format(len(exp_data["meta"])))
print("# of Events in the candidate branch: {}".format(len(exp_cand["meta"])))

In [None]:
from data.chum import chumpy
import numpy as np
import csv

class chumUri(object):
    """A chum URI reduced to a run ID plus a begin/end timestamp window.

    Equality is fuzzy: two instances are equal when their run IDs match and
    both the begin and the end timestamps differ by less than the left-hand
    operand's ``timestamp_offset_allowance``. Because that relation is
    tolerance-based it is not transitive, so instances should not be used as
    set members or dict keys; compare them with ``==`` / ``in`` on lists.

    Args:
        run_id: Identifier of the run the URI belongs to.
        begin_timestamp: Start of the window (numeric). The helpers in this
            notebook pass the chum start time divided by 1e9.
        end_timestamp: End of the window, same unit as ``begin_timestamp``.
        timestamp_offset_allowance: Maximum (exclusive) absolute difference
            allowed per window edge for two URIs to compare equal.
    """

    def __init__(self, run_id, begin_timestamp, end_timestamp,
            timestamp_offset_allowance=3.0):
        self.run_id = run_id
        self.begin_timestamp = begin_timestamp
        self.end_timestamp = end_timestamp
        self.timestamp_offset_allowance = timestamp_offset_allowance

    def __eq__(self, other):
        """Return True when ``other`` is the same run within the allowance."""
        # Let Python fall back to its default handling for foreign types
        # instead of failing with AttributeError on a missing attribute.
        if not isinstance(other, chumUri):
            return NotImplemented
        allowance = self.timestamp_offset_allowance
        return bool(
            self.run_id == other.run_id
            and np.abs(self.begin_timestamp - other.begin_timestamp) < allowance
            and np.abs(self.end_timestamp - other.end_timestamp) < allowance)

    def __ne__(self, other):
        """Return the negation of ``__eq__``.

        Python 2 does not derive ``!=`` from ``==``, so without this method
        ``a != b`` would silently fall back to an identity comparison.
        """
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return equal
        return not equal

    def __str__(self):
        return 'chum://{}@{:.3f}-{:.3f}'.format(self.run_id, self.begin_timestamp,
                self.end_timestamp)

    def __repr__(self):
        return 'chumUri({!r}, {!r}, {!r}, timestamp_offset_allowance={!r})'.format(
                self.run_id, self.begin_timestamp, self.end_timestamp,
                self.timestamp_offset_allowance)

def read_csv(csv_fn):
    """Load a CSV file as a list of rows with whitespace-trimmed cells.

    Args:
        csv_fn: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is the list of that
        record's cells with surrounding whitespace stripped.
    """
    with open(csv_fn) as handle:
        return [[cell.strip() for cell in record]
                for record in csv.reader(handle)]

def get_chum_uris(csv_fn):
    """Build ``chumUri`` objects from the first column of a CSV file.

    Args:
        csv_fn: Path of a CSV file whose first column holds chum URI strings.

    Returns:
        A list of ``chumUri`` instances, one per non-blank row, in file
        order. The start and end times reported by ``chumpy`` are divided by
        1e9 before being stored.
    """
    result = []
    for row in read_csv(csv_fn):
        # csv.reader yields an empty list for a blank line (e.g. a trailing
        # newline at the end of the file); skip those, and whitespace-only
        # rows, instead of failing with IndexError / parsing an empty URI.
        if not row or not row[0]:
            continue
        chum_uri = row[0]
        chum_uri_proto = chumpy.parseChumUriToProto(chum_uri)
        result.append(chumUri(
            chumpy.getMetaIdFromChumUri(chum_uri),
            chumpy.getStartTime(chum_uri_proto) / float(1e9),
            chumpy.getEndTime(chum_uri_proto) / float(1e9)))
    return result

def get_chum_uris_from_exp_data(exp_data):
    """Build ``chumUri`` objects for the events of an experiment.

    Args:
        exp_data: Experiment data whose ``"meta"`` entry is an iterable of
            event dicts. Only events with a non-empty ``"event_info"`` that
            carries a non-None ``"chum_uri"`` are used.

    Returns:
        A list of ``chumUri`` instances in event order. The start and end
        times reported by ``chumpy`` are divided by 1e9 before being stored.
    """
    result = []
    for event in exp_data["meta"]:
        # Events without (or with empty/None) event_info have no URI to parse.
        event_info = event.get("event_info")
        if not event_info:
            continue
        # .get() also tolerates an event_info that lacks the key entirely,
        # which previously raised KeyError.
        chum_uri = event_info.get("chum_uri")
        if chum_uri is None:
            continue
        chum_uri_proto = chumpy.parseChumUriToProto(chum_uri)
        result.append(chumUri(
            chumpy.getMetaIdFromChumUri(chum_uri),
            chumpy.getStartTime(chum_uri_proto) / float(1e9),
            chumpy.getEndTime(chum_uri_proto) / float(1e9)))
    return result

def diff(l_1, l_2):
    """Return the items of ``l_1`` that have no equal element in ``l_2``.

    Membership is decided with ``in`` (i.e. ``==``); order and duplicates of
    ``l_1`` are preserved.
    """
    only_in_first = []
    for item in l_1:
        if item in l_2:
            continue
        only_in_first.append(item)
    return only_in_first

def intersect(l_1, l_2):
    """Return the items of ``l_1`` that have an equal element in ``l_2``.

    Membership is decided with ``in`` (i.e. ``==``); order and duplicates of
    ``l_1`` are preserved.
    """
    shared = []
    for item in l_1:
        if item in l_2:
            shared.append(item)
    return shared

In [None]:
# Turn the events of both experiments into comparable chumUri objects.
l_base = get_chum_uris_from_exp_data(exp_data)
l_cand = get_chum_uris_from_exp_data(exp_cand)

# Split the events into shared ones and ones unique to either experiment.
common_list = intersect(l_base, l_cand)
unique_list_1 = diff(l_base, l_cand)
unique_list_2 = diff(l_cand, l_base)

# Summary counts.
print('----------Comparing two online runs----------')
print('Num of events in common: {}'.format(len(common_list)))
print('Num of unique events in Base' + ': {}'.format(len(unique_list_1)))
print('Num of unique events in Candidate' + ': {}'.format(len(unique_list_2)))

# Full listings, one section per group, in the same order as the counts.
for section_title, section_items in (
        ('----------Common items', common_list),
        ('----------Unique items in Base', unique_list_1),
        ('----------Unique items in Candidate', unique_list_2)):
    print(section_title)
    for e in section_items:
        print(e)

## Diffing Tool
Given a chum URI, build the matching chum URI that points at the variant data under this experiment.\
**Input:** a chum URI snippet from another experiment.

In [None]:
from data.chum import chumpy
import glob
import urllib

# urllib.quote only exists on Python 2; Python 3 moved it to urllib.parse.
try:
    from urllib import quote as url_quote
except ImportError:
    from urllib.parse import quote as url_quote

# 1. Provide input chum URI.
input_chum_uri = "chum://20191025T202643-kitt_18@1572038773.511295232#3s"
input_store, input_range = chumpy.parseChumUri(input_chum_uri)

# 2. Extract Run ID: drop the scheme prefix and everything from '@' onwards.
# str.strip('chum://') must not be used here: it strips a *set of characters*
# from both ends, so it would also eat leading run-ID characters such as
# 'c', 'h', 'u' or 'm'.
chum_scheme = 'chum://'
run_id = input_chum_uri
if run_id.startswith(chum_scheme):
    run_id = run_id[len(chum_scheme):]
run_id = run_id.split('@')[0]

# 3. Find possible variant path: current chum root first, then the old one.
path_patterns = [
    '/mnt/sun-tcs01/pcp/tools/metric_hub/*/' + EXPERIMENT_ID + '/' + run_id + '*',
    '/mnt/nautilus_rw/offline_pipeline/*/' + EXPERIMENT_ID + '/' + run_id + '*',
]
paths = []
for path_pattern in path_patterns:
    paths = glob.glob(path_pattern)
    if paths:
        break

# Stays None when no variant was found, so later code can test for it instead
# of hitting a NameError.
output_chum_uri = None
if not paths:
    print("ERROR: Chum was not found in this experiment.")
else:
    # A match under either root (not only the first one) yields an output URI.
    output_chum_uri = input_chum_uri + '?i=' + paths[0] + '&v=cas,safety_pcp'
    print("Output Chum URI:\n" + output_chum_uri)

    # 4. Build Argus Link. Only possible once an output URI exists.
    argus_url = 'https://argus.zooxlabs.com/uri?'
    argus_url += 'primaryUri=' + url_quote(input_chum_uri)
    argus_url += '&primaryNickname=candidate'
    argus_url += '&comparisonUri=' + url_quote(output_chum_uri)
    argus_url += '&comparisonNickname=baseline'
    print("Argus URL:\n" + argus_url)

## Other templates

In [None]:
# Calculate basic Chum URI summary statistics
# NOTE(review): this reads "chum_uri" directly from each "meta" entry, while
# get_chum_uris_from_exp_data reads event["event_info"]["chum_uri"] - confirm
# which layout get_experiment_data actually returns.
chum_uris = [d["chum_uri"] for d in exp_data["meta"]]
stats = chum_summary_stats(chum_uris)
# Report the "avg_duration_s" and "vehicles_included" entries of the result.
print("Average duration of Chum URIs: {:.2f}s".format(stats["avg_duration_s"]))
print("Vehicles: {}".format(stats["vehicles_included"]))