# Comparison of Synthetic Logs
This notebook compares synthetic scenarios with real-world logs at scale. We aim for a similar number of interactions in each category, and for the planner to be strained in a similar fashion, even if the hero behavior changes and the original logs diverge.

## 1. Setup

In [None]:
import json
import numpy as np 
import collections

from mined_metric.jupyter.utils.data_access_util import *
from mined_metric.jupyter.utils.data_processing_util import *
from mined_metric.jupyter.utils.hero_interaction_util import *

%matplotlib inline

# Config
# Experiment IDs of the control and candidate runs. They are only used when
# VALIDATION_ID still contains the "PLACEHOLDER" marker.
EXP_ID_CONTROL = "EXP_ID_CONTROL_PLACEHOLDER"
EXP_ID_CANDIDATE = "EXP_ID_CANDIDATE_PLACEHOLDER"
# Validation ID. Once it no longer contains "PLACEHOLDER", both experiments are
# retrieved through it instead of through the two experiment IDs above.
VALIDATION_ID = "VALIDATION_ID_PLACEHOLDER"
# Thresholds for pass / fail. Passing when MIN_* <= reproduction_rate[%] < MAX_*
MIN_PASS_THRESHOLD = 90.0
MAX_PASS_THRESHOLD = 130.0

In [None]:
# Retrieve all data for both candidate and control branches.
# A configured validation ID takes precedence; otherwise the two experiments
# are fetched individually through their experiment IDs.
if "PLACEHOLDER" not in VALIDATION_ID:
    exp_data_control, exp_data_candidate = get_validation_data(VALIDATION_ID)
    if "baseline" in exp_data_candidate['experiment']['metricId']:
        # Swap the roles since the second element appears to be the baseline.
        exp_data_control, exp_data_candidate = exp_data_candidate, exp_data_control
    elif "baseline" not in exp_data_control['experiment']['metricId']:
        # Neither metric ID is marked as baseline, so the order returned by
        # the validation is kept and the user is asked to verify it manually.
        print("WARNING: No baseline ID specified, check the control metric ID and swap them if needed!")
else:
    exp_data_control = get_experiment_data(EXP_ID_CONTROL)
    exp_data_candidate = get_experiment_data(EXP_ID_CANDIDATE)

# Echo the metric ID and git SHA of both branches for a manual sanity check.
print("Control: ID {} SHA {}".format(exp_data_control['experiment']['metricId'], exp_data_control["experiment"]["gitsha"]))
print("Candidate: ID {} SHA {}".format(exp_data_candidate['experiment']['metricId'], exp_data_candidate["experiment"]["gitsha"]))
print("Check Control and Candidate ID. Git SHA should be equal for this metric!")

In [None]:
# Load the layout configuration that is handed to the Argus video helpers.
with open("/mnt/sun-pcs01/jupyterhub/argus/argus_log_conversion_layout.json") as layout_file:
    # Read the whole file first, then parse the JSON text.
    argus_layout = json.loads(layout_file.read())

## 2. Metrics Summary

In [None]:
# Calculate basic Chum URI summary statistics
# The parse step yields the chum URIs plus one meta data object per branch;
# everything below in this cell is derived from these three values.
chum_uris, meta_data_control, meta_data_candidate = parse_meta_data_for_validations(exp_data_control, exp_data_candidate)
scenario_lookup = get_scenario_lookup(chum_uris)

# Per-branch lookups that later cells index by chum (Argus embeds, monitor
# based scenario lists).
# NOTE(review): these are built before the exclusion step below; whether
# remove_scenarios_from_metric also affects them is not visible here -- confirm.
chum_dict_control = input_output_chum_dict(meta_data_control)
chum_dict_candidate = input_output_chum_dict(meta_data_candidate)
monitor_dict_candidate = monitor_dict(meta_data_candidate)

# Insert scenario IDs of scenarios that should be excluded from the metric computations.
# Typically flaky or unrealistic failures are listed here.
scenario_ids_to_exclude = []
# chum_uris is rebound to the helper's return value, so the count printed
# below is taken after the exclusion has been applied.
chum_uris = remove_scenarios_from_metric(scenario_ids_to_exclude, scenario_lookup, chum_uris, meta_data_control, meta_data_candidate)

print("Total number of scenarios: {}".format(len(chum_uris)))

In [None]:
# Collect the interactions of each branch, control first.
# dict[chum][interaction_type][entity_id][start_time] = interaction_duration
data_dict_control = aggregate_interactions(meta_data_control)
data_dict_experiment = aggregate_interactions(meta_data_candidate)

# Drop base line entries that occur after a monitor has ended the simulation.
data_dict_control = cut_entries_to_overlap(data_dict_control, monitor_dict_candidate)

# Per-entity aggregation of the interactions.
# dict[interaction_type][chum] = #interaction_control - #interaction_experiment
diff_per_entity = interaction_per_entity_differences(data_dict_control, data_dict_experiment)
# dict[interaction_type][chum] = #interaction
num_per_entity_control = interaction_per_entity_total(data_dict_control)
num_per_entity_experiment = interaction_per_entity_total(data_dict_experiment)

# Per-type aggregation of the per-entity numbers.
# dict[interaction_type] = #per_entity_interaction
type_aggregate = aggregate_per_type(diff_per_entity)
type_sum_aggregate_control = aggregate_per_type(num_per_entity_control)
type_sum_aggregate_experiment = aggregate_per_type(num_per_entity_experiment)
summary = create_summary_dict(
    type_sum_aggregate_control,
    type_sum_aggregate_experiment,
    MIN_PASS_THRESHOLD,
    MAX_PASS_THRESHOLD,
)

# Render the summary as a table, transposed for display.
df = pd.DataFrame(
    data=summary,
    index=['Control', 'Converted', 'Reproduced[%]', 'Pass/Fail'],
)
df.transpose()

In [None]:
# Inspect the chum with the largest absolute interaction difference for `label`.
label = 'kEntityAwareness'
chum_of_interest = max(
    diff_per_entity[label],
    key=lambda chum: abs(diff_per_entity[label][chum]),
)
print("Max abs difference of interactions:", diff_per_entity[label][chum_of_interest])

# List the track IDs of each branch, if that branch has entries for this
# chum and label at all.
print("Track IDs Experiment")
if label in data_dict_experiment.get(chum_of_interest, {}):
    print(list(map(str, data_dict_experiment[chum_of_interest][label])))
print("Track IDs Baseline")
if label in data_dict_control.get(chum_of_interest, {}):
    print(list(map(str, data_dict_control[chum_of_interest][label])))

# Embed the Argus view for the candidate and control chum of this scenario.
embed_argus(
    argus_layout,
    chum_dict_candidate[chum_of_interest],
    chum_dict_control[chum_of_interest],
    True,
)

In [None]:
# List the scenarios that the collision monitor has ended.
collision_scenarios = get_collision_scenarios(monitor_dict_candidate)
print("Number of collisions ", len(collision_scenarios))
for collision_chum in collision_scenarios:
    print(collision_chum)

# Index of scenario you want to visualize.
index = 0
if index < len(collision_scenarios):
    selected_chum = collision_scenarios[index]
    argus_str = argus_string(
        argus_layout,
        chum_dict_candidate[selected_chum],
        chum_dict_control[selected_chum],
        True,
    )
else:
    # Nothing to visualize at this index: render an empty HTML snippet.
    argus_str = ""
HTML(argus_str)

In [None]:
# Report every scenario that the divergence monitor has ended early.
diverging_scenarios = get_pass_on_divergence_scenarios(monitor_dict_candidate)
for diverging_chum, crop_duration in diverging_scenarios:
    # The crop duration is scaled by 1e9 so that it is reported in seconds.
    seconds_earlier = crop_duration / 1e9
    candidate_chum = chum_dict_candidate[diverging_chum]
    print("Experiment ended %s s earlier with chum: \n %s" % (seconds_earlier, candidate_chum))