#!/usr/bin/env python3
"""Launch script for PISCES."""
import argparse
import datetime
import os
import re
from shlex import quote
import subprocess
import sys
from urllib.parse import urlparse
import uuid

import boto3
import requests

from base.git.git_build_info import GIT_BUILD_INFO
from base.logging import zoox_logger
from data.chum import chumpy
from vehicle.planner.metrics.pipegraph import PipeGraph
from vehicle.planner.metrics.pmav.utils import GITHUB_TOKEN
import mined_metric.builder.metrics_impl.pisces.config as pisces_config
import mined_metric.builder.metrics_impl.pisces.errors as errs
from mined_metric.builder.metrics_impl.pisces.task.standard import (
    PiscesStandardTask,
)
from mined_metric.builder.metrics_impl.pisces.task.meta import PiscesMetaTask


LOG = zoox_logger.ZooxLogger(__name__)


def validate_run_meta_ids(run_ids):
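    """Validate that every run id is a well-formed chum URI.

    Also warns when a supplied chum range exceeds the 120s limit for video
    generation.

    Raises:
      PiscesChumUriError: if any supplied URI is not a valid chum URI.
    """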
    invalid_uris = [
        run_id for run_id in run_ids if not chumpy.is_valid_chum_uri(run_id)
    ]
    if invalid_uris:
        raise errs.PiscesChumUriError(
            f"Invalid chum URIs supplied: '{','.join(invalid_uris)}'"
        )

    for uri in run_ids:
        _, chum_range = chumpy.store_and_range_from_uri(uri)
        # chum range timestamps are in nanoseconds
        if (chum_range.end_time - chum_range.start_time) / 1e9 > 120:
            LOG.warn(
                "Supplied chum range for uri '%s' exceeds 120s limit for video "
                "generation.",
                uri,
            )


def validate_dataset_path(dataset_path):
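    """Check that the dataset path exists locally or as an object on S3.

    Raises:
      PiscesFileNotFoundError: if the path cannot be located.
    """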
    exception = ""
    file_exists = os.path.isfile(dataset_path)
    if file_exists:
        return

    # check if file exists on S3
    s3_object = urlparse(dataset_path)
    s3 = boto3.resource("s3")

    try:
        # strip leading "/"
        key = s3_object.path[1:]
        bucket = s3.Bucket(s3_object.netloc)
        objs = list(bucket.objects.filter(Prefix=key))
        if len(objs) > 0 and objs[0].key == key:
            return
    except Exception as e:
        exception = e

    raise errs.PiscesFileNotFoundError(
        f"Could not locate file: '{dataset_path}' ({exception})"
    )


def check_path_exists(path):
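    """Raise PiscesFileNotFoundError if path is None or not a local file."""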
    if path is None:
        raise errs.PiscesFileNotFoundError("No path provided")
    if not os.path.isfile(path):
        raise errs.PiscesFileNotFoundError(f"Could not locate file: '{path}'")


def validate_sdl_pipedream_paths(control_path, candidate_path):
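    """Require control/candidate paths to be both unset or both existing files."""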
    if control_path is None and candidate_path is None:
        return
    check_path_exists(control_path)
    check_path_exists(candidate_path)


def is_email(target):
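    """Return whether target looks like a zoox.com email address."""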
    regex = r"^\w+([\.-]?\w+)*@zoox\.com$"
    return re.search(regex, target) is not None


def validate_notification_target(target):
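    """Accept a zoox.com email or a '#'-prefixed Slack channel.

    Raises:
      PiscesInvalidSlackTargetError: if the target is neither.
    """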
    # check if valid email
    if is_email(target):
        return

    # check if potentially valid slack channel
    if target.startswith("#"):
        return

    raise errs.PiscesInvalidSlackTargetError(
        f"Not a valid notification target: {target}"
    )


def get_commit_info(commit):
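    """Fetch commit metadata for the given SHA from zooxco/driving via the GitHub API.

    Raises:
      PiscesInvalidShaError: if the SHA cannot be found on the remote repo.
    """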
    resp = requests.get(
        f"https://git.zooxlabs.com/api/v3/repos/zooxco/driving/commits/{commit}",
        headers={"Authorization": f"token {GITHUB_TOKEN}"},
    )
    if resp.status_code == 200:
        return resp.json()
    raise errs.PiscesInvalidShaError(
        f"Could not find SHA {commit} on remote repo "
        "git.zooxlabs.com/zooxco/driving"
    )


def validate_shas(
    control: str, control_label: str, candidate: str, candidate_label: str
):
    """Validate candidate and control SHAs.

    Args:
      control: git SHA1 hash for control commit.
      control_label: Human-readable label for control commit.
      candidate: git SHA1 hash for candidate commit.
      candidate_label: Human-readable label for candidate commit.

    Returns a tuple of (control, candidate) 40 character SHA1 hashes.
    """
    n = min(len(control), len(candidate))
    if n < 8:
        raise errs.PiscesInvalidShaError(
            f"Both candidate {candidate} and control {control} must be at "
            "least 8 characters"
        )

    if control[:n] == candidate[:n] and (
        control_label == candidate_label
        or (control == control_label and candidate == candidate_label)
    ):
        raise errs.PiscesSameShaError(
            f"Candidate {candidate} and control {control} are the same and "
            f"have the same label {control_label}"
        )

    control_commit = get_commit_info(control)
    candidate_commit = get_commit_info(candidate)

    LOG.info(
        "Control   SHA %s: %s",
        control[:8],
        control_commit["commit"]["message"].strip(),
    )
    LOG.info(
        "Candidate SHA %s: %s",
        candidate[:8],
        candidate_commit["commit"]["message"].strip(),
    )

    return (control_commit["sha"], candidate_commit["sha"])


def validate_labels(control, candidate):
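    """Raise PiscesSameLabelError if the control and candidate labels match."""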
    if control == candidate:
        raise errs.PiscesSameLabelError(
            f"Candidate label {candidate} and control label {control} are the "
            "same"
        )


def validate_zoox_workspace_root():
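    """Check that $ZOOX_WORKSPACE_ROOT is set and its git SHA matches this build.

    Raises:
      PiscesInvalidZooxWorkspaceError: if the variable is unset or the SHAs
        differ.
    """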
    if "ZOOX_WORKSPACE_ROOT" not in os.environ:
        raise errs.PiscesInvalidZooxWorkspaceError(
            "No $ZOOX_WORKSPACE_ROOT environment variable set. Perhaps you "
            "forgot to source scripts/shell/zooxrc.sh."
        )

    git_dir = os.path.join(os.environ["ZOOX_WORKSPACE_ROOT"], ".git")
    workspace_git_sha = (
        subprocess.check_output(
            ["git", "--git-dir", git_dir, "rev-parse", "HEAD"]
        )
        .strip()
        .decode("utf8")
    )

    if workspace_git_sha != GIT_BUILD_INFO.git_sha:
        raise errs.PiscesInvalidZooxWorkspaceError(
            f"Your $ZOOX_WORKSPACE_ROOT ({os.environ['ZOOX_WORKSPACE_ROOT']}) "
            "does not match the repository you are running PISCES from. "
            "Specifically, the git SHA at $ZOOX_WORKSPACE_ROOT is "
            f"{workspace_git_sha} but the git SHA of your build is "
            f"{GIT_BUILD_INFO.git_sha}."
        )


def get_user_email():
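    """Return the launching user's email from pipedream env vars or git config."""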
    # if on pipedream, construct the email from pipedream envvars
    user = os.environ.get("PIPEDREAM_USER", None)
    if user:
        email = f"{user}@zoox.com"
    else:
        email = subprocess.check_output(["git", "config", "user.email"]).decode(
            "utf8"
        )
    return email.strip()


def get_slack_user_id(email):
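    """Look up a Slack user id by email via the users.lookupByEmail API.

    Raises:
      PiscesSlackLookupError: if the lookup fails.
    """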
    data = {"token": pisces_config.SLACK_TOKEN, "email": email}
    response = requests.post(
        "https://slack.com/api/users.lookupByEmail", data=data
    )
    if not (200 <= response.status_code < 300 and response.json()["ok"]):
        raise errs.PiscesSlackLookupError(
            f"Could not lookup user by email ({email}): {response.text}"
        )
    return response.json()["user"]["id"]


def get_notification_id(target):
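    """Resolve emails to Slack user ids; pass channel names through unchanged."""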
    if is_email(target):
        return get_slack_user_id(target)
    return target


def split_arg(arg):
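    """Split a comma-separated argument into a list; None yields an empty list."""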
    return [] if arg is None else arg.split(",")


def sanitize_label(label, sha):
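    """Default the label to the SHA and replace ' ', ',' and '/' with '_'."""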
    if label is None:
        label = sha
    label = re.sub(r"[ ,/]", "_", label)
    return label


def parse_args():
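    """Parse and validate the PISCES launcher command-line arguments."""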
    parser = argparse.ArgumentParser(description="Run PISCES on two SHAs")
    parser.add_argument(
        "--control-sha",
        type=str,
        required=True,
        help="The git sha to run the metric on.",
    )
    parser.add_argument(
        "--control-label",
        type=str,
        required=False,
        default=None,
        help="Label for the control run",
    )
    parser.add_argument(
        "--candidate-sha",
        type=str,
        required=True,
        help="The git sha to compare to.",
    )
    parser.add_argument(
        "--candidate-label",
        type=str,
        required=False,
        default=None,
        help="Label for the candidate run",
    )
    parser.add_argument(
        "--tracking",
        required=False,
        type=str,
        default=None,
        help="Branch name to use when recording stats in this job.",
    )
    parser.add_argument(
        "--notify",
        type=str,
        required=False,
        default=None,
        help="The email or slack channel to notify",
    )
    parser.add_argument(
        "--num-workers",
        type=int,
        default=30,
        help="Number of workers to use in each MR job.",
    )
    parser.add_argument(
        "--no-xray",
        required=False,
        action="store_true",
        help="DEPERCATED. Whether to run with visualization. Has no effect: always true.",
    )
    parser.add_argument(
        "--with-pcp",
        required=False,
        action="store_true",
        help="Do we want to run the PCP PISCES",
    )

    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        "--uris",
        "-i",
        type=str,
        required=False,
        help="Launch PISCES on specific chum uris",
    )
    group.add_argument(
        "--dataset-path",
        "-p",
        type=str,
        required=False,
        help="Launch PISCES against a different benchmark dataset",
    )
    group.add_argument(
        "--dataset",
        "-d",
        type=str,
        required=False,
        choices=pisces_config.SCENARIOS_LUT.keys(),
        help="Launch PISCES with a specific scenario",
    )

    developer_group = parser.add_argument_group("Developer options")
    developer_group.add_argument(
        "--dry-run",
        required=False,
        action="store_true",
        help="Whether to do a dry run of the launcher.",
    )
    developer_group.add_argument(
        "--version",
        "-v",
        action="version",
        version=f"PISCES {pisces_config.VERSION_STR}",
    )
    developer_group.add_argument(
        "--analysis-branch",
        type=str,
        required=False,
        default="master",
        help=(
            "Developer option: remote git branch from which to run PISCES's "
            "data extraction and analysis steps."
        ),
    )

    steering_mode_group = parser.add_argument_group(
        "Steering mode (2ws vs 4ws) comparison"
    )

    # When launched without any of the `--sdl-pipedream-*-json-path`
    # arguments below, a meta job is submitted that creates the PISCES job
    # (see should_run_meta_task).
    steering_mode_group.add_argument(
        "--sdl-pipedream-control-2ws-json-path",
        type=str,
        required=False,
        help=argparse.SUPPRESS,  # "INTERNAL USE ONLY. Path to sdl pipedream job for the control"
    )
    steering_mode_group.add_argument(
        "--sdl-pipedream-candidate-2ws-json-path",
        type=str,
        required=False,
        help=argparse.SUPPRESS,  # "INTERNAL USE ONLY. Path to sdl pipedream job for the candidate"
    )
    steering_mode_group.add_argument(
        "--sdl-pipedream-control-4ws-json-path",
        type=str,
        required=False,
        help=argparse.SUPPRESS,  # "INTERNAL USE ONLY. Path to sdl pipedream job for the control"
    )
    steering_mode_group.add_argument(
        "--sdl-pipedream-candidate-4ws-json-path",
        type=str,
        required=False,
        help=argparse.SUPPRESS,  # "INTERNAL USE ONLY. Path to sdl pipedream job for the candidate"
    )
    steering_mode_group.add_argument(
        "--steering-mode-comparison-only",
        action="store_true",
        help=(
            "Run the 2ws vs 4ws steering mode comparison tests only, i.e. skip "
            "all PISCES sim tests other than ':steering_mode_log_tests'."
        ),
    )

    args = parser.parse_args()

    args.uris = split_arg(args.uris)

    validate_zoox_workspace_root()

    validate_run_meta_ids(args.uris)

    if args.dataset_path:
        validate_dataset_path(args.dataset_path)

    if args.dataset:
        validate_dataset_path(pisces_config.SCENARIOS_LUT[args.dataset].path)

    if args.notify is None:
        args.notify = get_user_email()
    validate_notification_target(args.notify)
    args.notify = get_notification_id(args.notify)

    validate_sdl_pipedream_paths(
        args.sdl_pipedream_control_2ws_json_path,
        args.sdl_pipedream_candidate_2ws_json_path,
    )

    validate_sdl_pipedream_paths(
        args.sdl_pipedream_control_4ws_json_path,
        args.sdl_pipedream_candidate_4ws_json_path,
    )

    args.control_label = sanitize_label(args.control_label, args.control_sha)
    args.candidate_label = sanitize_label(
        args.candidate_label, args.candidate_sha
    )

    (args.control_sha, args.candidate_sha) = validate_shas(
        args.control_sha,
        args.control_label,
        args.candidate_sha,
        args.candidate_label,
    )

    validate_labels(args.control_label, args.candidate_label)

    return args


def should_run_meta_task(args):
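    """Return True when no sdl pipedream JSON paths were supplied."""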
    return (
        args.sdl_pipedream_control_2ws_json_path is None
        and args.sdl_pipedream_candidate_2ws_json_path is None
        and args.sdl_pipedream_control_4ws_json_path is None
        and args.sdl_pipedream_candidate_4ws_json_path is None
    )


def main():
    """Launch PISCES analysis."""
    LOG.debug("Running with python version %s", sys.version)
    LOG.debug("Running with boto3 version %s", boto3.__version__)
    LOG.debug("Running with requests version %s", requests.__version__)

    try:
        args = parse_args()
    except errs.PiscesError as e:
        LOG.error(e)
        return 1

    if should_run_meta_task(args):
        job_name = "pisces_builder"
    else:
        job_name = "pisces"

    validation_id = uuid.uuid4()
    now = datetime.datetime.now()

    graph = PipeGraph(
        job_name,
        metadata={
            "Launched At": str(now),
            "Team": "Planner Metrics",
            "Contact": "jkegelman@zoox.com,echu@zoox.com,dpalguna@zoox.com,dzeng@zoox.com",
            "Documentation": "https://confluence.zooxlabs.com/pages/viewpage.action?pageId=141157981",
            "Control": f"https://git.zooxlabs.com/zooxco/driving/commits/{args.control_sha}",
            "Candidate": f"https://git.zooxlabs.com/zooxco/driving/commits/{args.candidate_sha}",
            "HR Control": f"{args.control_label} ({args.control_sha[:8]})",
            "HR Candidate": f"{args.candidate_label} ({args.candidate_sha[:8]})",
            "PISCES Version": pisces_config.VERSION_STR,
            "PISCES Page": f"http://pisces.web.zooxlabs.com/{validation_id}/",
            "Bazel Command": " ".join(
                [
                    "bazel run",
                    "//mined_metric/builder/metrics_impl/pisces:launch_pisces --",
                ]
                + list(map(quote, sys.argv[1:]))
            ),
        },
        autoRetryBudget=1,
        links=[
            dict(
                text="Documentation",
                url="https://confluence.zooxlabs.com/pages/viewpage.action?pageId=141157981",
            ),
            dict(
                text="Control",
                url=f"https://git.zooxlabs.com/zooxco/driving/commits/{args.control_sha}",
            ),
            dict(
                text="Candidate",
                url=f"https://git.zooxlabs.com/zooxco/driving/commits/{args.candidate_sha}",
            ),
            dict(
                text="PISCES Page",
                url=f"http://pisces.web.zooxlabs.com/{validation_id}/",
            ),
        ],
    )

    num_workers = 1
    dataset_path = args.dataset_path
    if args.dataset:
        scenarios = pisces_config.SCENARIOS_LUT[args.dataset]
        num_workers = scenarios.num_workers
        dataset_path = scenarios.path

    num_workers = max(num_workers, args.num_workers)

    # because args.notify could be a slack channel, we get the user email separately
    user_email = get_user_email()
    TaskKlass = (
        PiscesMetaTask if should_run_meta_task(args) else PiscesStandardTask
    )
    TaskKlass.add_tasks(
        graph,
        validation_id,
        num_workers,
        user_email,
        args.control_sha,
        args.control_label,
        args.candidate_sha,
        args.candidate_label,
        args.tracking,
        args.notify,
        args.uris,
        dataset_path,
        args.analysis_branch,
        args.with_pcp,
        args.sdl_pipedream_control_2ws_json_path,
        args.sdl_pipedream_control_4ws_json_path,
        args.sdl_pipedream_candidate_2ws_json_path,
        args.sdl_pipedream_candidate_4ws_json_path,
        args.steering_mode_comparison_only,
    )

    graph.submit(dry_run=args.dry_run)
    return 0


if __name__ == "__main__":
    sys.exit(main())
