{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cell-0",
   "metadata": {},
   "source": [
    "# Social rats - PAIR-R24M\n",
    "\n",
    "Data from Marshall et al. (2021)¹ ([figshare](https://figshare.com/articles/dataset/pairs_dataset/14754374)), a multi-animal 3D pose dataset of dyadic interactions in laboratory rats.\n",
    "\n",
    "---\n",
    "\n",
    "¹ Marshall, J., Klibaite, U., Gellis, A., Aldarondo, D., Olveczky, B., & Dunn, T. W. (2021). The PAIR-R24M Dataset for Multi-animal 3D Pose Estimation. Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, 1. https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/hash/1ff8a7b5dc7a7d1f0ed65aaa29c04b1e-Abstract-round1.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cell-1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from pathlib import Path\n",
    "from typing import Optional\n",
    "from movement.kinematics import compute_velocity, compute_speed\n",
    "from movement.utils.vector import compute_norm\n",
    "\n",
    "import ethograph as eto\n",
    "from ethograph.io.nwb_alignment import align_media_per_trial"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cell-2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from pathlib import Path\n",
    "\n",
    "# __vsc_ipynb_file__ is only defined when the notebook runs inside VS Code;\n",
    "# elsewhere fall back to the current working directory.\n",
    "try:\n",
    "    _here = Path(__vsc_ipynb_file__).parent\n",
    "except NameError:\n",
    "    _here = Path().resolve()\n",
    "\n",
    "SESSION = \"20210119_Recording_SR1_SR2_social_vidtwo\"\n",
    "\n",
    "# TODO: adjust DATA_DIR to your local copy of the PAIR-R24M dataset\n",
    "DATA_DIR = _here.parent / \"data\" / SESSION\n",
    "VIDEO_DIR = DATA_DIR / \"videos\"\n",
    "\n",
    "# Marker CSV of the session, resolved relative to DATA_DIR instead of a\n",
    "# machine-specific absolute path.\n",
    "path = DATA_DIR / \"markerDataset.csv\"\n",
    "\n",
    "CAMERAS = [\"Camera1\", \"Camera2\", \"Camera3\", \"Camera4\", \"Camera5\", \"Camera6\"]\n",
    "CHUNK_SIZE = 3500  # frames per trial\n",
    "FPS = 120"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cell-3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def from_pair24_csv(\n",
    "    file_path: Path | str,\n",
    "    fps: Optional[float] = None,\n",
    ") -> xr.Dataset:\n",
    "    \"\"\"Load a PAIR-R24M marker CSV into an xarray Dataset.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_path\n",
    "        Path to the marker CSV file.\n",
    "    fps\n",
    "        Frame rate used to convert frame indices to seconds. If ``None``,\n",
    "        the ``time`` coordinate holds the frame indices themselves.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    xr.Dataset\n",
    "        ``position`` (time, position_type, space, keypoints, individuals),\n",
    "        ``confidence`` (time, position_type, keypoints, individuals; all\n",
    "        ones) and ``center_of_mass`` (time, space, individuals).\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``fps`` is given but is not strictly positive.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    Columns that are missing from the CSV leave their slots at zero.\n",
    "    \"\"\"\n",
    "    if fps is not None and fps <= 0:\n",
    "        raise ValueError(f\"fps must be positive, got {fps}\")\n",
    "\n",
    "    df = pd.read_csv(file_path)\n",
    "\n",
    "    keypoint_names = [\n",
    "        \"HeadF\", \"HeadB\", \"HeadL\", \"SpineF\", \"SpineM\", \"SpineL\",\n",
    "        \"Offset1\", \"Offset2\", \"HipL\", \"HipR\", \"ShoulderL\", \"ShoulderR\",\n",
    "    ]\n",
    "    individual_names = [\"an1\", \"an2\"]\n",
    "    position_types = [\"aligned\", \"absolute\"]\n",
    "    space_names = [\"x\", \"y\", \"z\"]\n",
    "    n_frames = len(df)\n",
    "    n_keypoints = len(keypoint_names)\n",
    "    n_individuals = len(individual_names)\n",
    "    n_space = len(space_names)\n",
    "\n",
    "    position_array = np.zeros((n_frames, len(position_types), n_space, n_keypoints, n_individuals))\n",
    "    confidence_array = np.ones((n_frames, len(position_types), n_keypoints, n_individuals))\n",
    "\n",
    "    for p, pos_type in enumerate(position_types):\n",
    "        # CSV columns are named \"<type>Position_<animal>_<keypoint>_<axis>\".\n",
    "        csv_prefix = f\"{pos_type}Position\"\n",
    "        for i, individual in enumerate(individual_names):\n",
    "            for j, keypoint in enumerate(keypoint_names):\n",
    "                for k, coord in enumerate(space_names):\n",
    "                    col_name = f\"{csv_prefix}_{individual}_{keypoint}_{coord}\"\n",
    "                    if col_name in df.columns:\n",
    "                        position_array[:, p, k, j, i] = df[col_name].values\n",
    "\n",
    "    # Without a frame rate there is nothing to divide by: keep frame indices.\n",
    "    frame_index = np.arange(n_frames, dtype=float)\n",
    "    time_coords = frame_index if fps is None else frame_index / fps\n",
    "\n",
    "    ds = xr.Dataset(\n",
    "        data_vars={\n",
    "            \"position\": xr.DataArray(\n",
    "                position_array,\n",
    "                dims=[\"time\", \"position_type\", \"space\", \"keypoints\", \"individuals\"],\n",
    "            ),\n",
    "            \"confidence\": xr.DataArray(\n",
    "                confidence_array,\n",
    "                dims=[\"time\", \"position_type\", \"keypoints\", \"individuals\"],\n",
    "            ),\n",
    "        },\n",
    "        coords={\n",
    "            \"time\": time_coords,\n",
    "            \"position_type\": position_types,\n",
    "            \"space\": space_names,\n",
    "            \"keypoints\": keypoint_names,\n",
    "            \"individuals\": [\"mouse 1\", \"mouse 2\"],\n",
    "        },\n",
    "        attrs={\"source_software\": \"DeepLabCut\", \"fps\": fps},\n",
    "    )\n",
    "\n",
    "    com_data = np.zeros((n_frames, n_space, n_individuals))\n",
    "    for i, individual in enumerate(individual_names):\n",
    "        for j, coord in enumerate(space_names):\n",
    "            col_name = f\"centerOfmass_{individual}_{coord}\"\n",
    "            if col_name in df.columns:\n",
    "                com_data[:, j, i] = df[col_name].values\n",
    "    ds[\"center_of_mass\"] = xr.DataArray(com_data, dims=[\"time\", \"space\", \"individuals\"])\n",
    "\n",
    "    return ds\n",
    "\n",
    "\n",
    "def split_into_chunks(\n",
    "    ds_full: xr.Dataset,\n",
    "    chunk_size: int,\n",
    "    cameras: list[str],\n",
    "    fps: float,\n",
    "    video_dir: Optional[Path | str] = None,\n",
    ") -> tuple[list[xr.Dataset], pd.DataFrame]:\n",
    "    \"\"\"Split a session into fixed-length trials and list their video files.\n",
    "\n",
    "    Each full chunk becomes one Dataset whose ``time`` coordinate restarts\n",
    "    at 0 and which gains ``pairwise_distance``, ``nose_nose_distance``,\n",
    "    ``velocity`` and ``speed``. Trailing frames that do not fill a whole\n",
    "    chunk are dropped.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ds_full\n",
    "        Session dataset with ``position`` and ``center_of_mass`` variables.\n",
    "    chunk_size\n",
    "        Number of frames per trial.\n",
    "    cameras\n",
    "        Camera folder names; each one adds a ``video_<camera>`` column.\n",
    "    fps\n",
    "        Frame rate used to build the per-trial ``time`` coordinate.\n",
    "    video_dir\n",
    "        Folder holding one sub-folder per camera. Defaults to the\n",
    "        notebook-level ``VIDEO_DIR``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple[list[xr.Dataset], pd.DataFrame]\n",
    "        The per-trial datasets and a table with one row per trial\n",
    "        (``trial`` plus one video path per camera).\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``chunk_size`` is not positive.\n",
    "    \"\"\"\n",
    "    if chunk_size <= 0:\n",
    "        raise ValueError(f\"chunk_size must be positive, got {chunk_size}\")\n",
    "\n",
    "    # Fall back to the notebook global so existing four-argument calls still work.\n",
    "    video_root = Path(VIDEO_DIR if video_dir is None else video_dir)\n",
    "\n",
    "    n_frames = ds_full.sizes[\"time\"]\n",
    "    n_chunks = n_frames // chunk_size\n",
    "\n",
    "    datasets = []\n",
    "    rows = []\n",
    "\n",
    "    for i in range(n_chunks):\n",
    "        start_idx = i * chunk_size\n",
    "        end_idx = start_idx + chunk_size\n",
    "\n",
    "        ds_chunk = ds_full.isel(time=slice(start_idx, end_idx)).copy()\n",
    "        # Re-zero the clock so every trial starts at t = 0.\n",
    "        ds_chunk = ds_chunk.assign_coords(time=np.arange(chunk_size) / fps)\n",
    "        ds_chunk.attrs[\"trial\"] = i\n",
    "\n",
    "        ds_chunk[\"pairwise_distance\"] = compute_norm(\n",
    "            ds_chunk.center_of_mass.sel(individuals=\"mouse 1\")\n",
    "            - ds_chunk.center_of_mass.sel(individuals=\"mouse 2\")\n",
    "        )\n",
    "        ds_chunk[\"nose_nose_distance\"] = compute_norm(\n",
    "            ds_chunk.position.sel(keypoints=\"HeadF\", individuals=\"mouse 1\", position_type=\"absolute\")\n",
    "            - ds_chunk.position.sel(keypoints=\"HeadF\", individuals=\"mouse 2\", position_type=\"absolute\")\n",
    "        )\n",
    "        ds_chunk[\"velocity\"] = compute_velocity(ds_chunk.position.sel(position_type=\"aligned\"))\n",
    "        ds_chunk[\"speed\"] = compute_speed(ds_chunk.position.sel(position_type=\"aligned\"))\n",
    "\n",
    "        # NOTE(review): pairwise_distance is not tagged as a feature here - confirm this is intended.\n",
    "        for feature in [\"nose_nose_distance\", \"velocity\", \"speed\"]:\n",
    "            ds_chunk[feature].attrs[\"type\"] = \"features\"\n",
    "\n",
    "        datasets.append(ds_chunk)\n",
    "\n",
    "        # Video files are looked up by the index of the chunk's first frame.\n",
    "        row = {\"trial\": i}\n",
    "        for cam in cameras:\n",
    "            row[f\"video_{cam}\"] = str(video_root / cam / f\"{start_idx}.mp4\")\n",
    "        rows.append(row)\n",
    "\n",
    "    remaining = n_frames % chunk_size\n",
    "    if remaining > 0:\n",
    "        print(f\"Discarded {remaining} frames at the end (not a full chunk)\")\n",
    "    print(f\"Created {len(datasets)} chunks of {chunk_size} frames each\")\n",
    "\n",
    "    return datasets, pd.DataFrame(rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the marker CSV for the whole session; the bare name below renders the Dataset.\n",
    "ds_full = from_pair24_csv(file_path=path, fps=FPS)\n",
    "ds_full"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cut the session into CHUNK_SIZE-frame trials and collect the per-trial video paths.\n",
    "datasets, session_table = split_into_chunks(\n",
    "    ds_full,\n",
    "    chunk_size=CHUNK_SIZE,\n",
    "    cameras=CAMERAS,\n",
    "    fps=FPS,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-6",
   "metadata": {},
   "outputs": [],
   "source": [
    "output_path = DATA_DIR / \"Trial_data.nc\"\n",
    "nwb_path = output_path.parent / \".ethograph\" / \"alignment.nwb\"\n",
    "\n",
    "# Create both target folders up front so neither write depends on the\n",
    "# callee creating missing parent directories.\n",
    "output_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "nwb_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Build NWB alignment\n",
    "align_media_per_trial(\n",
    "    trial_table=session_table,\n",
    "    stream_rates={\"video\": float(FPS)},\n",
    "    output_path=nwb_path,\n",
    ")\n",
    "\n",
    "# Build and save TrialTree\n",
    "dt = eto.from_datasets(datasets)\n",
    "dt.save(output_path)\n",
    "print(f\"Saved dataset to {output_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from ethograph.labels.converters import write_mapping_file\n",
    "from ethograph.utils.paths import SETTINGS_DIR\n",
    "\n",
    "# Behaviour labels in code order: the list position is the integer label ID.\n",
    "label_names = [\n",
    "    \"Background\",\n",
    "    \"Idle\",\n",
    "    \"SmallMovement\",\n",
    "    \"HeadTilt\",\n",
    "    \"Groom\",\n",
    "    \"Sniff\",\n",
    "    \"Investigate\",\n",
    "    \"RearUp\",\n",
    "    \"RearDown\",\n",
    "    \"CrouchExplore\",\n",
    "    \"Amble\",\n",
    "    \"Locomotion\",\n",
    "]\n",
    "mapping = {name: code for code, name in enumerate(label_names)}\n",
    "\n",
    "# Store the label mapping in the settings folder next to the saved dataset.\n",
    "mapping_path = output_path.parent / SETTINGS_DIR / \"mapping.txt\"\n",
    "write_mapping_file(mapping_path, mapping)\n",
    "print(f\"Saved mapping to {mapping_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}