{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cell-0",
   "metadata": {},
   "source": [
    "# Social rats - PAIR-R24M\n",
    "\n",
    "Data from Marshall et al. (2021)¹ ([figshare](https://figshare.com/articles/dataset/pairs_dataset/14754374)), a multi-animal 3D pose dataset of dyadic interactions in laboratory rats.\n",
    "\n",
    "This notebook loads one session's `markerDataset.csv`, splits it into fixed-length trials with a few derived features, and saves the trials together with an NWB video alignment file and a label mapping file.\n",
    "\n",
    "---\n",
    "\n",
    "¹ Marshall, J., Klibaite, U., Gellis, A., Aldarondo, D., Olveczky, B., & Dunn, T. W. (2021). The PAIR-R24M Dataset for Multi-animal 3D Pose Estimation. Proceedings of the Neural Information Processing Systems Track on Datasets and Benchmarks, 1. https://datasets-benchmarks-proceedings.neurips.cc/paper/2021/hash/1ff8a7b5dc7a7d1f0ed65aaa29c04b1e-Abstract-round1.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cell-1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "from typing import Optional\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from movement.kinematics import compute_velocity, compute_speed\n",
    "from movement.utils.vector import compute_norm\n",
    "\n",
    "import ethograph as eto\n",
    "from ethograph.io.nwb_alignment import align_media_per_trial\n",
    "from ethograph.labels.converters import write_mapping_file\n",
    "from ethograph.utils.paths import SETTINGS_DIR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cell-2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# __vsc_ipynb_file__ is only defined by some front-ends (e.g. VS Code);\n",
    "# otherwise fall back to the kernel's working directory.\n",
    "try:\n",
    "    _here = Path(__vsc_ipynb_file__).parent\n",
    "except NameError:\n",
    "    _here = Path().resolve()\n",
    "\n",
    "SESSION = \"20210119_Recording_SR1_SR2_social_vidtwo\"\n",
    "DATA_DIR = _here.parent / \"data\" / SESSION\n",
    "VIDEO_DIR = DATA_DIR / \"videos\"\n",
    "CAMERAS = [\"Camera1\", \"Camera2\", \"Camera3\", \"Camera4\", \"Camera5\", \"Camera6\"]\n",
    "CHUNK_SIZE = 3500  # frames per trial; videos are looked up as <start_frame>.mp4\n",
    "FPS = 120\n",
    "\n",
    "# TODO: adjust DATA_DIR if your local copy of the PAIR-R24M dataset lives elsewhere\n",
    "path = DATA_DIR / \"markerDataset.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "cell-3",
   "metadata": {},
   "outputs": [],
   "source": [
    "def from_pair24_csv(\n",
    "    file_path: Path | str,\n",
    "    fps: Optional[float] = None,\n",
    ") -> xr.Dataset:\n",
    "    \"\"\"Load a PAIR-R24M ``markerDataset.csv`` file into an ``xr.Dataset``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_path\n",
    "        Path to the marker CSV.\n",
    "    fps\n",
    "        Frame rate used to convert frame indices into seconds. If ``None``,\n",
    "        the ``time`` coordinate holds plain frame indices instead.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    xr.Dataset\n",
    "        ``position`` (time, position_type, space, keypoints, individuals),\n",
    "        ``confidence`` (all ones; same dims without ``space``) and\n",
    "        ``center_of_mass`` (time, space, individuals).\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``fps`` is given but is not strictly positive.\n",
    "\n",
    "    Notes\n",
    "    -----\n",
    "    Columns that are absent from the CSV are left as zeros. The CSV's\n",
    "    ``an1``/``an2`` animals are labelled ``\"mouse 1\"``/``\"mouse 2\"`` on the\n",
    "    ``individuals`` coordinate.\n",
    "    \"\"\"\n",
    "    if fps is not None and fps <= 0:\n",
    "        raise ValueError(f\"fps must be positive, got {fps}\")\n",
    "\n",
    "    df = pd.read_csv(file_path)\n",
    "\n",
    "    keypoint_names = [\n",
    "        \"HeadF\", \"HeadB\", \"HeadL\", \"SpineF\", \"SpineM\", \"SpineL\",\n",
    "        \"Offset1\", \"Offset2\", \"HipL\", \"HipR\", \"ShoulderL\", \"ShoulderR\",\n",
    "    ]\n",
    "    individual_names = [\"an1\", \"an2\"]\n",
    "    position_types = [\"aligned\", \"absolute\"]\n",
    "    space_names = [\"x\", \"y\", \"z\"]\n",
    "    n_frames = len(df)\n",
    "    n_keypoints = len(keypoint_names)\n",
    "    n_individuals = len(individual_names)\n",
    "    n_space = len(space_names)\n",
    "\n",
    "    position_array = np.zeros((n_frames, len(position_types), n_space, n_keypoints, n_individuals))\n",
    "    confidence_array = np.ones((n_frames, len(position_types), n_keypoints, n_individuals))\n",
    "\n",
    "    for p, pos_type in enumerate(position_types):\n",
    "        # CSV columns are named e.g. alignedPosition_an1_HeadF_x\n",
    "        csv_prefix = f\"{pos_type}Position\"\n",
    "        for i, individual in enumerate(individual_names):\n",
    "            for j, keypoint in enumerate(keypoint_names):\n",
    "                for k, coord in enumerate(space_names):\n",
    "                    col_name = f\"{csv_prefix}_{individual}_{keypoint}_{coord}\"\n",
    "                    if col_name in df.columns:\n",
    "                        position_array[:, p, k, j, i] = df[col_name].values\n",
    "\n",
    "    # Without a frame rate, index time by frame number rather than seconds.\n",
    "    time_coords = np.arange(n_frames, dtype=float)\n",
    "    if fps is not None:\n",
    "        time_coords = time_coords / fps\n",
    "\n",
    "    ds = xr.Dataset(\n",
    "        data_vars={\n",
    "            \"position\": xr.DataArray(\n",
    "                position_array,\n",
    "                dims=[\"time\", \"position_type\", \"space\", \"keypoints\", \"individuals\"],\n",
    "            ),\n",
    "            \"confidence\": xr.DataArray(\n",
    "                confidence_array,\n",
    "                dims=[\"time\", \"position_type\", \"keypoints\", \"individuals\"],\n",
    "            ),\n",
    "        },\n",
    "        coords={\n",
    "            \"time\": time_coords,\n",
    "            \"position_type\": position_types,\n",
    "            \"space\": space_names,\n",
    "            \"keypoints\": keypoint_names,\n",
    "            \"individuals\": [\"mouse 1\", \"mouse 2\"],\n",
    "        },\n",
    "        # NOTE(review): confirm the source_software tag; kept as-is because\n",
    "        # downstream tools may key on it.\n",
    "        attrs={\"source_software\": \"DeepLabCut\", \"fps\": fps},\n",
    "    )\n",
    "\n",
    "    com_data = np.zeros((n_frames, n_space, n_individuals))\n",
    "    for i, individual in enumerate(individual_names):\n",
    "        for j, coord in enumerate(space_names):\n",
    "            col_name = f\"centerOfmass_{individual}_{coord}\"\n",
    "            if col_name in df.columns:\n",
    "                com_data[:, j, i] = df[col_name].values\n",
    "    ds[\"center_of_mass\"] = xr.DataArray(com_data, dims=[\"time\", \"space\", \"individuals\"])\n",
    "\n",
    "    return ds\n",
    "\n",
    "\n",
    "def split_into_chunks(\n",
    "    ds_full: xr.Dataset,\n",
    "    chunk_size: int,\n",
    "    cameras: list[str],\n",
    "    fps: float,\n",
    "    video_dir: Optional[Path | str] = None,\n",
    ") -> tuple[list[xr.Dataset], pd.DataFrame]:\n",
    "    \"\"\"Split a session dataset into fixed-length trials with derived features.\n",
    "\n",
    "    Each trial gets its own time axis starting at 0 and the extra variables\n",
    "    ``pairwise_distance``, ``nose_nose_distance``, ``velocity`` and ``speed``.\n",
    "    Trailing frames that do not fill a whole chunk are discarded.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ds_full\n",
    "        Dataset with ``position`` and ``center_of_mass`` variables, as built\n",
    "        by ``from_pair24_csv``.\n",
    "    chunk_size\n",
    "        Number of frames per trial.\n",
    "    cameras\n",
    "        Camera folder names; one ``video_<camera>`` column is written per entry.\n",
    "    fps\n",
    "        Frame rate used to build each trial's time axis in seconds.\n",
    "    video_dir\n",
    "        Folder containing one sub-folder per camera. Defaults to the\n",
    "        notebook-level ``VIDEO_DIR`` constant.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple[list[xr.Dataset], pd.DataFrame]\n",
    "        The per-trial datasets and a table with one row per trial holding the\n",
    "        trial index and the video path for every camera.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``chunk_size`` is not strictly positive.\n",
    "    \"\"\"\n",
    "    if chunk_size <= 0:\n",
    "        raise ValueError(f\"chunk_size must be positive, got {chunk_size}\")\n",
    "\n",
    "    # Fall back to the config constant so existing four-argument calls keep working.\n",
    "    video_root = Path(VIDEO_DIR if video_dir is None else video_dir)\n",
    "\n",
    "    n_frames = ds_full.sizes[\"time\"]\n",
    "    n_chunks, remaining = divmod(n_frames, chunk_size)\n",
    "    # Every trial restarts at t = 0, so the time axis is the same for all of them.\n",
    "    chunk_time = np.arange(chunk_size) / fps\n",
    "\n",
    "    datasets = []\n",
    "    rows = []\n",
    "\n",
    "    for i in range(n_chunks):\n",
    "        start_idx = i * chunk_size\n",
    "        end_idx = start_idx + chunk_size\n",
    "\n",
    "        ds_chunk = ds_full.isel(time=slice(start_idx, end_idx)).copy()\n",
    "        ds_chunk = ds_chunk.assign_coords(time=chunk_time)\n",
    "        ds_chunk.attrs[\"trial\"] = i\n",
    "\n",
    "        ds_chunk[\"pairwise_distance\"] = compute_norm(\n",
    "            ds_chunk.center_of_mass.sel(individuals=\"mouse 1\")\n",
    "            - ds_chunk.center_of_mass.sel(individuals=\"mouse 2\")\n",
    "        )\n",
    "        ds_chunk[\"nose_nose_distance\"] = compute_norm(\n",
    "            ds_chunk.position.sel(keypoints=\"HeadF\", individuals=\"mouse 1\", position_type=\"absolute\")\n",
    "            - ds_chunk.position.sel(keypoints=\"HeadF\", individuals=\"mouse 2\", position_type=\"absolute\")\n",
    "        )\n",
    "        aligned_position = ds_chunk.position.sel(position_type=\"aligned\")\n",
    "        ds_chunk[\"velocity\"] = compute_velocity(aligned_position)\n",
    "        ds_chunk[\"speed\"] = compute_speed(aligned_position)\n",
    "\n",
    "        # NOTE(review): pairwise_distance is not tagged as a feature here;\n",
    "        # confirm whether that is intentional.\n",
    "        for feature in [\"nose_nose_distance\", \"velocity\", \"speed\"]:\n",
    "            ds_chunk[feature].attrs[\"type\"] = \"features\"\n",
    "\n",
    "        datasets.append(ds_chunk)\n",
    "\n",
    "        # Video files are named after the first frame of the trial.\n",
    "        row = {\"trial\": i}\n",
    "        for cam in cameras:\n",
    "            row[f\"video_{cam}\"] = str(video_root / cam / f\"{start_idx}.mp4\")\n",
    "        rows.append(row)\n",
    "\n",
    "    if remaining > 0:\n",
    "        print(f\"Discarded {remaining} frames at the end (not a full chunk)\")\n",
    "    print(f\"Created {len(datasets)} chunks of {chunk_size} frames each\")\n",
    "\n",
    "    return datasets, pd.DataFrame(rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-4",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds_full = from_pair24_csv(path, fps=FPS)\n",
    "ds_full"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-5",
   "metadata": {},
   "outputs": [],
   "source": [
    "datasets, session_table = split_into_chunks(\n",
    "    ds_full, chunk_size=CHUNK_SIZE, cameras=CAMERAS, fps=FPS, video_dir=VIDEO_DIR\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-6",
   "metadata": {},
   "outputs": [],
   "source": [
    "output_path = DATA_DIR / \"Trial_data.nc\"\n",
    "output_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# Build NWB alignment\n",
    "nwb_path = output_path.parent / \".ethograph\" / \"alignment.nwb\"\n",
    "align_media_per_trial(\n",
    "    trial_table=session_table,\n",
    "    stream_rates={\"video\": float(FPS)},\n",
    "    output_path=nwb_path,\n",
    ")\n",
    "\n",
    "# Build and save TrialTree\n",
    "dt = eto.from_datasets(datasets)\n",
    "dt.save(output_path)\n",
    "print(f\"Saved dataset to {output_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cell-7",
   "metadata": {},
   "outputs": [],
   "source": [
    "mapping = {\n",
    "    \"Background\": 0,\n",
    "    \"Idle\": 1,\n",
    "    \"SmallMovement\": 2,\n",
    "    \"HeadTilt\": 3,\n",
    "    \"Groom\": 4,\n",
    "    \"Sniff\": 5,\n",
    "    \"Investigate\": 6,\n",
    "    \"RearUp\": 7,\n",
    "    \"RearDown\": 8,\n",
    "    \"CrouchExplore\": 9,\n",
    "    \"Amble\": 10,\n",
    "    \"Locomotion\": 11,\n",
    "}\n",
    "\n",
    "mapping_path = output_path.parent / SETTINGS_DIR / \"mapping.txt\"\n",
    "write_mapping_file(mapping_path, mapping)\n",
    "print(f\"Saved mapping to {mapping_path}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}