Source code for torch_measure.data.pairwise

# Copyright (c) 2026 AIMS Foundations. MIT License.

"""PairwiseComparisons data structure for pairwise preference data."""

from __future__ import annotations

import torch



[docs]
class PairwiseComparisons:
    """Pairwise comparison data (e.g., Chatbot Arena).

    Each observation records subject_a vs subject_b with an outcome.

    Parameters
    ----------
    subject_a : torch.LongTensor
        Indices into ``subject_ids`` for the first subject in each comparison.
        Shape: ``(n_comparisons,)``.
    subject_b : torch.LongTensor
        Indices into ``subject_ids`` for the second subject in each comparison.
        Shape: ``(n_comparisons,)``.
    outcome : torch.Tensor
        Comparison outcome. ``1.0`` = subject_a wins, ``0.0`` = subject_b wins,
        ``0.5`` = tie. Shape: ``(n_comparisons,)``.
    subject_ids : list[str]
        Unique subject identifiers (e.g., model names).
    item_ids : list[str] | None
        Unique item/prompt identifiers (e.g., question IDs).
    item_contents : list[str] | None
        Text content for each item (one per entry in ``item_ids``).
    item_idx : torch.LongTensor | None
        Per-comparison index into ``item_ids``, shape ``(n_comparisons,)``.
        Maps each comparison to the item/prompt it was evaluated on.
    subject_metadata : list[dict] | None
        Structured metadata per subject (one dict per entry in ``subject_ids``).
    comparison_metadata : list[dict] | None
        Structured metadata per comparison (one dict per row).

    Raises
    ------
    ValueError
        If ``subject_a``, ``subject_b`` or ``outcome`` is not 1-D, if their
        lengths differ, or if ``item_idx`` is given and is not a 1-D tensor
        of the same length.
    """

    def __init__(
        self,
        subject_a: torch.Tensor,
        subject_b: torch.Tensor,
        outcome: torch.Tensor,
        subject_ids: list[str],
        item_ids: list[str] | None = None,
        item_contents: list[str] | None = None,
        item_idx: torch.Tensor | None = None,
        subject_metadata: list[dict] | None = None,
        comparison_metadata: list[dict] | None = None,
    ) -> None:
        if subject_a.ndim != 1 or subject_b.ndim != 1 or outcome.ndim != 1:
            raise ValueError("subject_a, subject_b, and outcome must be 1-D tensors")
        n = subject_a.shape[0]
        if subject_b.shape[0] != n or outcome.shape[0] != n:
            raise ValueError(
                f"Length mismatch: subject_a={subject_a.shape[0]}, "
                f"subject_b={subject_b.shape[0]}, outcome={outcome.shape[0]}"
            )
        # item_idx is a per-comparison tensor too, so hold it to the same
        # shape contract as the three required tensors.
        if item_idx is not None and (item_idx.ndim != 1 or item_idx.shape[0] != n):
            raise ValueError(
                f"item_idx must be a 1-D tensor of length {n}, "
                f"got shape {tuple(item_idx.shape)}"
            )

        # Normalise dtypes once so every method can rely on them.
        self.subject_a = subject_a.long()
        self.subject_b = subject_b.long()
        self.outcome = outcome.float()
        self.subject_ids = subject_ids
        self.item_ids = item_ids
        self.item_contents = item_contents
        self.item_idx = item_idx.long() if item_idx is not None else None
        self.subject_metadata = subject_metadata
        self.comparison_metadata = comparison_metadata

    @property
    def n_comparisons(self) -> int:
        """Number of pairwise comparisons."""
        return self.outcome.shape[0]

    @property
    def n_subjects(self) -> int:
        """Number of unique subjects."""
        return len(self.subject_ids)

    @property
    def n_items(self) -> int:
        """Number of unique items/prompts (``0`` when ``item_ids`` is None)."""
        if self.item_ids is None:
            return 0
        return len(self.item_ids)

    @property
    def shape(self) -> tuple[int, int]:
        """(n_comparisons, n_subjects)."""
        return (self.n_comparisons, self.n_subjects)

    @property
    def density(self) -> float:
        """Number of comparisons relative to the number of unordered pairs.

        Computed as ``n_comparisons / (n_subjects * (n_subjects - 1) / 2)``.
        Every comparison row is counted, so repeated comparisons of the same
        pair can push the value above ``1.0``. Returns ``0.0`` when there are
        fewer than two subjects.
        """
        n = self.n_subjects
        total_pairs = n * (n - 1) / 2
        if total_pairs == 0:
            return 0.0
        return self.n_comparisons / total_pairs

    def win_rates(self) -> torch.Tensor:
        """Per-subject overall win rate.

        Returns
        -------
        torch.Tensor
            Win rate for each subject, shape ``(n_subjects,)``, on the same
            device as ``outcome``. Ties count as 0.5 wins and 0.5 losses.
            Subjects that appear in no comparison get ``0.0``.
        """
        score = self.outcome
        # Allocate accumulators with the data's dtype/device; scatter_add_
        # requires both to match its source tensor.
        wins = torch.zeros(self.n_subjects, dtype=score.dtype, device=score.device)
        counts = torch.zeros_like(wins)
        ones = torch.ones_like(score)

        wins.scatter_add_(0, self.subject_a, score)
        wins.scatter_add_(0, self.subject_b, 1.0 - score)
        counts.scatter_add_(0, self.subject_a, ones)
        counts.scatter_add_(0, self.subject_b, ones)

        # clamp avoids 0/0 for subjects with no comparisons.
        return wins / counts.clamp(min=1)

    def to_win_matrix(self) -> torch.Tensor:
        """Aggregate into a pairwise win-rate matrix.

        Returns
        -------
        torch.Tensor
            Square matrix of shape ``(n_subjects, n_subjects)`` where entry
            ``(i, j)`` is the win rate of subject *i* against subject *j*,
            averaged over every comparison of that pair.
            Diagonal is NaN. Unobserved pairs are NaN.
        """
        n = self.n_subjects
        score = self.outcome
        wins = torch.zeros(n, n, dtype=score.dtype, device=score.device)
        counts = torch.zeros_like(wins)
        ones = torch.ones_like(score)

        a = self.subject_a
        b = self.subject_b
        # ``wins[a, b] += x`` does not accumulate when the same (a, b) pair
        # occurs more than once; index_put_ with accumulate=True sums every
        # occurrence, which is required for repeated match-ups.
        wins.index_put_((a, b), score, accumulate=True)
        wins.index_put_((b, a), 1.0 - score, accumulate=True)
        counts.index_put_((a, b), ones, accumulate=True)
        counts.index_put_((b, a), ones, accumulate=True)

        mat = wins / counts.clamp(min=1)
        mat[counts == 0] = float("nan")
        mat.fill_diagonal_(float("nan"))
        return mat

    def to(self, device: torch.device | str) -> PairwiseComparisons:
        """Return a new instance with all tensors moved to ``device``.

        Non-tensor fields (ids, contents, metadata) are passed through by
        reference, not copied.
        """
        return PairwiseComparisons(
            subject_a=self.subject_a.to(device),
            subject_b=self.subject_b.to(device),
            outcome=self.outcome.to(device),
            subject_ids=self.subject_ids,
            item_ids=self.item_ids,
            item_contents=self.item_contents,
            item_idx=self.item_idx.to(device) if self.item_idx is not None else None,
            subject_metadata=self.subject_metadata,
            comparison_metadata=self.comparison_metadata,
        )

    @classmethod
    def from_dataframe(
        cls,
        df,
        subject_a_col: str = "model_a",
        subject_b_col: str = "model_b",
        outcome_col: str = "outcome",
    ) -> PairwiseComparisons:
        """Create from a pandas DataFrame.

        Subject identifiers are collected from both subject columns, sorted,
        and used as ``subject_ids``; each row is then encoded as indices into
        that sorted list. Only the three named columns are read.

        Parameters
        ----------
        df : pandas.DataFrame
            DataFrame with at least subject_a, subject_b, and outcome columns.
        subject_a_col : str
            Column name for the first subject.
        subject_b_col : str
            Column name for the second subject.
        outcome_col : str
            Column name for the outcome (1.0 = a wins, 0.0 = b wins, 0.5 = tie).

        Returns
        -------
        PairwiseComparisons
            Instance with one comparison per DataFrame row.
        """
        all_subjects = sorted(set(df[subject_a_col]) | set(df[subject_b_col]))
        sid_to_idx = {s: i for i, s in enumerate(all_subjects)}

        subject_a = torch.tensor([sid_to_idx[s] for s in df[subject_a_col]], dtype=torch.long)
        subject_b = torch.tensor([sid_to_idx[s] for s in df[subject_b_col]], dtype=torch.long)
        outcome = torch.tensor(df[outcome_col].values, dtype=torch.float32)

        return cls(
            subject_a=subject_a,
            subject_b=subject_b,
            outcome=outcome,
            subject_ids=all_subjects,
        )

    def __repr__(self) -> str:
        return (
            f"PairwiseComparisons(n_comparisons={self.n_comparisons}, "
            f"n_subjects={self.n_subjects}, density={self.density:.2%})"
        )