# Source code for torch_measure.data.response_matrix

# Copyright (c) 2026 AIMS Foundations. MIT License.

"""Core ResponseMatrix data structure for measurement analysis."""

from __future__ import annotations

import torch



# [docs]  (documentation-link marker left over from the rendered source page; not part of the module)
class ResponseMatrix:
    """A binary or continuous response matrix (subjects x items).

    Parameters
    ----------
    data : torch.Tensor
        Response matrix of shape (n_subjects, n_items). Anything accepted by
        :func:`torch.as_tensor` (e.g. nested lists) is converted; the result
        is always stored as ``float32``. Values can be:
        - Binary (0/1) for correct/incorrect responses
        - Continuous [0, 1] for probability responses
        - NaN for missing data
    subject_ids : list[str] | None
        Optional identifiers for subjects (rows).
    item_ids : list[str] | None
        Optional identifiers for items (columns).
    item_contents : list[str] | None
        Optional text content for each item (e.g., question text).
    subject_metadata : list[dict[str, str | int | float | bool | None]] | None
        Optional structured metadata for each subject (one dict per row).
        For HELM datasets, each dict has keys: ``org``, ``model``,
        ``param_count``, ``is_instruct``.
    info : dict | None
        Optional dataset-level metadata (interpretation notes, paper URL,
        data source URL, license, etc.). Usually loaded from
        ``data/<benchmark>/info.yaml``. Common keys include:
        ``description``, ``testing_condition``, ``paper_url``,
        ``data_source_url``, ``subject_type``, ``item_type``, ``license``,
        ``citation``, ``tags``.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional.
    """

    def __init__(
        self,
        data: torch.Tensor,
        subject_ids: list[str] | None = None,
        item_ids: list[str] | None = None,
        item_contents: list[str] | None = None,
        subject_metadata: list[dict] | None = None,
        info: dict | None = None,
    ) -> None:
        # as_tensor is a no-op for tensors and lets array-likes through.
        data = torch.as_tensor(data)
        if data.ndim != 2:
            raise ValueError(f"Expected 2D tensor, got {data.ndim}D")
        self.data = data.float()
        self.subject_ids = subject_ids
        self.item_ids = item_ids
        self.item_contents = item_contents
        self.subject_metadata = subject_metadata
        self.info = info

    @property
    def n_rows(self) -> int:
        """Number of subjects (rows)."""
        return self.data.shape[0]

    @property
    def n_cols(self) -> int:
        """Number of items (columns)."""
        return self.data.shape[1]

    @property
    def n_subjects(self) -> int:
        """Number of subjects (rows); alias of :attr:`n_rows`."""
        return self.n_rows

    @property
    def n_items(self) -> int:
        """Number of items (columns); alias of :attr:`n_cols`."""
        return self.n_cols

    @property
    def shape(self) -> tuple[int, int]:
        """Shape of the response matrix as ``(n_rows, n_cols)``."""
        return (self.n_rows, self.n_cols)

    @property
    def observed_mask(self) -> torch.Tensor:
        """Boolean mask of observed (non-NaN) entries."""
        return ~torch.isnan(self.data)

    @property
    def density(self) -> float:
        """Fraction of observed (non-missing) entries."""
        return self.observed_mask.float().mean().item()

    @property
    def subject_means(self) -> torch.Tensor:
        """Mean response per subject (ignoring NaN).

        A subject with no observed entries gets a mean of ``0.0``: the
        observation count is clamped to 1 to avoid dividing by zero.
        """
        counts = self.observed_mask.sum(dim=1).clamp(min=1)
        # nansum treats missing entries as zero, so no masked copy is needed.
        return self.data.nansum(dim=1) / counts

    @property
    def item_means(self) -> torch.Tensor:
        """Mean response per item (ignoring NaN), i.e., item easiness/facility.

        An item with no observed entries gets a mean of ``0.0``: the
        observation count is clamped to 1 to avoid dividing by zero.
        """
        counts = self.observed_mask.sum(dim=0).clamp(min=1)
        return self.data.nansum(dim=0) / counts

    def to(self, device: torch.device | str) -> ResponseMatrix:
        """Return a new matrix whose tensor lives on ``device``.

        Identifiers, contents, metadata and ``info`` are passed through by
        reference, not copied.
        """
        return ResponseMatrix(
            data=self.data.to(device),
            subject_ids=self.subject_ids,
            item_ids=self.item_ids,
            item_contents=self.item_contents,
            subject_metadata=self.subject_metadata,
            info=self.info,
        )

    def binarize(self, threshold: float = 0.5) -> ResponseMatrix:
        """Convert continuous responses to binary using a threshold.

        Entries ``>= threshold`` become 1.0, other observed entries become
        0.0, and missing entries stay NaN.
        """
        # NaN >= threshold is False, so missing cells are restored explicitly.
        binary = (self.data >= threshold).float().masked_fill(~self.observed_mask, float("nan"))
        return ResponseMatrix(
            data=binary,
            subject_ids=self.subject_ids,
            item_ids=self.item_ids,
            item_contents=self.item_contents,
            subject_metadata=self.subject_metadata,
            info=self.info,
        )

    @classmethod
    def from_numpy(cls, array, **kwargs) -> ResponseMatrix:
        """Create from a numpy array.

        Extra keyword arguments are forwarded to the constructor
        (``subject_ids``, ``item_ids``, ...).
        """
        return cls(torch.from_numpy(array).float(), **kwargs)

    @classmethod
    def from_dataframe(cls, df) -> ResponseMatrix:
        """Create from a pandas DataFrame.

        The index becomes ``subject_ids`` and the columns become
        ``item_ids``, both converted to strings.
        """
        return cls(
            torch.tensor(df.to_numpy(), dtype=torch.float32),
            subject_ids=list(df.index.astype(str)),
            item_ids=list(df.columns.astype(str)),
        )

    @classmethod
    def from_long(cls, data) -> ResponseMatrix:
        """Pivot a :class:`LongFormData` into a wide :class:`ResponseMatrix`.

        When multiple trials, non-null ``test_condition`` values, or
        otherwise repeated rows exist per (subject, item) cell, the response
        is averaged across them. The legacy ``load()`` path used to do this
        automatically; consumers who want polytomous / per-trial /
        multi-condition analysis should work with the :class:`LongFormData`
        directly.

        Parameters
        ----------
        data : LongFormData
            The long-form dataset returned by
            :func:`torch_measure.datasets.load`. This method reads its
            ``responses``, ``items``, ``subjects``, ``name`` and ``info``
            attributes.

        Returns
        -------
        ResponseMatrix
            Subject-by-item matrix with subjects rendered as their
            ``display_name`` (when the subjects registry is populated;
            otherwise the raw ``subject_id``) and items keyed by
            ``item_id``. ``item_contents`` carries the item ``content``
            strings from the items registry, with ``""`` for missing
            content. Columns follow the items-registry order; items that
            appear in the responses but not in the registry are not
            included.
        """
        import pandas as pd

        def is_missing(value) -> bool:
            # None / NaN / pd.NA; non-scalars are never treated as missing.
            return bool(pd.api.types.is_scalar(value) and pd.isna(value))

        responses = data.responses
        name = data.name

        # Restrict the registry to this benchmark and to items actually answered.
        items = data.items
        if "benchmark_id" in items.columns:
            items = items[items["benchmark_id"] == name]
        items_bench = items.set_index("item_id")
        answered = set(responses["item_id"].unique())
        items_bench = items_bench[items_bench.index.isin(answered)]

        cell_keys = ["subject_id", "item_id"]
        multi_trial = "trial" in responses.columns and responses["trial"].nunique() > 1
        multi_condition = (
            "test_condition" in responses.columns and responses["test_condition"].notna().any()
        )
        # pivot() rejects duplicate (subject, item) pairs, so any repeated
        # cell must be averaged regardless of how the repetition arose.
        repeated_cells = responses.duplicated(subset=cell_keys).any()
        if multi_trial or multi_condition or repeated_cells:
            agg = responses.groupby(cell_keys, as_index=False)["response"].mean()
        else:
            agg = responses[[*cell_keys, "response"]]

        matrix = agg.pivot(index="subject_id", columns="item_id", values="response")

        # Column order follows the items registry, not the pivot's sort order.
        ordered_item_ids = [iid for iid in items_bench.index if iid in matrix.columns]
        matrix = matrix.reindex(columns=ordered_item_ids)

        subjects = data.subjects
        subjects_by_id = subjects.set_index("subject_id") if "subject_id" in subjects.columns else subjects
        has_display_names = "display_name" in subjects_by_id.columns

        display_names = []
        for sid in list(matrix.index):
            label = sid
            if has_display_names and sid in subjects_by_id.index:
                candidate = subjects_by_id.at[sid, "display_name"]
                # A NaN name would otherwise be rendered as the string "nan".
                if not is_missing(candidate):
                    label = str(candidate)
            display_names.append(label)

        has_content = "content" in items_bench.columns
        item_contents = []
        for iid in ordered_item_ids:
            text = items_bench.at[iid, "content"] if has_content else None
            # NaN is truthy, so `text or ""` alone would yield "nan".
            item_contents.append("" if is_missing(text) else str(text or ""))

        tensor = torch.tensor(matrix.to_numpy(), dtype=torch.float32)

        info = dict(data.info) if data.info is not None else {}
        info.setdefault("benchmark_id", name)

        return cls(
            data=tensor,
            subject_ids=display_names,
            item_ids=ordered_item_ids,
            item_contents=item_contents,
            subject_metadata=None,
            info=info,
        )

    def __repr__(self) -> str:
        return f"ResponseMatrix(n_subjects={self.n_subjects}, n_items={self.n_items}, density={self.density:.2%})"