diff --git a/README.md b/README.md index 1de16567..99932233 100644 --- a/README.md +++ b/README.md @@ -82,3 +82,4 @@ See this [step-by-step guide](https://huggingface.co/docs/evaluate/creating_and_ ## Credits Thanks to [@marella](https://github.com/marella) for letting us use the `evaluate` namespace on PyPi previously used by his [library](https://github.com/marella/evaluate). + diff --git a/data/human_ai_trust_demo.csv b/data/human_ai_trust_demo.csv new file mode 100644 index 00000000..e60111f1 --- /dev/null +++ b/data/human_ai_trust_demo.csv @@ -0,0 +1,121 @@ +prediction,reference,confidence,human_trust,belief_prior,belief_posterior,explanation_length +0,0,0.462,0.389,0.462,0.462,111 +1,1,0.746,0.768,0.695,0.721,57 +0,0,0.759,0.764,0.572,0.665,63 +0,0,0.99,0.925,0.368,0.679,64 +0,0,0.721,0.936,0.396,0.559,99 +1,1,0.795,0.859,0.199,0.497,72 +0,0,0.745,0.542,0.396,0.571,86 +0,0,0.575,0.593,0.342,0.458,104 +0,0,0.921,0.855,0.465,0.693,90 +0,1,0.69,0.775,0.235,0.462,98 +0,0,0.869,0.789,0.504,0.686,64 +0,0,0.614,0.602,0.707,0.66,86 +0,0,0.96,0.99,0.378,0.669,38 +0,0,0.54,0.626,0.48,0.51,107 +1,1,0.838,0.718,0.538,0.688,61 +1,0,0.792,0.759,0.32,0.556,60 +0,1,0.481,0.434,0.445,0.463,105 +0,1,0.532,0.467,0.403,0.467,95 +0,1,0.612,0.789,0.42,0.516,113 +0,0,0.674,0.715,0.245,0.46,97 +0,1,0.414,0.288,0.405,0.409,120 +0,0,0.76,0.852,0.5,0.63,93 +1,1,0.591,0.803,0.69,0.64,99 +1,1,0.821,0.924,0.592,0.706,62 +1,1,0.612,0.46,0.831,0.721,111 +1,1,0.982,0.934,0.247,0.615,57 +1,1,0.633,0.759,0.574,0.603,83 +1,1,0.702,0.631,0.437,0.569,83 +1,1,0.872,0.916,0.838,0.855,56 +1,1,0.565,0.643,0.238,0.402,88 +1,0,0.627,0.535,0.232,0.43,103 +0,0,0.946,0.94,0.28,0.613,75 +1,1,0.509,0.185,0.05,0.279,94 +1,1,0.778,0.675,0.295,0.536,82 +1,1,0.789,0.764,0.248,0.519,68 +0,0,0.867,0.742,0.43,0.649,61 +1,1,0.564,0.728,0.468,0.516,96 +1,0,0.442,0.299,0.775,0.608,108 +0,0,0.828,0.784,0.59,0.709,71 +0,0,0.795,0.808,0.285,0.54,66 +0,0,0.788,0.932,0.22,0.504,78 +0,0,0.802,0.658,0.498,0.65,73 
+1,1,0.648,0.764,0.136,0.392,89 +1,1,0.785,0.786,0.766,0.776,66 +1,1,0.794,0.696,0.636,0.715,68 +0,1,0.514,0.56,0.306,0.41,115 +1,1,0.99,0.99,0.057,0.524,56 +0,0,0.821,0.761,0.671,0.746,61 +1,1,0.571,0.578,0.377,0.474,102 +0,1,0.679,0.64,0.648,0.663,96 +0,0,0.604,0.615,0.081,0.342,80 +1,1,0.868,0.934,0.28,0.574,71 +0,0,0.924,0.99,0.401,0.662,52 +1,1,0.627,0.503,0.409,0.518,100 +1,0,0.716,0.929,0.31,0.513,76 +0,1,0.65,0.454,0.525,0.587,74 +0,1,0.699,0.683,0.186,0.443,69 +0,0,0.99,0.99,0.372,0.681,51 +1,0,0.571,0.599,0.424,0.497,104 +0,0,0.637,0.575,0.503,0.57,84 +1,0,0.493,0.472,0.542,0.518,117 +0,0,0.628,0.578,0.175,0.401,78 +0,0,0.738,0.679,0.093,0.416,80 +0,0,0.801,0.886,0.656,0.728,61 +0,0,0.792,0.827,0.466,0.629,68 +0,1,0.699,0.63,0.25,0.475,86 +1,1,0.752,0.842,0.71,0.731,71 +0,0,0.968,0.99,0.423,0.696,49 +0,1,0.568,0.65,0.636,0.602,111 +1,1,0.99,0.99,0.414,0.702,45 +1,1,0.844,0.761,0.812,0.828,77 +1,1,0.621,0.565,0.751,0.686,84 +0,0,0.589,0.664,0.35,0.47,104 +0,1,0.658,0.719,0.594,0.626,105 +1,0,0.573,0.571,0.529,0.551,76 +1,1,0.857,0.869,0.674,0.765,59 +1,1,0.821,0.949,0.207,0.514,77 +0,1,0.591,0.532,0.537,0.564,97 +0,0,0.623,0.678,0.612,0.617,98 +1,1,0.523,0.503,0.05,0.286,101 +0,0,0.683,0.661,0.163,0.423,88 +1,1,0.878,0.988,0.05,0.464,63 +0,0,0.782,0.865,0.346,0.564,87 +0,1,0.451,0.532,0.544,0.497,118 +1,0,0.621,0.751,0.7,0.661,98 +1,0,0.646,0.648,0.415,0.531,88 +1,1,0.617,0.686,0.726,0.672,91 +0,0,0.773,0.742,0.124,0.449,72 +1,1,0.759,0.791,0.059,0.409,82 +1,1,0.579,0.566,0.389,0.484,96 +1,1,0.804,0.813,0.477,0.64,76 +1,1,0.834,0.894,0.393,0.614,90 +1,1,0.912,0.831,0.05,0.481,69 +1,1,0.908,0.99,0.382,0.645,57 +0,1,0.435,0.334,0.139,0.287,129 +1,1,0.609,0.488,0.534,0.572,92 +1,1,0.827,0.943,0.473,0.65,50 +1,1,0.827,0.906,0.212,0.52,60 +1,1,0.827,0.89,0.297,0.562,52 +0,0,0.99,0.99,0.188,0.589,47 +0,0,0.836,0.834,0.387,0.612,69 +1,1,0.92,0.831,0.591,0.756,76 +1,1,0.893,0.901,0.203,0.548,66 +0,1,0.678,0.61,0.501,0.589,86 +1,1,0.703,0.8,0.294,0.498,93 
"""Run the ``human_ai_trust`` metric over the bundled demo CSV and print the result.

The CSV and the metric directory are located relative to this file rather
than the current working directory: the original paths only resolved when the
working directory contained both ``data/`` and a local ``human_ai_trust/``
directory, which no directory in this repository does.
"""
from pathlib import Path

import evaluate
import pandas as pd

# This script sits in ``data/`` next to the CSV; the metric implementation is
# in ``metrics/human_ai_trust`` one level up.
_DATA_DIR = Path(__file__).resolve().parent
_METRIC_DIR = _DATA_DIR.parent / "metrics" / "human_ai_trust"

# Prefer the local checkout; keep the bare identifier as a fallback so the
# script still behaves as before when copied outside the repository.
metric = evaluate.load(str(_METRIC_DIR) if _METRIC_DIR.is_dir() else "human_ai_trust")

csv_path = _DATA_DIR / "human_ai_trust_demo.csv"
if not csv_path.exists():
    # Backward-compatible fallback: the original cwd-relative location.
    csv_path = Path("data/human_ai_trust_demo.csv")

df = pd.read_csv(csv_path)

out = metric.compute(
    predictions=df["prediction"].tolist(),
    references=df["reference"].tolist(),
    confidences=df["confidence"].tolist(),
    human_trust_scores=df["human_trust"].tolist(),
    belief_priors=df["belief_prior"].tolist(),
    belief_posteriors=df["belief_posterior"].tolist(),
    explanation_complexity=df["explanation_length"].tolist(),
)

print(out)
computationally using theory-grounded metrics.\n", + "\n", + "---\n", + "\n", + "**Companion Dataset:** \n", + "`dyra-12/human_ai_trust_demo`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abf76e3e", + "metadata": {}, + "outputs": [], + "source": [ + "import evaluate\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5097debe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
predictionreferenceconfidencehuman_trustbelief_priorbelief_posteriorexplanation_length
0000.4620.3890.4620.462111
1110.7460.7680.6950.72157
2000.7590.7640.5720.66563
3000.9900.9250.3680.67964
4000.7210.9360.3960.55999
\n", + "
" + ], + "text/plain": [ + " prediction reference confidence human_trust belief_prior \\\n", + "0 0 0 0.462 0.389 0.462 \n", + "1 1 1 0.746 0.768 0.695 \n", + "2 0 0 0.759 0.764 0.572 \n", + "3 0 0 0.990 0.925 0.368 \n", + "4 0 0 0.721 0.936 0.396 \n", + "\n", + " belief_posterior explanation_length \n", + "0 0.462 111 \n", + "1 0.721 57 \n", + "2 0.665 63 \n", + "3 0.679 64 \n", + "4 0.559 99 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load demo dataset\n", + "from pathlib import Path\n", + "\n", + "# Prefer the local copy shipped with this repo to avoid auth/network issues.\n", + "url = \"https://huggingface.co/datasets/Dyra1204/human_ai_trust_demo/resolve/main/human_ai_trust_demo.csv\"\n", + "local_path = Path(\"..\") / \"data\" / \"human_ai_trust_demo.csv\"\n", + "\n", + "df = pd.read_csv(local_path if local_path.exists() else url)\n", + "\n", + "df.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "9c166c5c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'expected_trust_error': 0.07086666797598203,\n", + " 'trust_sensitivity_index': 0.8666191489611619,\n", + " 'belief_shift_magnitude': 0.16639166666666666,\n", + " 'overconfidence_penalty': 0.1472083330154419,\n", + " 'overconfidence_penalty_normalized': 0.20490186898320353,\n", + " 'explanation_confidence_alignment': -0.863066801839177}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pathlib import Path\n", + "\n", + "# The metric isn't on the Hub; load it from the local repo checkout.\n", + "try:\n", + " metric = evaluate.load(\"human_ai_trust\")\n", + "except FileNotFoundError:\n", + " repo_root = Path.cwd()\n", + " while not (repo_root / \"metrics\").exists() and repo_root != repo_root.parent:\n", + " repo_root = repo_root.parent\n", + " metric = evaluate.load(str(repo_root / \"metrics\" / \"human_ai_trust\"))\n", + 
"\n", + "out = metric.compute(\n", + " predictions=df[\"prediction\"].tolist(),\n", + " references=df[\"reference\"].tolist(),\n", + " confidences=df[\"confidence\"].tolist(),\n", + " human_trust_scores=df[\"human_trust\"].tolist(),\n", + " belief_priors=df[\"belief_prior\"].tolist(),\n", + " belief_posteriors=df[\"belief_posterior\"].tolist(),\n", + " explanation_complexity=df[\"explanation_length\"].tolist(),\n", + ")\n", + "\n", + "out\n" + ] + }, + { + "cell_type": "markdown", + "id": "aa5965b7", + "metadata": {}, + "source": [ + "## Metric Outputs\n", + "\n", + "The metric suite produces the following values:\n", + "\n", + "- **Expected Trust Error (ETE):** \n", + " Misalignment between human trust and model confidence.\n", + "\n", + "- **Trust Sensitivity Index (TSI):** \n", + " Correlation between trust and confidence.\n", + "\n", + "- **Belief Shift Magnitude (BSM):** \n", + " Degree of belief updating after AI exposure.\n", + "\n", + "- **Overconfidence Penalty (OCP):** \n", + " Asymmetric harm from confident but wrong predictions.\n", + "\n", + "- **Explanation–Confidence Alignment (ECA):** \n", + " Coupling between explanation complexity and model confidence.\n", + "\n", + "These values surface human-centered failure modes that accuracy alone cannot detect.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0cb8d8da", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure()\n", + "plt.scatter(df[\"confidence\"], df[\"human_trust\"], alpha=0.6)\n", + "plt.xlabel(\"Model Confidence\")\n", + "plt.ylabel(\"Human Trust\")\n", + "plt.title(\"Trust Calibration: Confidence vs Human Trust\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5ad2c538", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "belief_shift = df[\"belief_posterior\"] - df[\"belief_prior\"]\n", + "\n", + "plt.figure()\n", + "plt.hist(belief_shift, bins=20)\n", + "plt.xlabel(\"Belief Shift (Posterior - Prior)\")\n", + "plt.ylabel(\"Count\")\n", + "plt.title(\"Distribution of Belief Shifts After AI Exposure\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c493b9f5", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure()\n", + "plt.scatter(df[\"confidence\"], df[\"explanation_length\"], alpha=0.6)\n", + "plt.xlabel(\"Model Confidence\")\n", + "plt.ylabel(\"Explanation Length\")\n", + "plt.title(\"Explanation–Confidence Alignment\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "id": "3306ecdf", + "metadata": {}, + "source": [ + "## Why Accuracy Alone Is Insufficient\n", + "\n", + "Accuracy treats all errors equally.\n", + "\n", + "However, in human–AI interaction:\n", + "\n", + "- A confident but wrong prediction is more damaging than a hesitant error \n", + "- Users may over-trust highly confident systems \n", + "- Users may ignore uncertainty signals \n", + "- Beliefs may shift even when the model is wrong \n", + "\n", + "The metrics demonstrated here surface:\n", + "\n", + "- misalignment between trust and uncertainty (ETE) \n", + "- insensitivity to uncertainty (low TSI) \n", + "- asymmetric harm from confident errors (OCP) \n", + "- belief influence (BSM) \n", + "- explanation–uncertainty coupling (ECA) \n", + "\n", + "These human-centered dimensions are invisible to accuracy alone.\n" + ] + }, + { + "cell_type": "markdown", + "id": "54e29fad", + "metadata": {}, + "source": [ + "## Cognitive Interpretation\n", + "\n", + "This dataset demonstrates a psychologically plausible interaction pattern:\n", + "\n", + "- Human trust tracks model confidence with noise \n", + "- Beliefs partially update toward AI outputs \n", + "- Explanations are longer when confidence is lower \n", + "\n", + "The resulting metrics show:\n", + "\n", + "- Low ETE → trust is well calibrated \n", + "- High TSI → users are uncertainty-sensitive \n", + "- Moderate BSM → partial belief updating \n", + "- Non-zero OCP → confident errors exist \n", + "- Strong negative ECA → explanations hedge under uncertainty \n", + "\n", + "This illustrates how cognitive and social dimensions of human–AI 
interaction can be formalized into computational evaluation metrics.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/metrics/human_ai_trust/README.md b/metrics/human_ai_trust/README.md new file mode 100644 index 00000000..506c5fe7 --- /dev/null +++ b/metrics/human_ai_trust/README.md @@ -0,0 +1,133 @@ +# Human Trust & Uncertainty Metrics + +This module provides a human-centered evaluation framework for AI systems that operationalizes: + +- trust calibration +- belief updating +- uncertainty alignment +- asymmetric harm from overconfident errors +- explanation–confidence coupling + +It complements traditional performance metrics by surfacing how users interpret, trust, and act on model outputs under uncertainty. + +**Trust calibration** refers to the alignment between a user's level of trust in an AI system and the system's actual reliability or confidence. Well-calibrated trust means users trust the AI appropriately—more when it's confident and correct, less when it's uncertain or error-prone. + +--- + +## Why These Metrics Exist + +Accuracy alone is insufficient for evaluating AI systems in high-stakes or vulnerable contexts. + +Human decision-makers experience AI failures asymmetrically: +a confident but wrong prediction is far more damaging than a hesitant error. 
+ +Moreover, highly accurate systems can still cause harm if they: + +- induce over-reliance (automation bias) +- induce under-reliance (algorithm aversion) +- miscommunicate uncertainty +- distort user beliefs + +This metric suite provides theory-grounded, computational signals for evaluating these human-centered failure modes. + +--- + +## Metrics Included + +| Metric | What It Measures | +|--------|------------------| +| Expected Trust Error (ETE) | Misalignment between human trust and model confidence | +| Trust Sensitivity Index (TSI) | Responsiveness of trust to uncertainty signals | +| Belief Shift Magnitude (BSM) | Degree of belief updating after AI exposure | +| Overconfidence Penalty (OCP) | Asymmetric harm from confident but wrong predictions | +| OCP (normalized) | Scale-invariant version of OCP | +| Explanation–Confidence Alignment (ECA) | Coupling between explanation form and model confidence | + +--- + +## Usage + +```python +import evaluate + +metric = evaluate.load("human_ai_trust") + +out = metric.compute( + predictions=[1, 0, 1], + references=[1, 1, 0], + confidences=[0.9, 0.7, 0.8], + human_trust_scores=[0.85, 0.6, 0.75], + belief_priors=[0.3, 0.4, 0.5], + belief_posteriors=[0.6, 0.5, 0.7], + explanation_complexity=[10, 20, 15], +) + +print(out) +``` + +--- + +## Interpretation Guide + +**Low ETE + High TSI** +→ well-calibrated, uncertainty-sensitive users + +**High ETE + High TSI** +→ sensitive but miscalibrated trust + +**Low TSI** +→ users ignore uncertainty signals + +**High OCP** +→ confident errors dominate harm + +**High BSM** +→ strong AI influence on beliefs + +**Strong ECA (±)** +→ explanation style tracks uncertainty + +--- + +## Limitations + +- These are descriptive metrics, not causal estimators +- They do not evaluate explanation correctness or faithfulness +- They do not measure fairness or bias +- They assume confidence and trust are provided on comparable scales (both must lie in [0, 1]; values outside that range are rejected) +- Synthetic datasets cannot substitute for real human data + 
+--- + +## Theory Foundations + +These metrics draw on: + +- calibration in probabilistic judgment +- trust in automation +- Bayesian belief updating +- overconfidence bias +- explanation trust mismatch + +They operationalize core human-centered constructs into computational form. + +--- + +## Companion Dataset + +A small reference dataset is available at: + +**dyra-12/human_ai_trust_demo** + +It demonstrates: + +- trust calibration +- belief updating +- uncertainty communication +- explanation–confidence alignment + +--- + +## License + +Apache 2.0 \ No newline at end of file diff --git a/metrics/human_ai_trust/human_ai_trust.json b/metrics/human_ai_trust/human_ai_trust.json new file mode 100644 index 00000000..1b7e25fa --- /dev/null +++ b/metrics/human_ai_trust/human_ai_trust.json @@ -0,0 +1,16 @@ +{ + "description": "Human-centered evaluation metrics for trust calibration, belief updating, and uncertainty alignment in AI systems.", + "citation": "@misc{human_ai_trust2026,\n title={Human Trust & Uncertainty Metrics for AI Evaluation},\n author={Dyra},\n year={2026},\n note={Hugging Face Evaluate metric}\n}", + "homepage": "https://huggingface.co/datasets/Dyra1204/human_ai_trust_demo", + "license": "Apache-2.0", + "inputs_description": "predictions: List[Any], references: List[Any], confidences: List[float], human_trust_scores: List[float], belief_priors: Optional[List[float]], belief_posteriors: Optional[List[float]], explanation_complexity: Optional[List[float]]", + "keywords": [ + "trust", + "uncertainty", + "human-ai-interaction", + "calibration", + "interpretability", + "belief-updating", + "hcai" + ] +} diff --git a/metrics/human_ai_trust/human_ai_trust.py b/metrics/human_ai_trust/human_ai_trust.py new file mode 100644 index 00000000..e0ceb4ac --- /dev/null +++ b/metrics/human_ai_trust/human_ai_trust.py @@ -0,0 +1,197 @@ +from typing import Any, Dict, List, Optional + +import datasets +import evaluate +import numpy as np + + +_DESCRIPTION = """ +Human Trust 
def _safe_corr(x: np.ndarray, y: np.ndarray) -> float:
    """Return the Pearson correlation of ``x`` and ``y``, or 0.0 when undefined.

    The correlation is treated as undefined, and 0.0 is returned, when:

    - either input has fewer than two elements (this includes empty input);
    - either input is constant;
    - ``np.corrcoef`` yields NaN (for example because an input contains NaN).

    Args:
        x: One-dimensional sequence of numbers.
        y: One-dimensional sequence of numbers, same length as ``x``.

    Returns:
        The off-diagonal entry of ``np.corrcoef(x, y)`` as a Python float,
        or 0.0 in the undefined cases listed above.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    if len(x) < 2 or len(y) < 2:
        return 0.0

    # Detect constant input by comparing min and max rather than testing
    # ``np.std(...) == 0``: the computed standard deviation of a constant float
    # array can be a tiny non-zero rounding residue (np.std([0.1, 0.1, 0.1])
    # is about 1.4e-17), which would slip past an exact-zero check.
    if np.min(x) == np.max(x) or np.min(y) == np.max(y):
        return 0.0

    corr = float(np.corrcoef(x, y)[0, 1])
    return 0.0 if np.isnan(corr) else corr
predictions: List[Any], + references: List[Any], + confidences: List[float], + human_trust_scores: List[float], + belief_priors: Optional[List[float]], + belief_posteriors: Optional[List[float]], + explanation_complexity: Optional[List[float]] + ) -> None: + # Length checks + n = len(predictions) + if not ( + len(references) == n and + len(confidences) == n and + len(human_trust_scores) == n + ): + raise ValueError("All required input lists must have equal length.") + + if belief_priors is not None and len(belief_priors) != n: + raise ValueError("belief_priors must have the same length as predictions.") + + if belief_posteriors is not None and len(belief_posteriors) != n: + raise ValueError("belief_posteriors must have the same length as predictions.") + + if explanation_complexity is not None and len(explanation_complexity) != n: + raise ValueError("explanation_complexity must have the same length as predictions.") + + # Range checks + for c in confidences: + if not (0.0 <= c <= 1.0): + raise ValueError("All confidence values must be in [0, 1].") + + for t in human_trust_scores: + if not (0.0 <= t <= 1.0): + raise ValueError("All human trust scores must be in [0, 1].") + + if belief_priors is not None: + for b in belief_priors: + if not (0.0 <= b <= 1.0): + raise ValueError("All belief_priors values must be in [0, 1].") + + if belief_posteriors is not None: + for b in belief_posteriors: + if not (0.0 <= b <= 1.0): + raise ValueError("All belief_posteriors values must be in [0, 1].") + + def _compute( + self, + predictions: List[Any], + references: List[Any], + confidences: List[float], + human_trust_scores: List[float], + belief_priors: Optional[List[float]] = None, + belief_posteriors: Optional[List[float]] = None, + explanation_complexity: Optional[List[float]] = None, + ) -> Dict[str, Optional[float]]: + + # Input validation + self._validate_inputs( + predictions, + references, + confidences, + human_trust_scores, + belief_priors, + belief_posteriors, + 
explanation_complexity, + ) + + # Convert to numpy + confidences = np.array(confidences, dtype=float) + trust = np.array(human_trust_scores, dtype=float) + + # === Expected Trust Error (ETE) === + ete = _safe_mean(np.abs(trust - confidences)) + + # === Trust Sensitivity Index (TSI) === + tsi = _safe_corr(trust, confidences) + + # === Belief Shift Magnitude (BSM) === + if belief_priors is not None and belief_posteriors is not None: + belief_priors_arr = np.array(belief_priors, dtype=float) + belief_posteriors_arr = np.array(belief_posteriors, dtype=float) + bsm = _safe_mean(np.abs(belief_posteriors_arr - belief_priors_arr)) + else: + bsm = None + + # === Explanation–Confidence Alignment (ECA) === + if explanation_complexity is not None: + expl = np.array(explanation_complexity, dtype=float) + eca = _safe_corr(confidences, expl) + else: + eca = None + + # === Overconfidence Penalty (OCP) === + errors = np.array( + [pred != ref for pred, ref in zip(predictions, references)], + dtype=float + ) + ocp = _safe_mean(confidences * errors) + + # === Normalized Overconfidence Penalty === + mean_conf = _safe_mean(confidences) + if mean_conf > 0: + ocp_norm = ocp / mean_conf + else: + ocp_norm = 0.0 + + results = { + "expected_trust_error": float(ete), + "trust_sensitivity_index": float(tsi), + "belief_shift_magnitude": None if bsm is None else float(bsm), + "overconfidence_penalty": float(ocp), + "overconfidence_penalty_normalized": float(ocp_norm), + "explanation_confidence_alignment": None if eca is None else float(eca), + } + + return results \ No newline at end of file diff --git a/metrics/human_ai_trust/test_human_ai_trust.py b/metrics/human_ai_trust/test_human_ai_trust.py new file mode 100644 index 00000000..0addf799 --- /dev/null +++ b/metrics/human_ai_trust/test_human_ai_trust.py @@ -0,0 +1,100 @@ +import evaluate +import numpy as np + + +def test_basic_metrics(): + metric = evaluate.load("human_ai_trust") + + preds = [1, 0, 1, 1] + refs = [1, 1, 0, 1] + confs = [0.9, 
0.7, 0.8, 0.6] + trust = [0.85, 0.6, 0.75, 0.65] + + out = metric.compute( + predictions=preds, + references=refs, + confidences=confs, + human_trust_scores=trust, + ) + + # ETE + ete_expected = np.mean(np.abs(np.array(trust) - np.array(confs))) + assert abs(out["expected_trust_error"] - ete_expected) < 1e-6 + + # TSI + tsi_expected = np.corrcoef(trust, confs)[0, 1] + assert abs(out["trust_sensitivity_index"] - tsi_expected) < 1e-6 + + # OCP + errors = np.array([p != r for p, r in zip(preds, refs)], dtype=float) + ocp_expected = np.mean(np.array(confs) * errors) + assert abs(out["overconfidence_penalty"] - ocp_expected) < 1e-6 + + # OCP_norm + ocp_norm_expected = ocp_expected / np.mean(confs) + assert abs(out["overconfidence_penalty_normalized"] - ocp_norm_expected) < 1e-6 + + +def test_zero_variance_confidence(): + metric = evaluate.load("human_ai_trust") + + preds = [1, 0, 1] + refs = [1, 1, 0] + confs = [0.5, 0.5, 0.5] + trust = [0.4, 0.6, 0.5] + + out = metric.compute( + predictions=preds, + references=refs, + confidences=confs, + human_trust_scores=trust, + ) + + assert out["trust_sensitivity_index"] == 0.0 + + +def test_bsm_and_eca(): + metric = evaluate.load("human_ai_trust") + + preds = [1, 0, 1] + refs = [1, 1, 0] + confs = [0.9, 0.7, 0.8] + trust = [0.85, 0.6, 0.75] + priors = [0.3, 0.4, 0.5] + posts = [0.6, 0.5, 0.7] + expl = [10, 20, 15] + + out = metric.compute( + predictions=preds, + references=refs, + confidences=confs, + human_trust_scores=trust, + belief_priors=priors, + belief_posteriors=posts, + explanation_complexity=expl, + ) + + bsm_expected = np.mean(np.abs(np.array(posts) - np.array(priors))) + assert abs(out["belief_shift_magnitude"] - bsm_expected) < 1e-6 + + eca_expected = np.corrcoef(confs, expl)[0, 1] + assert abs(out["explanation_confidence_alignment"] - eca_expected) < 1e-6 + + +def test_missing_optional_inputs(): + metric = evaluate.load("human_ai_trust") + + preds = [1, 0] + refs = [1, 1] + confs = [0.8, 0.6] + trust = [0.75, 0.65] 
"""Minimal smoke run of the ``human_ai_trust`` metric with required inputs only.

The metric is loaded from ``metrics/human_ai_trust`` relative to this file,
because the bare identifier ``"human_ai_trust"`` only resolves to a local
directory when the working directory happens to be ``metrics/``.
"""
from pathlib import Path

import evaluate

# This script sits at the repository root, next to the ``metrics/`` directory.
_METRIC_DIR = Path(__file__).resolve().parent / "metrics" / "human_ai_trust"

# Prefer the local checkout; keep the bare identifier as a fallback so the
# script still behaves as before when copied outside the repository.
metric = evaluate.load(str(_METRIC_DIR) if _METRIC_DIR.is_dir() else "human_ai_trust")

out = metric.compute(
    predictions=[1, 0, 1],
    references=[1, 1, 0],
    confidences=[0.9, 0.7, 0.8],
    human_trust_scores=[0.85, 0.6, 0.75],
)

print(out)