{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# ruff: noqa" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Tabular Data\n", "\n", "`xskillscore` can be used on tabular data such as that stored in a `pandas.DataFrame`.\n", "\n", "It is most effective when evaluating predictions across the values of different fields (columns)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import xskillscore as xs\n", "from sklearn.datasets import fetch_california_housing\n", "from sklearn.metrics import mean_squared_error\n", "\n", "np.random.seed(seed=42)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## California house prices dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As a small example, take a dataset and evaluate a model's predictions for each value of a field (column).\n", "\n", "Load the California house prices dataset:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | MedInc | \n", "HouseAge | \n", "AveRooms | \n", "AveBedrms | \n", "Population | \n", "AveOccup | \n", "Latitude | \n", "Longitude | \n", "y | \n", "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "8.3252 | \n", "41.0 | \n", "7.0 | \n", "1.023810 | \n", "322.0 | \n", "2.555556 | \n", "37.88 | \n", "-122.23 | \n", "4.526 | \n", "
| 1 | \n", "8.3014 | \n", "21.0 | \n", "6.0 | \n", "0.971880 | \n", "2401.0 | \n", "2.109842 | \n", "37.86 | \n", "-122.22 | \n", "3.585 | \n", "
| 2 | \n", "7.2574 | \n", "52.0 | \n", "8.0 | \n", "1.073446 | \n", "496.0 | \n", "2.802260 | \n", "37.85 | \n", "-122.24 | \n", "3.521 | \n", "
| 3 | \n", "5.6431 | \n", "52.0 | \n", "6.0 | \n", "1.073059 | \n", "558.0 | \n", "2.547945 | \n", "37.85 | \n", "-122.25 | \n", "3.413 | \n", "
| 4 | \n", "3.8462 | \n", "52.0 | \n", "6.0 | \n", "1.081081 | \n", "565.0 | \n", "2.181467 | \n", "37.85 | \n", "-122.25 | \n", "3.422 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 20635 | \n", "1.5603 | \n", "25.0 | \n", "5.0 | \n", "1.133333 | \n", "845.0 | \n", "2.560606 | \n", "39.48 | \n", "-121.09 | \n", "0.781 | \n", "
| 20636 | \n", "2.5568 | \n", "18.0 | \n", "6.0 | \n", "1.315789 | \n", "356.0 | \n", "3.122807 | \n", "39.49 | \n", "-121.21 | \n", "0.771 | \n", "
| 20637 | \n", "1.7000 | \n", "17.0 | \n", "5.0 | \n", "1.120092 | \n", "1007.0 | \n", "2.325635 | \n", "39.43 | \n", "-121.22 | \n", "0.923 | \n", "
| 20638 | \n", "1.8672 | \n", "18.0 | \n", "5.0 | \n", "1.171920 | \n", "741.0 | \n", "2.123209 | \n", "39.43 | \n", "-121.32 | \n", "0.847 | \n", "
| 20639 | \n", "2.3886 | \n", "16.0 | \n", "5.0 | \n", "1.162264 | \n", "1387.0 | \n", "2.616981 | \n", "39.37 | \n", "-121.24 | \n", "0.894 | \n", "
20640 rows × 9 columns
\n", "| \n", " | \n", " | y | \n", "yhat | \n", "
|---|---|---|---|
| index | \n", "AveRooms | \n", "\n", " | \n", " |
| 0 | \n", "7.0 | \n", "4.526 | \n", "3.390337 | \n", "
| 1 | \n", "6.0 | \n", "3.585 | \n", "6.816622 | \n", "
| 2 | \n", "8.0 | \n", "3.521 | \n", "5.154701 | \n", "
| 3 | \n", "6.0 | \n", "3.413 | \n", "4.086443 | \n", "
| 4 | \n", "6.0 | \n", "3.422 | \n", "1.067792 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 20635 | \n", "5.0 | \n", "0.781 | \n", "0.611083 | \n", "
| 20636 | \n", "6.0 | \n", "0.771 | \n", "1.497737 | \n", "
| 20637 | \n", "5.0 | \n", "0.923 | \n", "0.648200 | \n", "
| 20638 | \n", "5.0 | \n", "0.847 | \n", "1.470100 | \n", "
| 20639 | \n", "5.0 | \n", "0.894 | \n", "0.166662 | \n", "
20640 rows × 2 columns
\n", "<xarray.Dataset> Size: 16MB\n",
"Dimensions: (index: 20640, AveRooms: 47)\n",
"Coordinates:\n",
" * index (index) int64 165kB 0 1 2 3 4 5 ... 20635 20636 20637 20638 20639\n",
" * AveRooms (AveRooms) float64 376B 1.0 2.0 3.0 4.0 ... 60.0 62.0 133.0 142.0\n",
"Data variables:\n",
" y (index, AveRooms) float64 8MB nan nan nan nan ... nan nan nan nan\n",
" yhat (index, AveRooms) float64 8MB nan nan nan nan ... nan nan nan nan<xarray.DataArray (AveRooms: 47)> Size: 376B\n",
"array([1.78946554e+00, 1.82700352e+00, 1.49245536e+00, 1.35284843e+00,\n",
" 1.38475581e+00, 1.78975720e+00, 3.26525395e+00, 4.80747797e+00,\n",
" 5.16524336e+00, 3.13496890e+00, 1.23401728e+00, 1.12562885e+00,\n",
" 5.54888374e-01, 2.61824323e+00, 8.84529997e-01, 1.17865387e+00,\n",
" 8.96786588e-01, 6.93484341e-01, 8.44837355e-01, 9.50615751e-01,\n",
" 2.55912220e+00, 4.16548298e-01, 3.07284580e-01, 8.31537279e-01,\n",
" 4.06466713e+00, 8.79983025e-01, 1.09491040e-02, 1.12379707e+00,\n",
" 1.50188148e+00, 1.56069394e+00, 2.73330025e-02, 2.68438951e-01,\n",
" 4.63967683e-01, 1.47081770e+00, 3.28568563e+00, 4.86835859e-01,\n",
" 5.48064237e-04, 1.40563208e+00, 9.04093610e-01, 3.26459003e-01,\n",
" 1.48460982e-01, 3.39427104e+00, 4.19379397e+00, 1.74130396e-01,\n",
" 1.04411235e+00, 1.23495233e+00, 2.64087781e-01])\n",
"Coordinates:\n",
" * AveRooms (AveRooms) float64 376B 1.0 2.0 3.0 4.0 ... 60.0 62.0 133.0 142.0| \n", " | DATE | \n", "STORE | \n", "SKU | \n", "y | \n", "yhat | \n", "
|---|---|---|---|---|---|
| 0 | \n", "2020-01-01 | \n", "0 | \n", "0 | \n", "6 | \n", "3.874272 | \n", "
| 1 | \n", "2020-01-01 | \n", "0 | \n", "1 | \n", "9 | \n", "13.551266 | \n", "
| 2 | \n", "2020-01-01 | \n", "0 | \n", "2 | \n", "8 | \n", "3.979884 | \n", "
| 3 | \n", "2020-01-01 | \n", "0 | \n", "3 | \n", "3 | \n", "3.222543 | \n", "
| 4 | \n", "2020-01-01 | \n", "0 | \n", "4 | \n", "6 | \n", "1.647346 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 99995 | \n", "2020-01-10 | \n", "99 | \n", "95 | \n", "1 | \n", "1.000000 | \n", "
| 99996 | \n", "2020-01-10 | \n", "99 | \n", "96 | \n", "4 | \n", "2.770135 | \n", "
| 99997 | \n", "2020-01-10 | \n", "99 | \n", "97 | \n", "7 | \n", "5.820397 | \n", "
| 99998 | \n", "2020-01-10 | \n", "99 | \n", "98 | \n", "2 | \n", "1.000000 | \n", "
| 99999 | \n", "2020-01-10 | \n", "99 | \n", "99 | \n", "2 | \n", "1.000000 | \n", "
100000 rows × 5 columns
\n", "